/**************************************************************************
 *
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

/**
 * This module uploads user buffers and translates the vertex buffers which
 * contain incompatible vertices (i.e. not supported by the driver/hardware)
 * into compatible ones, based on the Gallium CAPs.
 *
 * It does not upload index buffers.
 *
 * The module heavily uses bitmasks to represent per-buffer and
 * per-vertex-element flags to avoid looping over the list of buffers just
 * to see if there's a non-zero stride, or user buffer, or unsupported format,
 * etc.
 *
 * There are 3 categories of vertex elements, which are processed separately:
 * - per-vertex attribs (stride != 0, instance_divisor == 0)
 * - instanced attribs (stride != 0, instance_divisor > 0)
 * - constant attribs (stride == 0)
 *
 * All needed uploads and translations are performed every draw command, but
 * only the subset of vertices needed for that draw command is uploaded or
 * translated. (the module never translates whole buffers)
 *
 *
 * The module consists of two main parts:
 *
 *
 * 1) Translate (u_vbuf_translate_begin/end)
 *
 * This is pretty much a vertex fetch fallback. It translates vertices from
 * one vertex buffer to another in an unused vertex buffer slot. It does
 * whatever is needed to make the vertices readable by the hardware (changes
 * vertex formats and aligns offsets and strides). The translate module is
 * used here.
 *
 * Each of the 3 categories is translated to a separate buffer.
 * Only the [min_index, max_index] range is translated. For instanced attribs,
 * the range is [start_instance, start_instance+instance_count]. For constant
 * attribs, the range is [0, 1].
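 * (E.g. an indexed draw whose indices are {5, 6, 9} only needs vertices
 * [5, 9] of the per-vertex attribs to be translated.)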
 *
 *
 * 2) User buffer uploading (u_vbuf_upload_buffers)
 *
 * Only the [min_index, max_index] range is uploaded (just like Translate)
 * with a single memcpy.
 *
 * This method works best for non-indexed draw operations or indexed draw
 * operations where the [min_index, max_index] range is not much larger
 * than the vertex count.
 *
 * If the range is too big (e.g. one triangle with indices {0, 1, 10000}),
 * the per-vertex attribs are uploaded via the translate module, all packed
 * into one vertex buffer, and the indexed draw call is turned into
 * a non-indexed one in the process. This adds additional complexity
 * to the translate part, but it prevents bad apps from bringing your frame
 * rate down.
 *
 *
 * If there is nothing to do, it forwards every command to the driver.
 * The module also has its own CSO cache of vertex element states.
 */
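
/* A rough usage sketch (hypothetical caller; names other than the u_vbuf
 * entry points are placeholders): a context whose hardware cannot consume
 * every vertex format, stride or offset keeps one u_vbuf instance and routes
 * the relevant entry points through it, e.g.:
 *
 *    struct u_vbuf_caps caps;
 *    u_vbuf_get_caps(screen, &caps, needs64b);
 *    struct u_vbuf *vbuf = u_vbuf_create(pipe, &caps);
 *
 *    u_vbuf_set_vertex_elements(vbuf, &velems_state);
 *    u_vbuf_set_vertex_buffers(vbuf, 0, count, 0, false, buffers);
 *    u_vbuf_draw_vbo(vbuf, &info, drawid_offset, indirect, draws, num_draws);
 *    ...
 *    u_vbuf_destroy(vbuf);
 */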

#include "util/u_vbuf.h"

#include "util/u_dump.h"
#include "util/format/u_format.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_memory.h"
#include "indices/u_primconvert.h"
#include "util/u_prim_restart.h"
#include "util/u_screen.h"
#include "util/u_upload_mgr.h"
#include "translate/translate.h"
#include "translate/translate_cache.h"
#include "cso_cache/cso_cache.h"
#include "cso_cache/cso_hash.h"

struct u_vbuf_elements {
   unsigned count;
   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];

   unsigned src_format_size[PIPE_MAX_ATTRIBS];

   /* If (velem[i].src_format != native_format[i]), the vertex buffer
    * referenced by the vertex element cannot be used for rendering and
    * its vertex data must be translated to native_format[i]. */
   enum pipe_format native_format[PIPE_MAX_ATTRIBS];
   unsigned native_format_size[PIPE_MAX_ATTRIBS];
   unsigned component_size[PIPE_MAX_ATTRIBS];

   /* Which buffers are used by the vertex element state. */
   uint32_t used_vb_mask;
   /* This can mean one of two things:
    * - src_format != native_format, as discussed above.
    * - src_offset % 4 != 0 (if the caps don't allow such an offset). */
   uint32_t incompatible_elem_mask; /* each bit describes a corresp. attrib */
   /* Which buffers are referenced by at least one incompatible vertex
    * element. */
   uint32_t incompatible_vb_mask_any;
   /* Which buffers are referenced by incompatible vertex elements only. */
   uint32_t incompatible_vb_mask_all;
   /* Which buffers are referenced by at least one compatible vertex
    * element. */
   uint32_t compatible_vb_mask_any;
   uint32_t vb_align_mask[2]; /* which buffers require 2/4-byte alignment */
   /* Which buffers are referenced by compatible vertex elements only. */
   uint32_t compatible_vb_mask_all;

   /* Which buffers are referenced by at least one non-instanced vertex
    * element. */
   uint32_t noninstance_vb_mask_any;

   /* Which buffers are used by multiple vertex attribs. */
   uint32_t interleaved_vb_mask;

   void *driver_cso;
};

enum {
   VB_VERTEX = 0,
   VB_INSTANCE = 1,
   VB_CONST = 2,
   VB_NUM = 3
};

struct u_vbuf {
   struct u_vbuf_caps caps;
   bool has_signed_vb_offset;

   struct pipe_context *pipe;
   struct translate_cache *translate_cache;
   struct cso_cache cso_cache;

   struct primconvert_context *pc;
   bool flatshade_first;

   /* This is what was set in set_vertex_buffers.
    * May contain user buffers. */
   struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t enabled_vb_mask;

   /* Which buffers have an offset/stride that is not 2-byte ([0]) or
    * 4-byte ([1]) aligned. */
   uint32_t unaligned_vb_mask[2];

   /* Vertex buffers for the driver.
    * There are usually no user buffers. */
   struct pipe_vertex_buffer real_vertex_buffer[PIPE_MAX_ATTRIBS];
   uint32_t dirty_real_vb_mask; /* which buffers are dirty since the last
                                   call of set_vertex_buffers */

   /* Vertex elements. */
   struct u_vbuf_elements *ve, *ve_saved;

   /* Vertex elements used for the translate fallback. */
   struct cso_velems_state fallback_velems;
   /* Whether the translate fallback vertex element state is currently bound
    * (and therefore used for rendering). */
   boolean using_translate;
   /* The vertex buffer slot indices where translated vertices are stored,
    * one per category. */
   unsigned fallback_vbs[VB_NUM];
   unsigned fallback_vbs_mask;

   /* Which buffer is a user buffer. */
   uint32_t user_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer is incompatible (unaligned). */
   uint32_t incompatible_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffer has a non-zero stride. */
   uint32_t nonzero_stride_vb_mask; /* each bit describes a corresp. buffer */
   /* Which buffers are allowed (supported by hardware). */
   uint32_t allowed_vb_mask;
};

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs);
static void u_vbuf_delete_vertex_elements(void *ctx, void *state,
                                          enum cso_cache_type type);

static const struct {
   enum pipe_format from, to;
} vbuf_format_fallbacks[] = {
   { PIPE_FORMAT_R32_FIXED,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_FIXED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_FIXED,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_FIXED,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R64_FLOAT,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R64G64_FLOAT,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R64G64B64_FLOAT,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R64G64B64A64_FLOAT,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R32_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R32G32_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R32G32B32_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R32G32B32A32_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_UNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_UNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_UNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_UNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SNORM,            PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SNORM,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SNORM,      PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SINT,       PIPE_FORMAT_R32G32B32_SINT },
   { PIPE_FORMAT_R16G16B16_UINT,       PIPE_FORMAT_R32G32B32_UINT },
   { PIPE_FORMAT_R16G16B16A16_SNORM,   PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_USCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_USCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_USCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_USCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R16_SSCALED,          PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R16G16_SSCALED,       PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R16G16B16_SSCALED,    PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R16G16B16A16_SSCALED, PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_UNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_UNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_UNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_UNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SNORM,             PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SNORM,           PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SNORM,         PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SNORM,       PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_USCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_USCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_USCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_USCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
   { PIPE_FORMAT_R8_SSCALED,           PIPE_FORMAT_R32_FLOAT },
   { PIPE_FORMAT_R8G8_SSCALED,         PIPE_FORMAT_R32G32_FLOAT },
   { PIPE_FORMAT_R8G8B8_SSCALED,       PIPE_FORMAT_R32G32B32_FLOAT },
   { PIPE_FORMAT_R8G8B8A8_SSCALED,     PIPE_FORMAT_R32G32B32A32_FLOAT },
};

void u_vbuf_get_caps(struct pipe_screen *screen, struct u_vbuf_caps *caps,
                     bool needs64b)
{
   unsigned i;

   memset(caps, 0, sizeof(*caps));

   /* I'd rather have a bitfield of which formats are supported and a static
    * table of the translations indexed by format, but since we don't have C99
    * we can't easily make a sparsely-populated table indexed by format.  So,
    * we construct the sparse table here.
    */
   for (i = 0; i < PIPE_FORMAT_COUNT; i++)
      caps->format_translation[i] = i;

   for (i = 0; i < ARRAY_SIZE(vbuf_format_fallbacks); i++) {
      enum pipe_format format = vbuf_format_fallbacks[i].from;
      unsigned comp_bits = util_format_get_component_bits(format, 0, 0);

      if ((comp_bits > 32) && !needs64b)
         continue;

      if (!screen->is_format_supported(screen, format, PIPE_BUFFER, 0, 0,
                                       PIPE_BIND_VERTEX_BUFFER)) {
         caps->format_translation[format] = vbuf_format_fallbacks[i].to;
         caps->fallback_always = true;
      }
   }

   caps->buffer_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->buffer_stride_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY);
   caps->velem_src_offset_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY);
   caps->attrib_component_unaligned =
      !screen->get_param(screen,
                         PIPE_CAP_VERTEX_ATTRIB_ELEMENT_ALIGNED_ONLY);
   assert(caps->attrib_component_unaligned ||
          (caps->velem_src_offset_unaligned && caps->buffer_stride_unaligned && caps->buffer_offset_unaligned));
   caps->user_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_USER_VERTEX_BUFFERS);
   caps->max_vertex_buffers =
      screen->get_param(screen, PIPE_CAP_MAX_VERTEX_BUFFERS);

   if (screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART) ||
       screen->get_param(screen, PIPE_CAP_PRIMITIVE_RESTART_FIXED_INDEX)) {
      caps->rewrite_restart_index = screen->get_param(screen, PIPE_CAP_EMULATE_NONFIXED_PRIMITIVE_RESTART);
      caps->supported_restart_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES_WITH_RESTART);
      caps->supported_restart_modes |= BITFIELD_BIT(PIPE_PRIM_PATCHES);
      if (caps->supported_restart_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
         caps->fallback_always = true;
      caps->fallback_always |= caps->rewrite_restart_index;
   }
   caps->supported_prim_modes = screen->get_param(screen, PIPE_CAP_SUPPORTED_PRIM_MODES);
   if (caps->supported_prim_modes != BITFIELD_MASK(PIPE_PRIM_MAX))
      caps->fallback_always = true;

   if (!screen->is_format_supported(screen, PIPE_FORMAT_R8_UINT, PIPE_BUFFER, 0, 0, PIPE_BIND_INDEX_BUFFER))
      caps->fallback_always = caps->rewrite_ubyte_ibs = true;

   /* OpenGL 2.0 requires a minimum of 16 vertex buffers */
   if (caps->max_vertex_buffers < 16)
      caps->fallback_always = true;

   if (!caps->buffer_offset_unaligned ||
       !caps->buffer_stride_unaligned ||
       !caps->attrib_component_unaligned ||
       !caps->velem_src_offset_unaligned)
      caps->fallback_always = true;

   if (!caps->fallback_always && !caps->user_vertex_buffers)
      caps->fallback_only_for_user_vbuffers = true;
}

struct u_vbuf *
u_vbuf_create(struct pipe_context *pipe, struct u_vbuf_caps *caps)
{
   struct u_vbuf *mgr = CALLOC_STRUCT(u_vbuf);

   mgr->caps = *caps;
   mgr->pipe = pipe;
   if (caps->rewrite_ubyte_ibs || caps->rewrite_restart_index ||
       /* require all but patches */
       ((caps->supported_prim_modes & caps->supported_restart_modes & BITFIELD_MASK(PIPE_PRIM_MAX))) !=
                                      BITFIELD_MASK(PIPE_PRIM_MAX)) {
      struct primconvert_config cfg;
      cfg.fixed_prim_restart = caps->rewrite_restart_index;
      cfg.primtypes_mask = caps->supported_prim_modes;
      cfg.restart_primtypes_mask = caps->supported_restart_modes;
      mgr->pc = util_primconvert_create_config(pipe, &cfg);
   }
   mgr->translate_cache = translate_cache_create();
   memset(mgr->fallback_vbs, ~0, sizeof(mgr->fallback_vbs));
   mgr->allowed_vb_mask = u_bit_consecutive(0, mgr->caps.max_vertex_buffers);

   mgr->has_signed_vb_offset =
      pipe->screen->get_param(pipe->screen,
                              PIPE_CAP_SIGNED_VERTEX_BUFFER_OFFSET);

   cso_cache_init(&mgr->cso_cache, pipe);
   cso_cache_set_delete_cso_callback(&mgr->cso_cache,
                                     u_vbuf_delete_vertex_elements, pipe);

   return mgr;
}

/* u_vbuf uses its own caching for vertex elements, because it needs to keep
 * its own preprocessed state per vertex element CSO. */
static struct u_vbuf_elements *
u_vbuf_set_vertex_elements_internal(struct u_vbuf *mgr,
                                    const struct cso_velems_state *velems)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned key_size, hash_key;
   struct cso_hash_iter iter;
   struct u_vbuf_elements *ve;

   /* need to include the count into the stored state data too. */
   key_size = sizeof(struct pipe_vertex_element) * velems->count +
              sizeof(unsigned);
   hash_key = cso_construct_key((void*)velems, key_size);
   iter = cso_find_state_template(&mgr->cso_cache, hash_key, CSO_VELEMENTS,
                                  (void*)velems, key_size);

   if (cso_hash_iter_is_null(iter)) {
      struct cso_velements *cso = MALLOC_STRUCT(cso_velements);
      memcpy(&cso->state, velems, key_size);
      cso->data = u_vbuf_create_vertex_elements(mgr, velems->count,
                                                velems->velems);

      iter = cso_insert_state(&mgr->cso_cache, hash_key, CSO_VELEMENTS, cso);
      ve = cso->data;
   } else {
      ve = ((struct cso_velements *)cso_hash_iter_data(iter))->data;
   }

   assert(ve);

   if (ve != mgr->ve)
      pipe->bind_vertex_elements_state(pipe, ve->driver_cso);

   return ve;
}

void u_vbuf_set_vertex_elements(struct u_vbuf *mgr,
                                const struct cso_velems_state *velems)
{
   mgr->ve = u_vbuf_set_vertex_elements_internal(mgr, velems);
}

void u_vbuf_set_flatshade_first(struct u_vbuf *mgr, bool flatshade_first)
{
   mgr->flatshade_first = flatshade_first;
}

void u_vbuf_unset_vertex_elements(struct u_vbuf *mgr)
{
   mgr->ve = NULL;
}

void u_vbuf_destroy(struct u_vbuf *mgr)
{
   struct pipe_screen *screen = mgr->pipe->screen;
   unsigned i;
   const unsigned num_vb = screen->get_shader_param(screen, PIPE_SHADER_VERTEX,
                                                    PIPE_SHADER_CAP_MAX_INPUTS);

   mgr->pipe->set_vertex_buffers(mgr->pipe, 0, 0, num_vb, false, NULL);

   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[i]);
   for (i = 0; i < PIPE_MAX_ATTRIBS; i++)
      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[i]);

   if (mgr->pc)
      util_primconvert_destroy(mgr->pc);

   translate_cache_destroy(mgr->translate_cache);
   cso_cache_delete(&mgr->cso_cache);
   FREE(mgr);
}

static enum pipe_error
u_vbuf_translate_buffers(struct u_vbuf *mgr, struct translate_key *key,
                         const struct pipe_draw_info *info,
                         const struct pipe_draw_start_count_bias *draw,
                         unsigned vb_mask, unsigned out_vb,
                         int start_vertex, unsigned num_vertices,
                         int min_index, boolean unroll_indices)
{
   struct translate *tr;
   struct pipe_transfer *vb_transfer[PIPE_MAX_ATTRIBS] = {0};
   struct pipe_resource *out_buffer = NULL;
   uint8_t *out_map;
   unsigned out_offset, mask;

   /* Get a translate object. */
   tr = translate_cache_find(mgr->translate_cache, key);

   /* Map buffers we want to translate. */
   mask = vb_mask;
   while (mask) {
      struct pipe_vertex_buffer *vb;
      unsigned offset;
      uint8_t *map;
      unsigned i = u_bit_scan(&mask);

      vb = &mgr->vertex_buffer[i];
      offset = vb->buffer_offset + vb->stride * start_vertex;

      if (vb->is_user_buffer) {
         map = (uint8_t*)vb->buffer.user + offset;
      } else {
         unsigned size = vb->stride ? num_vertices * vb->stride
                                    : sizeof(double)*4;

         if (!vb->buffer.resource) {
            static uint64_t dummy_buf[4] = { 0 };
            tr->set_buffer(tr, i, dummy_buf, 0, 0);
            continue;
         }

         if (vb->stride) {
            /* The stride cannot be used to calculate the map size of the
             * buffer, because it only determines the bytes between elements,
             * not the size of the elements themselves. If stride <
             * element_size, the mapped size would be too small and the
             * conversion would overrun the mapped buffer.
             *
             * Instead, add the size of the largest possible attribute to
             * the final attribute's offset to ensure the map is large enough.
             */
            unsigned last_offset = size - vb->stride;
            size = MAX2(size, last_offset + sizeof(double)*4);
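            /* E.g. stride = 4 and num_vertices = 100 give size = 400, but the
             * largest possible attrib read at the last vertex (offset 396)
             * may reach byte 428, so the mapping grows to 396 + 32 = 428. */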
         }

         if (offset + size > vb->buffer.resource->width0) {
            /* Don't try to map past end of buffer.  This often happens when
             * we're translating an attribute that's at offset > 0 from the
             * start of the vertex.  If we'd subtract attrib's offset from
             * the size, this probably wouldn't happen.
             */
            size = vb->buffer.resource->width0 - offset;

            /* Also adjust num_vertices.  A common user error is to call
             * glDrawRangeElements() with an incorrect 'end' argument.  The
             * 'end' value should be the max index value, but people often
             * accidentally add one to this value.  This adjustment avoids
             * crashing (by reading past the end of a hardware buffer mapping)
             * when people do that.
             */
            num_vertices = (size + vb->stride - 1) / vb->stride;
         }

         map = pipe_buffer_map_range(mgr->pipe, vb->buffer.resource, offset, size,
                                     PIPE_MAP_READ, &vb_transfer[i]);
      }

      /* Subtract min_index so that indexing with the index buffer works. */
      if (unroll_indices) {
         map -= (ptrdiff_t)vb->stride * min_index;
      }

      tr->set_buffer(tr, i, map, vb->stride, info->max_index);
   }

   /* Translate. */
   if (unroll_indices) {
      struct pipe_transfer *transfer = NULL;
      const unsigned offset = draw->start * info->index_size;
      uint8_t *map;

      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader, 0,
                     key->output_stride * draw->count, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      if (info->has_user_indices) {
         map = (uint8_t*)info->index.user + offset;
      } else {
         map = pipe_buffer_map_range(mgr->pipe, info->index.resource, offset,
                                     draw->count * info->index_size,
                                     PIPE_MAP_READ, &transfer);
      }

      switch (info->index_size) {
      case 4:
         tr->run_elts(tr, (unsigned*)map, draw->count, 0, 0, out_map);
         break;
      case 2:
         tr->run_elts16(tr, (uint16_t*)map, draw->count, 0, 0, out_map);
         break;
      case 1:
         tr->run_elts8(tr, map, draw->count, 0, 0, out_map);
         break;
      }

      if (transfer) {
         pipe_buffer_unmap(mgr->pipe, transfer);
      }
   } else {
      /* Create and map the output buffer. */
      u_upload_alloc(mgr->pipe->stream_uploader,
                     mgr->has_signed_vb_offset ?
                        0 : key->output_stride * start_vertex,
                     key->output_stride * num_vertices, 4,
                     &out_offset, &out_buffer,
                     (void**)&out_map);
      if (!out_buffer)
         return PIPE_ERROR_OUT_OF_MEMORY;

      out_offset -= key->output_stride * start_vertex;

      tr->run(tr, 0, num_vertices, 0, 0, out_map);
   }

   /* Unmap all buffers. */
   mask = vb_mask;
   while (mask) {
      unsigned i = u_bit_scan(&mask);

      if (vb_transfer[i]) {
         pipe_buffer_unmap(mgr->pipe, vb_transfer[i]);
      }
   }

   /* Setup the new vertex buffer. */
   mgr->real_vertex_buffer[out_vb].buffer_offset = out_offset;
   mgr->real_vertex_buffer[out_vb].stride = key->output_stride;

   /* Move the buffer reference. */
   pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[out_vb]);
   mgr->real_vertex_buffer[out_vb].buffer.resource = out_buffer;
   mgr->real_vertex_buffer[out_vb].is_user_buffer = false;

   return PIPE_OK;
}

static boolean
u_vbuf_translate_find_free_vb_slots(struct u_vbuf *mgr,
                                    unsigned mask[VB_NUM])
{
   unsigned type;
   unsigned fallback_vbs[VB_NUM];
   /* Set the bit for each buffer which is incompatible or not enabled. */
   uint32_t unused_vb_mask =
      mgr->ve->incompatible_vb_mask_all | mgr->incompatible_vb_mask |
      ~mgr->enabled_vb_mask;
   uint32_t unused_vb_mask_orig;
   boolean insufficient_buffers = false;

   /* No vertex buffers available at all */
   if (!unused_vb_mask)
      return FALSE;

   memset(fallback_vbs, ~0, sizeof(fallback_vbs));
   mgr->fallback_vbs_mask = 0;

   /* Find free slots for each type if needed. */
   unused_vb_mask_orig = unused_vb_mask;
   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         uint32_t index;

         if (!unused_vb_mask) {
            insufficient_buffers = true;
            break;
         }

         index = ffs(unused_vb_mask) - 1;
         fallback_vbs[type] = index;
         mgr->fallback_vbs_mask |= 1 << index;
         unused_vb_mask &= ~(1 << index);
         /*printf("found slot=%i for type=%i\n", index, type);*/
      }
   }

   if (insufficient_buffers) {
      /* There are not enough free vertex buffer slots for all attrib types,
       * so they will have to share one buffer. */
      uint32_t index = ffs(unused_vb_mask_orig) - 1;
      /* When sharing one vertex buffer use per-vertex frequency for everything. */
      fallback_vbs[VB_VERTEX] = index;
      mgr->fallback_vbs_mask = 1 << index;
      mask[VB_VERTEX] = mask[VB_VERTEX] | mask[VB_CONST] | mask[VB_INSTANCE];
      mask[VB_CONST] = 0;
      mask[VB_INSTANCE] = 0;
   }

   for (type = 0; type < VB_NUM; type++) {
      if (mask[type]) {
         mgr->dirty_real_vb_mask |= 1 << fallback_vbs[type];
      }
   }

   memcpy(mgr->fallback_vbs, fallback_vbs, sizeof(fallback_vbs));
   return TRUE;
}

static boolean
u_vbuf_translate_begin(struct u_vbuf *mgr,
                       const struct pipe_draw_info *info,
                       const struct pipe_draw_start_count_bias *draw,
                       int start_vertex, unsigned num_vertices,
                       int min_index, boolean unroll_indices,
                       uint32_t misaligned)
{
   unsigned mask[VB_NUM] = {0};
   struct translate_key key[VB_NUM];
   unsigned elem_index[VB_NUM][PIPE_MAX_ATTRIBS]; /* ... into key.elements */
   unsigned i, type;
   const unsigned incompatible_vb_mask = (misaligned | mgr->incompatible_vb_mask) &
                                         mgr->ve->used_vb_mask;

   const int start[VB_NUM] = {
      start_vertex,           /* VERTEX */
      info->start_instance,   /* INSTANCE */
      0                       /* CONST */
   };

   const unsigned num[VB_NUM] = {
      num_vertices,           /* VERTEX */
      info->instance_count,   /* INSTANCE */
      1                       /* CONST */
   };

   memset(key, 0, sizeof(key));
   memset(elem_index, ~0, sizeof(elem_index));

   /* See if there are vertex attribs of each type to translate and
    * which ones. */
   for (i = 0; i < mgr->ve->count; i++) {
      unsigned vb_index = mgr->ve->ve[i].vertex_buffer_index;

      if (!mgr->vertex_buffer[vb_index].stride) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_CONST] |= 1 << vb_index;
      } else if (mgr->ve->ve[i].instance_divisor) {
         if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_INSTANCE] |= 1 << vb_index;
      } else {
         if (!unroll_indices &&
             !(mgr->ve->incompatible_elem_mask & (1 << i)) &&
             !(incompatible_vb_mask & (1 << vb_index))) {
            continue;
         }
         mask[VB_VERTEX] |= 1 << vb_index;
      }
   }

   assert(mask[VB_VERTEX] || mask[VB_INSTANCE] || mask[VB_CONST]);

   /* Find free vertex buffer slots. */
   if (!u_vbuf_translate_find_free_vb_slots(mgr, mask)) {
      return FALSE;
   }

   unsigned min_alignment[VB_NUM] = {0};
   /* Initialize the translate keys. */
   for (i = 0; i < mgr->ve->count; i++) {
      struct translate_key *k;
      struct translate_element *te;
      enum pipe_format output_format = mgr->ve->native_format[i];
      unsigned bit, vb_index = mgr->ve->ve[i].vertex_buffer_index;
      bit = 1 << vb_index;

      if (!(mgr->ve->incompatible_elem_mask & (1 << i)) &&
          !(incompatible_vb_mask & (1 << vb_index)) &&
          (!unroll_indices || !(mask[VB_VERTEX] & bit))) {
         continue;
      }

      /* Set type to what we will translate:
       * vertex, instance, or constant attribs. */
      for (type = 0; type < VB_NUM; type++) {
         if (mask[type] & bit) {
            break;
         }
      }
      assert(type < VB_NUM);
      if (mgr->ve->ve[i].src_format != output_format)
         assert(translate_is_output_format_supported(output_format));
      /*printf("velem=%i type=%i\n", i, type);*/

      /* Add the vertex element. */
      k = &key[type];
      elem_index[type][i] = k->nr_elements;

      te = &k->element[k->nr_elements];
      te->type = TRANSLATE_ELEMENT_NORMAL;
      te->instance_divisor = 0;
      te->input_buffer = vb_index;
      te->input_format = mgr->ve->ve[i].src_format;
      te->input_offset = mgr->ve->ve[i].src_offset;
      te->output_format = output_format;
      te->output_offset = k->output_stride;
      unsigned adjustment = 0;
      if (!mgr->caps.attrib_component_unaligned &&
          te->output_offset % mgr->ve->component_size[i] != 0) {
         unsigned aligned = align(te->output_offset, mgr->ve->component_size[i]);
         adjustment = aligned - te->output_offset;
         te->output_offset = aligned;
      }

      k->output_stride += mgr->ve->native_format_size[i] + adjustment;
      k->nr_elements++;
      min_alignment[type] = MAX2(min_alignment[type], mgr->ve->component_size[i]);
   }

   /* Translate buffers. */
   for (type = 0; type < VB_NUM; type++) {
      if (key[type].nr_elements) {
         enum pipe_error err;
         if (!mgr->caps.attrib_component_unaligned)
            key[type].output_stride = align(key[type].output_stride, min_alignment[type]);
         err = u_vbuf_translate_buffers(mgr, &key[type], info, draw,
                                        mask[type], mgr->fallback_vbs[type],
                                        start[type], num[type], min_index,
                                        unroll_indices && type == VB_VERTEX);
         if (err != PIPE_OK)
            return FALSE;

         /* Fixup the stride for constant attribs. */
         if (type == VB_CONST) {
            mgr->real_vertex_buffer[mgr->fallback_vbs[VB_CONST]].stride = 0;
         }
      }
   }

   /* Setup new vertex elements. */
   for (i = 0; i < mgr->ve->count; i++) {
      for (type = 0; type < VB_NUM; type++) {
         if (elem_index[type][i] < key[type].nr_elements) {
            struct translate_element *te = &key[type].element[elem_index[type][i]];
            mgr->fallback_velems.velems[i].instance_divisor = mgr->ve->ve[i].instance_divisor;
            mgr->fallback_velems.velems[i].src_format = te->output_format;
            mgr->fallback_velems.velems[i].src_offset = te->output_offset;
            mgr->fallback_velems.velems[i].vertex_buffer_index = mgr->fallback_vbs[type];

            /* elem_index[type][i] can only be set for one type. */
            assert(type > VB_INSTANCE || elem_index[type+1][i] == ~0u);
            assert(type > VB_VERTEX   || elem_index[type+2][i] == ~0u);
            break;
         }
      }
      /* No translating, just copy the original vertex element over. */
      if (type == VB_NUM) {
         memcpy(&mgr->fallback_velems.velems[i], &mgr->ve->ve[i],
                sizeof(struct pipe_vertex_element));
      }
   }

   mgr->fallback_velems.count = mgr->ve->count;

   u_vbuf_set_vertex_elements_internal(mgr, &mgr->fallback_velems);
   mgr->using_translate = TRUE;
   return TRUE;
}

static void u_vbuf_translate_end(struct u_vbuf *mgr)
{
   unsigned i;

   /* Restore vertex elements. */
   mgr->pipe->bind_vertex_elements_state(mgr->pipe, mgr->ve->driver_cso);
   mgr->using_translate = FALSE;

   /* Unreference the now-unused VBOs. */
   for (i = 0; i < VB_NUM; i++) {
      unsigned vb = mgr->fallback_vbs[i];
      if (vb != ~0u) {
         pipe_resource_reference(&mgr->real_vertex_buffer[vb].buffer.resource, NULL);
         mgr->fallback_vbs[i] = ~0;
      }
   }
   /* This will cause the buffer to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= mgr->fallback_vbs_mask;
   mgr->fallback_vbs_mask = 0;
}

static void *
u_vbuf_create_vertex_elements(struct u_vbuf *mgr, unsigned count,
                              const struct pipe_vertex_element *attribs)
{
   struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS];
   util_lower_uint64_vertex_elements(&attribs, &count, tmp);

   struct pipe_context *pipe = mgr->pipe;
   unsigned i;
   struct pipe_vertex_element driver_attribs[PIPE_MAX_ATTRIBS];
   struct u_vbuf_elements *ve = CALLOC_STRUCT(u_vbuf_elements);
   uint32_t used_buffers = 0;

   ve->count = count;

   memcpy(ve->ve, attribs, sizeof(struct pipe_vertex_element) * count);
   memcpy(driver_attribs, attribs, sizeof(struct pipe_vertex_element) * count);

   /* Set the best native format in case the original format is not
    * supported. */
   for (i = 0; i < count; i++) {
      enum pipe_format format = ve->ve[i].src_format;
      unsigned vb_index_bit = 1 << ve->ve[i].vertex_buffer_index;

      ve->src_format_size[i] = util_format_get_blocksize(format);

      if (used_buffers & vb_index_bit)
         ve->interleaved_vb_mask |= vb_index_bit;

      used_buffers |= vb_index_bit;

      if (!ve->ve[i].instance_divisor) {
         ve->noninstance_vb_mask_any |= vb_index_bit;
      }

      format = mgr->caps.format_translation[format];

      driver_attribs[i].src_format = format;
      ve->native_format[i] = format;
      ve->native_format_size[i] =
            util_format_get_blocksize(ve->native_format[i]);

      const struct util_format_description *desc = util_format_description(format);
      bool is_packed = false;
      for (unsigned c = 0; c < desc->nr_channels; c++)
         is_packed |= desc->channel[c].size != desc->channel[0].size || desc->channel[c].size % 8 != 0;
      unsigned component_size = is_packed ?
                                ve->native_format_size[i] : (ve->native_format_size[i] / desc->nr_channels);
      ve->component_size[i] = component_size;

      if (ve->ve[i].src_format != format ||
          (!mgr->caps.velem_src_offset_unaligned &&
           ve->ve[i].src_offset % 4 != 0) ||
          (!mgr->caps.attrib_component_unaligned &&
           ve->ve[i].src_offset % component_size != 0)) {
         ve->incompatible_elem_mask |= 1 << i;
         ve->incompatible_vb_mask_any |= vb_index_bit;
      } else {
         ve->compatible_vb_mask_any |= vb_index_bit;
         if (component_size == 2)
            ve->vb_align_mask[0] |= vb_index_bit;
         else if (component_size == 4)
            ve->vb_align_mask[1] |= vb_index_bit;
      }
   }

   if (used_buffers & ~mgr->allowed_vb_mask) {
      /* More vertex buffers are used than the hardware supports.  In
       * principle, we only need to make sure that fewer vertex buffers are
       * used, and mark some of the latter vertex buffers as incompatible.
       * For now, mark all vertex buffers as incompatible.
       */
      ve->incompatible_vb_mask_any = used_buffers;
      ve->compatible_vb_mask_any = 0;
      ve->incompatible_elem_mask = u_bit_consecutive(0, count);
   }

   ve->used_vb_mask = used_buffers;
   ve->compatible_vb_mask_all = ~ve->incompatible_vb_mask_any & used_buffers;
   ve->incompatible_vb_mask_all = ~ve->compatible_vb_mask_any & used_buffers;

   /* Align the formats and offsets to the size of a DWORD if needed. */
   if (!mgr->caps.velem_src_offset_unaligned) {
      for (i = 0; i < count; i++) {
         ve->native_format_size[i] = align(ve->native_format_size[i], 4);
         driver_attribs[i].src_offset = align(ve->ve[i].src_offset, 4);
      }
   }

   /* Only create driver CSO if no incompatible elements */
   if (!ve->incompatible_elem_mask) {
      ve->driver_cso =
         pipe->create_vertex_elements_state(pipe, count, driver_attribs);
   }

   return ve;
}

static void u_vbuf_delete_vertex_elements(void *ctx, void *state,
                                          enum cso_cache_type type)
{
   struct pipe_context *pipe = (struct pipe_context*)ctx;
   struct cso_velements *cso = (struct cso_velements*)state;
   struct u_vbuf_elements *ve = (struct u_vbuf_elements*)cso->data;

   if (ve->driver_cso)
      pipe->delete_vertex_elements_state(pipe, ve->driver_cso);
   FREE(ve);
   FREE(cso);
}

void u_vbuf_set_vertex_buffers(struct u_vbuf *mgr,
                               unsigned start_slot, unsigned count,
                               unsigned unbind_num_trailing_slots,
                               bool take_ownership,
                               const struct pipe_vertex_buffer *bufs)
{
   unsigned i;
   /* which buffers are enabled */
   uint32_t enabled_vb_mask = 0;
   /* which buffers are in user memory */
   uint32_t user_vb_mask = 0;
   /* which buffers are incompatible with the driver */
   uint32_t incompatible_vb_mask = 0;
   /* which buffers have a non-zero stride */
   uint32_t nonzero_stride_vb_mask = 0;
   /* which buffers are unaligned to 2/4 bytes */
   uint32_t unaligned_vb_mask[2] = {0};
   uint32_t mask =
      ~(((1ull << (count + unbind_num_trailing_slots)) - 1) << start_slot);
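   /* E.g. start_slot = 1, count = 2, unbind_num_trailing_slots = 1 clears
    * bits 1..3 in the mask, keeping only the bits of untouched slots. */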

   if (!bufs) {
      struct pipe_context *pipe = mgr->pipe;
      /* Unbind. */
      unsigned total_count = count + unbind_num_trailing_slots;
      mgr->dirty_real_vb_mask &= mask;

      /* Zero out the bits we are going to rewrite completely. */
      mgr->user_vb_mask &= mask;
      mgr->incompatible_vb_mask &= mask;
      mgr->nonzero_stride_vb_mask &= mask;
      mgr->enabled_vb_mask &= mask;
      mgr->unaligned_vb_mask[0] &= mask;
      mgr->unaligned_vb_mask[1] &= mask;

      for (i = 0; i < total_count; i++) {
         unsigned dst_index = start_slot + i;

         pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
         pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
      }

      pipe->set_vertex_buffers(pipe, start_slot, count,
                               unbind_num_trailing_slots, false, NULL);
      return;
   }

   for (i = 0; i < count; i++) {
      unsigned dst_index = start_slot + i;
      const struct pipe_vertex_buffer *vb = &bufs[i];
      struct pipe_vertex_buffer *orig_vb = &mgr->vertex_buffer[dst_index];
      struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[dst_index];

      if (!vb->buffer.resource) {
         pipe_vertex_buffer_unreference(orig_vb);
         pipe_vertex_buffer_unreference(real_vb);
         continue;
      }

      bool not_user = !vb->is_user_buffer && vb->is_user_buffer == orig_vb->is_user_buffer;
      /* struct isn't tightly packed: do not use memcmp */
      if (not_user && orig_vb->stride == vb->stride &&
          orig_vb->buffer_offset == vb->buffer_offset && orig_vb->buffer.resource == vb->buffer.resource) {
         mask |= BITFIELD_BIT(dst_index);
         if (take_ownership) {
             pipe_vertex_buffer_unreference(orig_vb);
             /* the pointer was unset in the line above, so copy it back */
             orig_vb->buffer.resource = vb->buffer.resource;
         }
         if (mask == UINT32_MAX)
            return;
         continue;
      }

      if (take_ownership) {
         pipe_vertex_buffer_unreference(orig_vb);
         memcpy(orig_vb, vb, sizeof(*vb));
      } else {
         pipe_vertex_buffer_reference(orig_vb, vb);
      }

      if (vb->stride) {
         nonzero_stride_vb_mask |= 1 << dst_index;
      }
      enabled_vb_mask |= 1 << dst_index;

      if ((!mgr->caps.buffer_offset_unaligned && vb->buffer_offset % 4 != 0) ||
          (!mgr->caps.buffer_stride_unaligned && vb->stride % 4 != 0)) {
         incompatible_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      if (!mgr->caps.attrib_component_unaligned) {
         if (vb->buffer_offset % 2 != 0 || vb->stride % 2 != 0)
            unaligned_vb_mask[0] |= BITFIELD_BIT(dst_index);
         if (vb->buffer_offset % 4 != 0 || vb->stride % 4 != 0)
            unaligned_vb_mask[1] |= BITFIELD_BIT(dst_index);
      }

      if (!mgr->caps.user_vertex_buffers && vb->is_user_buffer) {
         user_vb_mask |= 1 << dst_index;
         real_vb->buffer_offset = vb->buffer_offset;
         real_vb->stride = vb->stride;
         pipe_vertex_buffer_unreference(real_vb);
         real_vb->is_user_buffer = false;
         continue;
      }

      pipe_vertex_buffer_reference(real_vb, vb);
   }

   for (i = 0; i < unbind_num_trailing_slots; i++) {
      unsigned dst_index = start_slot + count + i;

      pipe_vertex_buffer_unreference(&mgr->vertex_buffer[dst_index]);
      pipe_vertex_buffer_unreference(&mgr->real_vertex_buffer[dst_index]);
   }


   /* Zero out the bits we are going to rewrite completely. */
   mgr->user_vb_mask &= mask;
   mgr->incompatible_vb_mask &= mask;
   mgr->nonzero_stride_vb_mask &= mask;
   mgr->enabled_vb_mask &= mask;
   mgr->unaligned_vb_mask[0] &= mask;
   mgr->unaligned_vb_mask[1] &= mask;

   mgr->user_vb_mask |= user_vb_mask;
   mgr->incompatible_vb_mask |= incompatible_vb_mask;
   mgr->nonzero_stride_vb_mask |= nonzero_stride_vb_mask;
   mgr->enabled_vb_mask |= enabled_vb_mask;
   mgr->unaligned_vb_mask[0] |= unaligned_vb_mask[0];
   mgr->unaligned_vb_mask[1] |= unaligned_vb_mask[1];

   /* All changed buffers are marked as dirty, even the NULL ones,
    * which will cause the NULL buffers to be unbound in the driver later. */
   mgr->dirty_real_vb_mask |= ~mask;
}

static ALWAYS_INLINE bool
get_upload_offset_size(struct u_vbuf *mgr,
                       const struct pipe_vertex_buffer *vb,
                       struct u_vbuf_elements *ve,
                       const struct pipe_vertex_element *velem,
                       unsigned vb_index, unsigned velem_index,
                       int start_vertex, unsigned num_vertices,
                       int start_instance, unsigned num_instances,
                       unsigned *offset, unsigned *size)
{
   /* Skip the buffers generated by translate. */
   if ((1 << vb_index) & mgr->fallback_vbs_mask || !vb->is_user_buffer)
      return false;

   unsigned instance_div = velem->instance_divisor;
   *offset = vb->buffer_offset + velem->src_offset;

   if (!vb->stride) {
      /* Constant attrib. */
      *size = ve->src_format_size[velem_index];
   } else if (instance_div) {
      /* Per-instance attrib. */

      /* Figure out how many instances we'll render given instance_div.  We
       * can't use the typical div_round_up() pattern because the CTS uses
       * instance_div = ~0 for a test, which overflows div_round_up()'s
       * addition.
       */
      unsigned count = num_instances / instance_div;
      if (count * instance_div != num_instances)
         count++;
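      /* E.g. num_instances = 7 with instance_div = 2 reads elements 0..3
       * from the buffer, i.e. count = 4. */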

      *offset += vb->stride * start_instance;
      *size = vb->stride * (count - 1) + ve->src_format_size[velem_index];
   } else {
      /* Per-vertex attrib. */
      *offset += vb->stride * start_vertex;
      *size = vb->stride * (num_vertices - 1) + ve->src_format_size[velem_index];
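      /* E.g. stride = 12, num_vertices = 4 and an 8-byte format give
       * 12 * 3 + 8 = 44 bytes. */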
   }
   return true;
}


static enum pipe_error
u_vbuf_upload_buffers(struct u_vbuf *mgr,
                      int start_vertex, unsigned num_vertices,
                      int start_instance, unsigned num_instances)
{
   unsigned i;
   struct u_vbuf_elements *ve = mgr->ve;
   unsigned nr_velems = ve->count;
   const struct pipe_vertex_element *velems =
         mgr->using_translate ? mgr->fallback_velems.velems : ve->ve;

   /* Faster path when no vertex attribs are interleaved. */
   if ((ve->interleaved_vb_mask & mgr->user_vb_mask) == 0) {
      for (i = 0; i < nr_velems; i++) {
         const struct pipe_vertex_element *velem = &velems[i];
         unsigned index = velem->vertex_buffer_index;
         struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
         unsigned offset, size;

         if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
                                     num_vertices, start_instance, num_instances,
                                     &offset, &size))
            continue;

         struct pipe_vertex_buffer *real_vb = &mgr->real_vertex_buffer[index];
         const uint8_t *ptr = mgr->vertex_buffer[index].buffer.user;

         u_upload_data(mgr->pipe->stream_uploader,
                       mgr->has_signed_vb_offset ? 0 : offset,
                       size, 4, ptr + offset, &real_vb->buffer_offset,
                       &real_vb->buffer.resource);
         if (!real_vb->buffer.resource)
            return PIPE_ERROR_OUT_OF_MEMORY;

         real_vb->buffer_offset -= offset;
      }
      return PIPE_OK;
   }

   unsigned start_offset[PIPE_MAX_ATTRIBS];
   unsigned end_offset[PIPE_MAX_ATTRIBS];
   uint32_t buffer_mask = 0;

   /* Slower path supporting interleaved vertex attribs using 2 loops. */
   /* Determine how much data needs to be uploaded. */
   for (i = 0; i < nr_velems; i++) {
      const struct pipe_vertex_element *velem = &velems[i];
      unsigned index = velem->vertex_buffer_index;
      struct pipe_vertex_buffer *vb = &mgr->vertex_buffer[index];
      unsigned first, size, index_bit;

      if (!get_upload_offset_size(mgr, vb, ve, velem, index, i, start_vertex,
                                  num_vertices, start_instance, num_instances,
                                  &first, &size))
         continue;

      index_bit = 1 << index;

      /* Update offsets. */
      if (!(buffer_mask & index_bit)) {
         start_offset[index] = first;
         end_offset[index] = first + size;
      } else {
         if (first < start_offset[index])
            start_offset[index] = first;
         if (first + size > end_offset[index])
            end_offset[index] = first + size;
      }

      buffer_mask |= index_bit;
   }

   /* Upload buffers. */
   while (buffer_mask) {
      unsigned start, end;
      struct pipe_vertex_buffer *real_vb;
      const uint8_t *ptr;

      i = u_bit_scan(&buffer_mask);

      start = start_offset[i];
      end = end_offset[i];
      assert(start < end);

      real_vb = &mgr->real_vertex_buffer[i];
      ptr = mgr->vertex_buffer[i].buffer.user;

      u_upload_data(mgr->pipe->stream_uploader,
                    mgr->has_signed_vb_offset ? 0 : start,
                    end - start, 4,
                    ptr + start, &real_vb->buffer_offset, &real_vb->buffer.resource);
      if (!real_vb->buffer.resource)
         return PIPE_ERROR_OUT_OF_MEMORY;

      real_vb->buffer_offset -= start;
   }

   return PIPE_OK;
}

static boolean u_vbuf_need_minmax_index(const struct u_vbuf *mgr, uint32_t misaligned)
{
   /* See if there are any per-vertex attribs which will be uploaded or
    * translated. Use bitmasks to get the info instead of looping over vertex
    * elements. */
   return (mgr->ve->used_vb_mask &
           ((mgr->user_vb_mask |
             mgr->incompatible_vb_mask |
             misaligned |
             mgr->ve->incompatible_vb_mask_any) &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static boolean u_vbuf_mapping_vertex_buffer_blocks(const struct u_vbuf *mgr, uint32_t misaligned)
{
   /* Return true if there are hw buffers which don't need to be translated.
    *
    * We could query whether each buffer is busy, but that would
    * be way more costly than this. */
   return (mgr->ve->used_vb_mask &
           (~mgr->user_vb_mask &
            ~mgr->incompatible_vb_mask &
            ~misaligned &
            mgr->ve->compatible_vb_mask_all &
            mgr->ve->noninstance_vb_mask_any &
            mgr->nonzero_stride_vb_mask)) != 0;
}

static void
u_vbuf_get_minmax_index_mapped(const struct pipe_draw_info *info,
                               unsigned count,
                               const void *indices, unsigned *out_min_index,
                               unsigned *out_max_index)
{
   if (!count) {
      *out_min_index = 0;
      *out_max_index = 0;
      return;
   }

   switch (info->index_size) {
   case 4: {
      const unsigned *ui_indices = (const unsigned*)indices;
      unsigned max = 0;
      unsigned min = ~0u;
      if (info->primitive_restart) {
         for (unsigned i = 0; i < count; i++) {
            if (ui_indices[i] != info->restart_index) {
               if (ui_indices[i] > max) max = ui_indices[i];
               if (ui_indices[i] < min) min = ui_indices[i];
            }
         }
      }
      else {
         for (unsigned i = 0; i < count; i++) {
            if (ui_indices[i] > max) max = ui_indices[i];
            if (ui_indices[i] < min) min = ui_indices[i];
         }
      }
      *out_min_index = min;
      *out_max_index = max;
      break;
   }
   case 2: {
      const unsigned short *us_indices = (const unsigned short*)indices;
      unsigned short max = 0;
      unsigned short min = ~((unsigned short)0);
      if (info->primitive_restart) {
         for (unsigned i = 0; i < count; i++) {
            if (us_indices[i] != info->restart_index) {
               if (us_indices[i] > max) max = us_indices[i];
               if (us_indices[i] < min) min = us_indices[i];
            }
         }
      }
      else {
         for (unsigned i = 0; i < count; i++) {
            if (us_indices[i] > max) max = us_indices[i];
            if (us_indices[i] < min) min = us_indices[i];
         }
      }
      *out_min_index = min;
      *out_max_index = max;
      break;
   }
   case 1: {
      const unsigned char *ub_indices = (const unsigned char*)indices;
      unsigned char max = 0;
      unsigned char min = ~((unsigned char)0);
      if (info->primitive_restart) {
         for (unsigned i = 0; i < count; i++) {
            if (ub_indices[i] != info->restart_index) {
               if (ub_indices[i] > max) max = ub_indices[i];
               if (ub_indices[i] < min) min = ub_indices[i];
            }
         }
      }
      else {
         for (unsigned i = 0; i < count; i++) {
            if (ub_indices[i] > max) max = ub_indices[i];
            if (ub_indices[i] < min) min = ub_indices[i];
         }
      }
      *out_min_index = min;
      *out_max_index = max;
      break;
   }
   default:
      unreachable("bad index size");
   }
}
1372
void u_vbuf_get_minmax_index(struct pipe_context *pipe,
                             const struct pipe_draw_info *info,
                             const struct pipe_draw_start_count_bias *draw,
                             unsigned *out_min_index, unsigned *out_max_index)
{
   struct pipe_transfer *transfer = NULL;
   const void *indices;

   if (info->has_user_indices) {
      indices = (uint8_t*)info->index.user +
                draw->start * info->index_size;
   } else {
      indices = pipe_buffer_map_range(pipe, info->index.resource,
                                      draw->start * info->index_size,
                                      draw->count * info->index_size,
                                      PIPE_MAP_READ, &transfer);
   }

   u_vbuf_get_minmax_index_mapped(info, draw->count, indices,
                                  out_min_index, out_max_index);

   if (transfer) {
      pipe_buffer_unmap(pipe, transfer);
   }
}

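/* Bind the contiguous range of real vertex buffer slots covering all dirty
 * bits. If every dirty buffer is a freshly uploaded user buffer, the buffer
 * references are handed over to the driver to skip atomic refcounting.
 */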
static void u_vbuf_set_driver_vertex_buffers(struct u_vbuf *mgr)
{
   struct pipe_context *pipe = mgr->pipe;
   unsigned start_slot, count;

   start_slot = ffs(mgr->dirty_real_vb_mask) - 1;
   count = util_last_bit(mgr->dirty_real_vb_mask >> start_slot);

   if (mgr->dirty_real_vb_mask == mgr->enabled_vb_mask &&
       mgr->dirty_real_vb_mask == mgr->user_vb_mask) {
      /* Fast path that allows us to transfer the VBO references to the driver
       * to skip atomic reference counting there. These are freshly uploaded
       * user buffers that can be discarded after this call.
       */
      pipe->set_vertex_buffers(pipe, start_slot, count, 0, true,
                               mgr->real_vertex_buffer + start_slot);

      /* We don't own the VBO references now. Set them to NULL. */
      for (unsigned i = 0; i < count; i++) {
         assert(!mgr->real_vertex_buffer[start_slot + i].is_user_buffer);
         mgr->real_vertex_buffer[start_slot + i].buffer.resource = NULL;
      }
   } else {
      /* Slow path where we have to keep VBO references. */
      pipe->set_vertex_buffers(pipe, start_slot, count, 0, false,
                               mgr->real_vertex_buffer + start_slot);
   }
   mgr->dirty_real_vb_mask = 0;
}

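/* Fallback for indirect multidraws that can't be merged into one draw:
 * replay the CPU-visible indirect data as individual indexed draws.
 */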
static void
u_vbuf_split_indexed_multidraw(struct u_vbuf *mgr, struct pipe_draw_info *info,
                               unsigned drawid_offset,
                               unsigned *indirect_data, unsigned stride,
                               unsigned draw_count)
{
   /* Increase refcount to be able to use take_index_buffer_ownership with
    * all draws.
    */
   if (draw_count > 1 && info->take_index_buffer_ownership)
      p_atomic_add(&info->index.resource->reference.count, draw_count - 1);

   assert(info->index_size);

   for (unsigned i = 0; i < draw_count; i++) {
      struct pipe_draw_start_count_bias draw;
      unsigned offset = i * stride / 4;

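      /* Indexed indirect draw layout (in dwords):
       * {count, instance_count, start, index_bias, start_instance}.
       */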
      draw.count = indirect_data[offset + 0];
      info->instance_count = indirect_data[offset + 1];
      draw.start = indirect_data[offset + 2];
      draw.index_bias = indirect_data[offset + 3];
      info->start_instance = indirect_data[offset + 4];

      u_vbuf_draw_vbo(mgr, info, drawid_offset, NULL, &draw, 1);
   }
}

void u_vbuf_draw_vbo(struct u_vbuf *mgr, const struct pipe_draw_info *info,
                     unsigned drawid_offset,
                     const struct pipe_draw_indirect_info *indirect,
                     const struct pipe_draw_start_count_bias *draws,
                     unsigned num_draws)
{
   struct pipe_context *pipe = mgr->pipe;
   int start_vertex;
   unsigned min_index;
   unsigned num_vertices;
   bool unroll_indices = false;
   const uint32_t used_vb_mask = mgr->ve->used_vb_mask;
   uint32_t user_vb_mask = mgr->user_vb_mask & used_vb_mask;
   struct pipe_draw_info new_info;
   struct pipe_draw_start_count_bias new_draw;
   unsigned fixed_restart_index = info->index_size ? util_prim_restart_index_from_size(info->index_size) : 0;

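   /* Collect buffers whose offset or stride breaks the driver's component
    * alignment requirements; they are treated as incompatible and go through
    * the translate fallback.
    */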
   uint32_t misaligned = 0;
   if (!mgr->caps.attrib_component_unaligned) {
      for (unsigned i = 0; i < ARRAY_SIZE(mgr->unaligned_vb_mask); i++) {
         misaligned |= mgr->ve->vb_align_mask[i] & mgr->unaligned_vb_mask[i];
      }
   }
   const uint32_t incompatible_vb_mask =
      (mgr->incompatible_vb_mask | misaligned) & used_vb_mask;

   /* Normal draw. No fallback and no user buffers. */
   if (!incompatible_vb_mask &&
       !mgr->ve->incompatible_elem_mask &&
       !user_vb_mask &&
       (info->index_size != 1 || !mgr->caps.rewrite_ubyte_ibs) &&
       (!info->primitive_restart ||
        info->restart_index == fixed_restart_index ||
        !mgr->caps.rewrite_restart_index) &&
       (!info->primitive_restart || mgr->caps.supported_restart_modes & BITFIELD_BIT(info->mode)) &&
       mgr->caps.supported_prim_modes & BITFIELD_BIT(info->mode)) {

      /* Set vertex buffers if needed. */
      if (mgr->dirty_real_vb_mask & used_vb_mask) {
         u_vbuf_set_driver_vertex_buffers(mgr);
      }

      pipe->draw_vbo(pipe, info, drawid_offset, indirect, draws, num_draws);
      return;
   }

   /* Increase refcount to be able to use take_index_buffer_ownership with
    * all draws.
    */
   if (num_draws > 1 && info->take_index_buffer_ownership)
      p_atomic_add(&info->index.resource->reference.count, num_draws - 1);
   new_info = *info;

   for (unsigned d = 0; d < num_draws; d++) {
      new_draw = draws[d];
      if (info->increment_draw_id)
         drawid_offset++;

      /* Handle indirect (multi)draws. */
      if (indirect && indirect->buffer) {
         unsigned draw_count = 0;

         /* Get the number of draws. */
         if (indirect->indirect_draw_count) {
            pipe_buffer_read(pipe, indirect->indirect_draw_count,
                             indirect->indirect_draw_count_offset,
                             4, &draw_count);
         } else {
            draw_count = indirect->draw_count;
         }

         if (!draw_count)
            goto cleanup;

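         /* Size of the indirect data for all draws: each draw command is
          * 5 dwords (20 bytes) when indexed, 4 dwords (16 bytes) otherwise,
          * and consecutive commands are "stride" bytes apart.
          */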
         unsigned data_size = (draw_count - 1) * indirect->stride +
                              (new_info.index_size ? 20 : 16);
         unsigned *data = malloc(data_size);
         if (!data)
            goto cleanup; /* report an error? */

         /* Read the used buffer range only once, because the read can be
          * uncached.
          */
         pipe_buffer_read(pipe, indirect->buffer, indirect->offset, data_size,
                          data);

         if (info->index_size) {
            /* Indexed multidraw. */
            unsigned index_bias0 = data[3];
            bool index_bias_same = true;

            /* If we invoke the translate path, we have to split the multidraw. */
            if (incompatible_vb_mask ||
                mgr->ve->incompatible_elem_mask) {
               u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
                                              indirect->stride, draw_count);
               free(data);
               return;
            }

            /* See if index_bias is the same for all draws. */
            for (unsigned i = 1; i < draw_count; i++) {
               if (data[i * indirect->stride / 4 + 3] != index_bias0) {
                  index_bias_same = false;
                  break;
               }
            }

            /* Split the multidraw if index_bias differs between draws. */
            if (!index_bias_same) {
               u_vbuf_split_indexed_multidraw(mgr, &new_info, drawid_offset, data,
                                              indirect->stride, draw_count);
               free(data);
               return;
            }

            /* If the translate path isn't needed and index_bias is the same
             * for all draws, the whole multidraw can be processed at the cost
             * of a single draw call (except for the index range computation).
             * We only need to compute the index range covering all draw calls
             * of the multidraw.
             *
             * These values determine the user buffer bounds to upload; the
             * driver will not look at them because indirect != NULL.
             */
            new_draw.index_bias = index_bias0;
            new_info.index_bounds_valid = true;
            new_info.min_index = ~0u;
            new_info.max_index = 0;
            new_info.start_instance = ~0u;
            unsigned end_instance = 0;

            struct pipe_transfer *transfer = NULL;
            const uint8_t *indices;

            if (info->has_user_indices) {
               indices = (uint8_t*)info->index.user;
            } else {
               indices = (uint8_t*)pipe_buffer_map(pipe, info->index.resource,
                                                   PIPE_MAP_READ, &transfer);
            }

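            /* Accumulate the overall index range and instance range over
             * all draws of the multidraw.
             */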
            for (unsigned i = 0; i < draw_count; i++) {
               unsigned offset = i * indirect->stride / 4;
               unsigned start = data[offset + 2];
               unsigned count = data[offset + 0];
               unsigned start_instance = data[offset + 4];
               unsigned instance_count = data[offset + 1];

               if (!count || !instance_count)
                  continue;

               /* Update the ranges of instances. */
               new_info.start_instance = MIN2(new_info.start_instance,
                                              start_instance);
               end_instance = MAX2(end_instance, start_instance + instance_count);

               /* Update the index range. */
               unsigned min, max;
               u_vbuf_get_minmax_index_mapped(&new_info, count,
                                              indices +
                                              new_info.index_size * start,
                                              &min, &max);

               new_info.min_index = MIN2(new_info.min_index, min);
               new_info.max_index = MAX2(new_info.max_index, max);
            }
            free(data);

            if (transfer)
               pipe_buffer_unmap(pipe, transfer);

            /* Set the final instance count. */
            new_info.instance_count = end_instance - new_info.start_instance;

            if (new_info.start_instance == ~0u || !new_info.instance_count)
               goto cleanup;
         } else {
            /* Non-indexed multidraw.
             *
             * Keep the draw call indirect and compute the merged vertex and
             * instance ranges, which determine the user buffer bounds to
             * upload; the driver will not look at these values because
             * indirect != NULL.
             *
             * This processes the whole multidraw at the cost of a single
             * draw call.
             */
            new_draw.start = ~0u;
            new_info.start_instance = ~0u;
            unsigned end_vertex = 0;
            unsigned end_instance = 0;

            for (unsigned i = 0; i < draw_count; i++) {
               unsigned offset = i * indirect->stride / 4;
               unsigned start = data[offset + 2];
               unsigned count = data[offset + 0];
               unsigned start_instance = data[offset + 3];
               unsigned instance_count = data[offset + 1];

               new_draw.start = MIN2(new_draw.start, start);
               new_info.start_instance = MIN2(new_info.start_instance,
                                              start_instance);

               end_vertex = MAX2(end_vertex, start + count);
               end_instance = MAX2(end_instance, start_instance + instance_count);
            }
            free(data);

            /* Set the final counts. */
            new_draw.count = end_vertex - new_draw.start;
            new_info.instance_count = end_instance - new_info.start_instance;

            if (new_draw.start == ~0u || !new_draw.count || !new_info.instance_count)
               goto cleanup;
         }
      } else {
         if ((!indirect && !new_draw.count) || !new_info.instance_count)
            goto cleanup;
      }

      if (new_info.index_size) {
         /* See if anything needs to be done for per-vertex attribs. */
         if (u_vbuf_need_minmax_index(mgr, misaligned)) {
            unsigned max_index;

            if (new_info.index_bounds_valid) {
               min_index = new_info.min_index;
               max_index = new_info.max_index;
            } else {
               u_vbuf_get_minmax_index(mgr->pipe, &new_info, &new_draw,
                                       &min_index, &max_index);
            }

            assert(min_index <= max_index);

            start_vertex = min_index + new_draw.index_bias;
            num_vertices = max_index + 1 - min_index;

            /* Use a heuristic to see if unrolling indices improves
             * performance. Primitive restart doesn't work with unrolled
             * indices; we would have to split this draw into several ones.
             */
            if (!indirect &&
                !new_info.primitive_restart &&
                util_is_vbo_upload_ratio_too_large(new_draw.count, num_vertices) &&
                !u_vbuf_mapping_vertex_buffer_blocks(mgr, misaligned)) {
               unroll_indices = true;
               user_vb_mask &= ~(mgr->nonzero_stride_vb_mask &
                                 mgr->ve->noninstance_vb_mask_any);
            }
         } else {
            /* Nothing to do for per-vertex attribs. */
            start_vertex = 0;
            num_vertices = 0;
            min_index = 0;
         }
      } else {
         start_vertex = new_draw.start;
         num_vertices = new_draw.count;
         min_index = 0;
      }

      /* Translate vertices with non-native layouts or formats. */
      if (unroll_indices ||
          incompatible_vb_mask ||
          mgr->ve->incompatible_elem_mask) {
         if (!u_vbuf_translate_begin(mgr, &new_info, &new_draw,
                                     start_vertex, num_vertices,
                                     min_index, unroll_indices, misaligned)) {
            debug_warn_once("u_vbuf_translate_begin() failed");
            goto cleanup;
         }

         if (unroll_indices) {
            new_info.index_size = 0;
            new_draw.index_bias = 0;
            new_info.index_bounds_valid = true;
            new_info.min_index = 0;
            new_info.max_index = new_draw.count - 1;
            new_draw.start = 0;
         }

         user_vb_mask &= ~(incompatible_vb_mask |
                           mgr->ve->incompatible_vb_mask_all);
      }

      /* Upload user buffers. */
      if (user_vb_mask) {
         if (u_vbuf_upload_buffers(mgr, start_vertex, num_vertices,
                                   new_info.start_instance,
                                   new_info.instance_count) != PIPE_OK) {
            debug_warn_once("u_vbuf_upload_buffers() failed");
            goto cleanup;
         }

         mgr->dirty_real_vb_mask |= user_vb_mask;
      }

      /*
      if (unroll_indices) {
         printf("unrolling indices: start_vertex = %i, num_vertices = %i\n",
                start_vertex, num_vertices);
         util_dump_draw_info(stdout, info);
         printf("\n");
      }

      unsigned i;
      for (i = 0; i < mgr->nr_vertex_buffers; i++) {
         printf("input %i: ", i);
         util_dump_vertex_buffer(stdout, mgr->vertex_buffer+i);
         printf("\n");
      }
      for (i = 0; i < mgr->nr_real_vertex_buffers; i++) {
         printf("real %i: ", i);
         util_dump_vertex_buffer(stdout, mgr->real_vertex_buffer+i);
         printf("\n");
      }
      */

      u_upload_unmap(pipe->stream_uploader);
      if (mgr->dirty_real_vb_mask)
         u_vbuf_set_driver_vertex_buffers(mgr);

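      /* Let util_primconvert handle the draw if the driver can't: ubyte
       * indices that have to be rewritten, a restart index or restart mode
       * the driver doesn't support, or an unsupported primitive mode.
       */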
      if ((new_info.index_size == 1 && mgr->caps.rewrite_ubyte_ibs) ||
          (new_info.primitive_restart &&
           ((new_info.restart_index != fixed_restart_index && mgr->caps.rewrite_restart_index) ||
           !(mgr->caps.supported_restart_modes & BITFIELD_BIT(new_info.mode)))) ||
          !(mgr->caps.supported_prim_modes & BITFIELD_BIT(new_info.mode))) {
         util_primconvert_save_flatshade_first(mgr->pc, mgr->flatshade_first);
         util_primconvert_draw_vbo(mgr->pc, &new_info, drawid_offset, indirect, &new_draw, 1);
      } else
         pipe->draw_vbo(pipe, &new_info, drawid_offset, indirect, &new_draw, 1);
   }

   if (mgr->using_translate) {
      u_vbuf_translate_end(mgr);
   }
   return;

cleanup:
   if (info->take_index_buffer_ownership) {
      struct pipe_resource *indexbuf = info->index.resource;
      pipe_resource_reference(&indexbuf, NULL);
   }
}

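/* Save the currently bound vertex elements state so that it can be restored
 * later with u_vbuf_restore_vertex_elements().
 */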
void u_vbuf_save_vertex_elements(struct u_vbuf *mgr)
{
   assert(!mgr->ve_saved);
   mgr->ve_saved = mgr->ve;
}

void u_vbuf_restore_vertex_elements(struct u_vbuf *mgr)
{
   if (mgr->ve != mgr->ve_saved) {
      struct pipe_context *pipe = mgr->pipe;

      mgr->ve = mgr->ve_saved;
      pipe->bind_vertex_elements_state(pipe,
                                       mgr->ve ? mgr->ve->driver_cso : NULL);
   }
   mgr->ve_saved = NULL;
}
