/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/format/format_utils.h"
#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count,
                                  unsigned unbind_num_trailing_slots,
                                  bool take_ownership)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   *enabled_buffers &= ~u_bit_consecutive(start_slot, count);

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         if (!take_ownership && !src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);
   }

   for (i = 0; i < unbind_num_trailing_slots; i++)
      pipe_vertex_buffer_unreference(&dst[count + i]);
}
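
/* A minimal usage sketch for a driver's set_vertex_buffers hook, assuming a
 * hypothetical "struct my_context" that stores "vertex_buffer[]" and an
 * "enabled_vb_mask" bitmask (the exact callback signature varies between
 * Mesa versions):
 *
 *    static void
 *    my_set_vertex_buffers(struct pipe_context *pipe, unsigned start_slot,
 *                          unsigned count, unsigned unbind_num_trailing_slots,
 *                          bool take_ownership,
 *                          const struct pipe_vertex_buffer *buffers)
 *    {
 *       struct my_context *ctx = (struct my_context *)pipe;
 *
 *       util_set_vertex_buffers_mask(ctx->vertex_buffer,
 *                                    &ctx->enabled_vb_mask, buffers,
 *                                    start_slot, count,
 *                                    unbind_num_trailing_slots,
 *                                    take_ownership);
 *       ctx->vb_dirty = true;
 *    }
 */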

/**
 * Same as util_set_vertex_buffers_mask, but it updates a count of bound
 * buffers (dst_count) instead of an enabled-buffers bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count,
                                   unsigned unbind_num_trailing_slots,
                                   bool take_ownership)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count, unbind_num_trailing_slots,
                                take_ownership);

   *dst_count = util_last_bit(enabled_buffers);
}

/**
 * This function is used to copy an array of pipe_shader_buffer structures,
 * while properly referencing the pipe_shader_buffer::buffer member.
 *
 * \sa util_set_vertex_buffers_mask
 */
void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_shader_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         pipe_resource_reference(&dst[i].buffer, src[i].buffer);

         if (src[i].buffer)
            *enabled_buffers |= (1ull << (start_slot + i));
         else
            *enabled_buffers &= ~(1ull << (start_slot + i));
      }

      /* Copy over the other members of pipe_shader_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst[i].buffer, NULL);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}

/**
 * Given a user index buffer, upload the range used by the draw and return
 * the resulting hardware buffer and an offset that allows the draw to keep
 * its original start index.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         const struct pipe_draw_start_count_bias *draw,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset, unsigned alignment)
{
   unsigned start_offset = draw->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 draw->count * info->index_size, alignment,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
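
/* A minimal usage sketch from a hypothetical draw path: when the draw uses
 * a user-memory index buffer, upload the used range first. The returned
 * offset already compensates for draws[0].start, so the draw can keep its
 * original start value while indexing into "gpu_indexbuf" at "offset":
 *
 *    struct pipe_resource *gpu_indexbuf = NULL;
 *    unsigned offset = 0;
 *
 *    if (info->index_size && info->has_user_indices) {
 *       if (!util_upload_index_buffer(pipe, info, &draws[0],
 *                                     &gpu_indexbuf, &offset, 4))
 *          return;
 *    }
 *    ...
 *    pipe_resource_reference(&gpu_indexbuf, NULL);
 */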

/**
 * Lower each UINT64 vertex element to 1 or 2 UINT32 vertex elements.
 * 3 and 4 component formats are expanded into 2 slots.
 *
 * @param velems        Original vertex elements, will be updated to contain
 *                      the lowered vertex elements.
 * @param velem_count   Original count, will be updated to contain the count
 *                      after lowering.
 * @param tmp           Temporary array of PIPE_MAX_ATTRIBS vertex elements.
 */
void
util_lower_uint64_vertex_elements(const struct pipe_vertex_element **velems,
                                  unsigned *velem_count,
                                  struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS])
{
   const struct pipe_vertex_element *input = *velems;
   unsigned count = *velem_count;
   bool has_64bit = false;

   for (unsigned i = 0; i < count; i++) {
      has_64bit |= input[i].src_format >= PIPE_FORMAT_R64_UINT &&
                   input[i].src_format <= PIPE_FORMAT_R64G64B64A64_UINT;
   }

   /* Return the original vertex elements if there is nothing to do. */
   if (!has_64bit)
      return;

   /* Lower 64_UINT to 32_UINT. */
   unsigned new_count = 0;

   for (unsigned i = 0; i < count; i++) {
      enum pipe_format format = input[i].src_format;

      /* If the shader input is dvec2 or smaller, reduce the number of
       * components to 2 at most. If the shader input is dvec3 or larger,
       * expand the number of components to 3 at least. If the 3rd component
       * is out of bounds, the hardware shouldn't skip loading the first
       * 2 components.
       */
      if (format >= PIPE_FORMAT_R64_UINT &&
          format <= PIPE_FORMAT_R64G64B64A64_UINT) {
         if (input[i].dual_slot)
            format = MAX2(format, PIPE_FORMAT_R64G64B64_UINT);
         else
            format = MIN2(format, PIPE_FORMAT_R64G64_UINT);
      }

      switch (format) {
      case PIPE_FORMAT_R64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64B64_UINT:
      case PIPE_FORMAT_R64G64B64A64_UINT:
         assert(new_count + 2 <= PIPE_MAX_ATTRIBS);
         tmp[new_count] = tmp[new_count + 1] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_format =
            format == PIPE_FORMAT_R64G64B64_UINT ?
                  PIPE_FORMAT_R32G32_UINT :
                  PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_offset += 16;
         new_count += 2;
         break;

      default:
         tmp[new_count++] = input[i];
         break;
      }
   }

   *velem_count = new_count;
   *velems = tmp;
}
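
/* For example, a single dual-slot dvec3 attribute
 * (src_format = PIPE_FORMAT_R64G64B64_UINT, src_offset = 0) is lowered into
 * two 32-bit elements that together cover all three 64-bit components:
 *
 *    tmp[0].src_format = PIPE_FORMAT_R32G32B32A32_UINT;   src_offset = 0
 *    tmp[1].src_format = PIPE_FORMAT_R32G32_UINT;         src_offset = 16
 */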

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           (unsigned)p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}
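
/* A minimal bring-up sketch, assuming a hypothetical spot in a driver where
 * a single draw call should be instrumented (the draw_vbo signature varies
 * between Mesa versions):
 *
 *    struct pipe_query *q = util_begin_pipestat_query(ctx);
 *    ctx->draw_vbo(ctx, info, drawid_offset, indirect, draws, num_draws);
 *    if (q)
 *       util_end_pipestat_query(ctx, q, stderr);
 */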

/* This is a helper for profiling. Don't remove. */
struct pipe_query *
util_begin_time_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for profiling. Don't remove. */
void
util_end_time_query(struct pipe_context *ctx, struct pipe_query *q, FILE *f,
                    const char *name)
{
   union pipe_query_result result;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, &result);
   ctx->destroy_query(ctx, q);

   /* result.u64 is in nanoseconds; print microseconds with one decimal digit. */
   fprintf(f, "Time elapsed: %s - %"PRIu64".%u us\n",
           name, result.u64 / 1000, (unsigned)(result.u64 % 1000) / 100);
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}
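
/* A minimal lifecycle sketch, assuming a hypothetical driver context that
 * embeds a util_throttle and caps in-flight upload memory at 64 MB:
 *
 *    util_throttle_init(&ctx->throttle, 64 * 1024 * 1024);
 *    ...
 *    uint64_t size = (uint64_t)width * height * depth * bytes_per_pixel;
 *    util_throttle_memory_usage(pipe, &ctx->throttle, size);
 *    ... allocate and fill "size" bytes of upload memory ...
 *    ...
 *    util_throttle_deinit(pipe->screen, &ctx->throttle);
 */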

bool
util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize,
                              uint32_t *clamped)
{
   /* Reduce a large clear value size if possible. */
   if (*clearValueSize > 4) {
      bool clear_dword_duplicated = true;
      const uint32_t *clear_value = clearValue;

      /* See if we can lower large fills to dword fills. */
      for (unsigned i = 1; i < *clearValueSize / 4; i++) {
         if (clear_value[0] != clear_value[i]) {
            clear_dword_duplicated = false;
            break;
         }
      }
      if (clear_dword_duplicated) {
         *clamped = *clear_value;
         *clearValueSize = 4;
      }
      return clear_dword_duplicated;
   }

   /* Expand a small clear value size. */
   if (*clearValueSize <= 2) {
      if (*clearValueSize == 1) {
         *clamped = *(uint8_t *)clearValue;
         *clamped |=
            (*clamped << 8) | (*clamped << 16) | (*clamped << 24);
      } else {
         *clamped = *(uint16_t *)clearValue;
         *clamped |= *clamped << 16;
      }
      *clearValueSize = 4;
      return true;
   }
   return false;
}
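
/* For example, a 16-byte clear value made of four identical dwords is
 * reduced to a single dword, and the function reports success:
 *
 *    uint32_t val[4] = {0xAABBCCDD, 0xAABBCCDD, 0xAABBCCDD, 0xAABBCCDD};
 *    int size = sizeof(val);
 *    uint32_t dword;
 *    bool ok = util_lower_clearsize_to_dword(val, &size, &dword);
 *    assert(ok && size == 4 && dword == 0xAABBCCDD);
 */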

void
util_init_pipe_vertex_state(struct pipe_screen *screen,
                            struct pipe_vertex_buffer *buffer,
                            const struct pipe_vertex_element *elements,
                            unsigned num_elements,
                            struct pipe_resource *indexbuf,
                            uint32_t full_velem_mask,
                            struct pipe_vertex_state *state)
{
   assert(num_elements == util_bitcount(full_velem_mask));

   pipe_reference_init(&state->reference, 1);
   state->screen = screen;

   pipe_vertex_buffer_reference(&state->input.vbuffer, buffer);
   pipe_resource_reference(&state->input.indexbuf, indexbuf);
   state->input.num_elements = num_elements;
   for (unsigned i = 0; i < num_elements; i++)
      state->input.elements[i] = elements[i];
   state->input.full_velem_mask = full_velem_mask;
}

/**
 * Clamp color value to format range.
 */
union pipe_color_union
util_clamp_color(enum pipe_format format,
                 const union pipe_color_union *color)
{
   union pipe_color_union clamp_color = *color;
   int i;

   for (i = 0; i < util_format_get_nr_components(format); i++) {
      uint8_t bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, i);

      if (util_format_is_unorm(format))
         clamp_color.ui[i] = _mesa_unorm_to_unorm(clamp_color.ui[i], bits, bits);
      else if (util_format_is_snorm(format))
         clamp_color.i[i] = _mesa_snorm_to_snorm(clamp_color.i[i], bits, bits);
      else if (util_format_is_pure_uint(format))
         clamp_color.ui[i] = _mesa_unsigned_to_unsigned(clamp_color.ui[i], bits);
      else if (util_format_is_pure_sint(format))
         clamp_color.i[i] = _mesa_signed_to_signed(clamp_color.i[i], bits);
   }

   return clamp_color;
}

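/* For example, clamping a pure-sint color for PIPE_FORMAT_R8G8B8A8_SINT
 * restricts each component to the 8-bit signed range [-128, 127]:
 *
 *    union pipe_color_union color = { .i = { 300, -500, 5, 0 } };
 *    union pipe_color_union clamped =
 *       util_clamp_color(PIPE_FORMAT_R8G8B8A8_SINT, &color);
 *    assert(clamped.i[0] == 127 && clamped.i[1] == -128);
 */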