/**************************************************************************
 *
 * Copyright 2012 Marek Olšák <maraeo@gmail.com>
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS AND/OR THEIR SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#include "util/format/format_utils.h"
#include "util/u_cpu_detect.h"
#include "util/u_helpers.h"
#include "util/u_inlines.h"
#include "util/u_upload_mgr.h"
#include "util/u_thread.h"
#include "util/os_time.h"
#include <inttypes.h>

/**
 * This function is used to copy an array of pipe_vertex_buffer structures,
 * while properly referencing the pipe_vertex_buffer::buffer member.
 *
 * enabled_buffers is updated such that the bits corresponding to the indices
 * of disabled buffers are set to 0 and the enabled ones are set to 1.
 *
 * \sa util_copy_framebuffer_state
 */
void util_set_vertex_buffers_mask(struct pipe_vertex_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_vertex_buffer *src,
                                  unsigned start_slot, unsigned count,
                                  unsigned unbind_num_trailing_slots,
                                  bool take_ownership)
{
   unsigned i;
   uint32_t bitmask = 0;

   dst += start_slot;

   *enabled_buffers &= ~u_bit_consecutive(start_slot, count);

   if (src) {
      for (i = 0; i < count; i++) {
         if (src[i].buffer.resource)
            bitmask |= 1 << i;

         pipe_vertex_buffer_unreference(&dst[i]);

         /* Take the reference on dst[i] now; the memcpy below overwrites
          * dst[i] with src[i], which holds the same (now referenced)
          * pointer, so the reference carries over.
          */
         if (!take_ownership && !src[i].is_user_buffer)
            pipe_resource_reference(&dst[i].buffer.resource, src[i].buffer.resource);
      }

      /* Copy over the other members of pipe_vertex_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_vertex_buffer));

      *enabled_buffers |= bitmask << start_slot;
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_vertex_buffer_unreference(&dst[i]);
   }

   for (i = 0; i < unbind_num_trailing_slots; i++)
      pipe_vertex_buffer_unreference(&dst[count + i]);
}
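
/* Usage sketch (illustrative, not driver code from this tree): a driver's
 * set_vertex_buffers hook can forward straight to the helper, assuming the
 * pipe_context::set_vertex_buffers signature that matches these parameters.
 * "my_context", "vertex_buffers", and "vb_enabled_mask" are hypothetical
 * driver-side names.
 *
 *    static void
 *    my_set_vertex_buffers(struct pipe_context *pipe, unsigned start_slot,
 *                          unsigned count, unsigned unbind_num_trailing_slots,
 *                          bool take_ownership,
 *                          const struct pipe_vertex_buffer *buffers)
 *    {
 *       struct my_context *ctx = (struct my_context *)pipe;
 *
 *       util_set_vertex_buffers_mask(ctx->vertex_buffers,
 *                                    &ctx->vb_enabled_mask, buffers,
 *                                    start_slot, count,
 *                                    unbind_num_trailing_slots,
 *                                    take_ownership);
 *    }
 */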

/**
 * Same as util_set_vertex_buffers_mask, but it maintains a count of bound
 * buffers in *dst_count instead of a bitmask.
 */
void util_set_vertex_buffers_count(struct pipe_vertex_buffer *dst,
                                   unsigned *dst_count,
                                   const struct pipe_vertex_buffer *src,
                                   unsigned start_slot, unsigned count,
                                   unsigned unbind_num_trailing_slots,
                                   bool take_ownership)
{
   unsigned i;
   uint32_t enabled_buffers = 0;

   for (i = 0; i < *dst_count; i++) {
      if (dst[i].buffer.resource)
         enabled_buffers |= (1ull << i);
   }

   util_set_vertex_buffers_mask(dst, &enabled_buffers, src, start_slot,
                                count, unbind_num_trailing_slots,
                                take_ownership);

   *dst_count = util_last_bit(enabled_buffers);
}

/**
 * This function is used to copy an array of pipe_shader_buffer structures,
 * while properly referencing the pipe_shader_buffer::buffer member.
 *
 * \sa util_set_vertex_buffers_mask
 */
void util_set_shader_buffers_mask(struct pipe_shader_buffer *dst,
                                  uint32_t *enabled_buffers,
                                  const struct pipe_shader_buffer *src,
                                  unsigned start_slot, unsigned count)
{
   unsigned i;

   dst += start_slot;

   if (src) {
      for (i = 0; i < count; i++) {
         pipe_resource_reference(&dst[i].buffer, src[i].buffer);

         if (src[i].buffer)
            *enabled_buffers |= (1ull << (start_slot + i));
         else
            *enabled_buffers &= ~(1ull << (start_slot + i));
      }

      /* Copy over the other members of pipe_shader_buffer. */
      memcpy(dst, src, count * sizeof(struct pipe_shader_buffer));
   }
   else {
      /* Unreference the buffers. */
      for (i = 0; i < count; i++)
         pipe_resource_reference(&dst[i].buffer, NULL);

      *enabled_buffers &= ~(((1ull << count) - 1) << start_slot);
   }
}

/**
 * Upload the index range used by a draw from a user index buffer.
 * The returned offset is adjusted by -draw->start*index_size, so the caller
 * can keep using draw->start with the uploaded buffer.
 */
bool
util_upload_index_buffer(struct pipe_context *pipe,
                         const struct pipe_draw_info *info,
                         const struct pipe_draw_start_count_bias *draw,
                         struct pipe_resource **out_buffer,
                         unsigned *out_offset, unsigned alignment)
{
   unsigned start_offset = draw->start * info->index_size;

   u_upload_data(pipe->stream_uploader, start_offset,
                 draw->count * info->index_size, alignment,
                 (char*)info->index.user + start_offset,
                 out_offset, out_buffer);
   u_upload_unmap(pipe->stream_uploader);
   *out_offset -= start_offset;
   return *out_buffer != NULL;
}
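
/* Usage sketch (illustrative): in a draw_vbo path, user indices can be
 * uploaded to a real buffer before submission. "draws[0]" follows the
 * pipe_context::draw_vbo conventions of this tree; error handling is
 * reduced to an early return.
 *
 *    struct pipe_resource *indexbuf = NULL;
 *    unsigned index_offset = 0;
 *
 *    if (info->index_size && info->has_user_indices) {
 *       if (!util_upload_index_buffer(pipe, info, &draws[0], &indexbuf,
 *                                     &index_offset, 4))
 *          return;
 *       // ... draw from "indexbuf" at "index_offset"; index fetching can
 *       // still use draws[0].start because the offset was adjusted ...
 *       pipe_resource_reference(&indexbuf, NULL);
 *    }
 */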

/**
 * Lower each UINT64 vertex element to 1 or 2 UINT32 vertex elements.
 * 3- and 4-component formats are expanded into 2 slots.
 *
 * @param velems      Original vertex elements, will be updated to contain
 *                    the lowered vertex elements.
 * @param velem_count Original count, will be updated to contain the count
 *                    after lowering.
 * @param tmp         Temporary array of PIPE_MAX_ATTRIBS vertex elements.
 */
void
util_lower_uint64_vertex_elements(const struct pipe_vertex_element **velems,
                                  unsigned *velem_count,
                                  struct pipe_vertex_element tmp[PIPE_MAX_ATTRIBS])
{
   const struct pipe_vertex_element *input = *velems;
   unsigned count = *velem_count;
   bool has_64bit = false;

   for (unsigned i = 0; i < count; i++) {
      has_64bit |= input[i].src_format >= PIPE_FORMAT_R64_UINT &&
                   input[i].src_format <= PIPE_FORMAT_R64G64B64A64_UINT;
   }

   /* Return the original vertex elements if there is nothing to do. */
   if (!has_64bit)
      return;

   /* Lower 64_UINT to 32_UINT. */
   unsigned new_count = 0;

   for (unsigned i = 0; i < count; i++) {
      enum pipe_format format = input[i].src_format;

      /* If the shader input is dvec2 or smaller, reduce the number of
       * components to 2 at most. If the shader input is dvec3 or larger,
       * expand the number of components to 3 at least. If the 3rd component
       * is out of bounds, the hardware shouldn't skip loading the first
       * 2 components.
       */
      if (format >= PIPE_FORMAT_R64_UINT &&
          format <= PIPE_FORMAT_R64G64B64A64_UINT) {
         if (input[i].dual_slot)
            format = MAX2(format, PIPE_FORMAT_R64G64B64_UINT);
         else
            format = MIN2(format, PIPE_FORMAT_R64G64_UINT);
      }

      switch (format) {
      case PIPE_FORMAT_R64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64_UINT:
         tmp[new_count] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         new_count++;
         break;

      case PIPE_FORMAT_R64G64B64_UINT:
      case PIPE_FORMAT_R64G64B64A64_UINT:
         assert(new_count + 2 <= PIPE_MAX_ATTRIBS);
         tmp[new_count] = tmp[new_count + 1] = input[i];
         tmp[new_count].src_format = PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_format =
            format == PIPE_FORMAT_R64G64B64_UINT ?
                  PIPE_FORMAT_R32G32_UINT :
                  PIPE_FORMAT_R32G32B32A32_UINT;
         tmp[new_count + 1].src_offset += 16;
         new_count += 2;
         break;

      default:
         tmp[new_count++] = input[i];
         break;
      }
   }

   *velem_count = new_count;
   *velems = tmp;
}

/* This is a helper for hardware bring-up. Don't remove. */
struct pipe_query *
util_begin_pipestat_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_PIPELINE_STATISTICS, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for hardware bring-up. Don't remove. */
void
util_end_pipestat_query(struct pipe_context *ctx, struct pipe_query *q,
                        FILE *f)
{
   static unsigned counter;
   struct pipe_query_data_pipeline_statistics stats;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, (void*)&stats);
   ctx->destroy_query(ctx, q);

   fprintf(f,
           "Draw call %u:\n"
           "    ia_vertices    = %"PRIu64"\n"
           "    ia_primitives  = %"PRIu64"\n"
           "    vs_invocations = %"PRIu64"\n"
           "    gs_invocations = %"PRIu64"\n"
           "    gs_primitives  = %"PRIu64"\n"
           "    c_invocations  = %"PRIu64"\n"
           "    c_primitives   = %"PRIu64"\n"
           "    ps_invocations = %"PRIu64"\n"
           "    hs_invocations = %"PRIu64"\n"
           "    ds_invocations = %"PRIu64"\n"
           "    cs_invocations = %"PRIu64"\n",
           (unsigned)p_atomic_inc_return(&counter),
           stats.ia_vertices,
           stats.ia_primitives,
           stats.vs_invocations,
           stats.gs_invocations,
           stats.gs_primitives,
           stats.c_invocations,
           stats.c_primitives,
           stats.ps_invocations,
           stats.hs_invocations,
           stats.ds_invocations,
           stats.cs_invocations);
}
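
/* Bring-up sketch (illustrative): wrap a suspect draw in a pipeline
 * statistics query to see which stages actually ran. The draw_vbo call
 * shape is only an example.
 *
 *    struct pipe_query *q = util_begin_pipestat_query(ctx);
 *
 *    ctx->draw_vbo(ctx, &info, 0, NULL, &draw, 1);
 *
 *    if (q)
 *       util_end_pipestat_query(ctx, q, stderr);
 */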

/* This is a helper for profiling. Don't remove. */
struct pipe_query *
util_begin_time_query(struct pipe_context *ctx)
{
   struct pipe_query *q =
      ctx->create_query(ctx, PIPE_QUERY_TIME_ELAPSED, 0);
   if (!q)
      return NULL;

   ctx->begin_query(ctx, q);
   return q;
}

/* This is a helper for profiling. Don't remove. */
void
util_end_time_query(struct pipe_context *ctx, struct pipe_query *q, FILE *f,
                    const char *name)
{
   union pipe_query_result result;

   ctx->end_query(ctx, q);
   ctx->get_query_result(ctx, q, true, &result);
   ctx->destroy_query(ctx, q);

   fprintf(f, "Time elapsed: %s - %"PRIu64".%u us\n",
           name, result.u64 / 1000, (unsigned)(result.u64 % 1000) / 100);
}
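
/* Profiling sketch (illustrative): bracket a group of commands with a
 * TIME_ELAPSED query. util_end_time_query waits for the result, so this
 * stalls the CPU and is meant for debugging only.
 *
 *    struct pipe_query *q = util_begin_time_query(ctx);
 *
 *    ... issue the commands to be measured ...
 *
 *    if (q)
 *       util_end_time_query(ctx, q, stderr, "blit pass");
 */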

/* This is a helper for hardware bring-up. Don't remove. */
void
util_wait_for_idle(struct pipe_context *ctx)
{
   struct pipe_fence_handle *fence = NULL;

   ctx->flush(ctx, &fence, 0);
   ctx->screen->fence_finish(ctx->screen, NULL, fence, PIPE_TIMEOUT_INFINITE);
}

void
util_throttle_init(struct util_throttle *t, uint64_t max_mem_usage)
{
   t->max_mem_usage = max_mem_usage;
}

void
util_throttle_deinit(struct pipe_screen *screen, struct util_throttle *t)
{
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      screen->fence_reference(screen, &t->ring[i].fence, NULL);
}

static uint64_t
util_get_throttle_total_memory_usage(struct util_throttle *t)
{
   uint64_t total_usage = 0;

   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++)
      total_usage += t->ring[i].mem_usage;
   return total_usage;
}

static void util_dump_throttle_ring(struct util_throttle *t)
{
   printf("Throttle:\n");
   for (unsigned i = 0; i < ARRAY_SIZE(t->ring); i++) {
      printf("  ring[%u]: fence = %s, mem_usage = %"PRIu64"%s%s\n",
             i, t->ring[i].fence ? "yes" : " no",
             t->ring[i].mem_usage,
             t->flush_index == i ? " [flush]" : "",
             t->wait_index == i ? " [wait]" : "");
   }
}

/**
 * Notify util_throttle that the next operation allocates memory.
 * util_throttle tracks memory usage and waits for fences until its tracked
 * memory usage decreases.
 *
 * Example:
 *   util_throttle_memory_usage(..., w*h*d*Bpp);
 *   TexSubImage(..., w, h, d, ...);
 *
 * This means that TexSubImage can't allocate more memory than the maximum
 * limit set during initialization.
 */
void
util_throttle_memory_usage(struct pipe_context *pipe,
                           struct util_throttle *t, uint64_t memory_size)
{
   (void)util_dump_throttle_ring; /* silence warning */

   if (!t->max_mem_usage)
      return;

   struct pipe_screen *screen = pipe->screen;
   struct pipe_fence_handle **fence = NULL;
   unsigned ring_size = ARRAY_SIZE(t->ring);
   uint64_t total = util_get_throttle_total_memory_usage(t);

   /* If there is not enough memory, walk the list of fences and find
    * the latest one that we need to wait for.
    */
   while (t->wait_index != t->flush_index &&
          total && total + memory_size > t->max_mem_usage) {
      assert(t->ring[t->wait_index].fence);

      /* Release an older fence if we need to wait for a newer one. */
      if (fence)
         screen->fence_reference(screen, fence, NULL);

      fence = &t->ring[t->wait_index].fence;
      t->ring[t->wait_index].mem_usage = 0;
      t->wait_index = (t->wait_index + 1) % ring_size;

      total = util_get_throttle_total_memory_usage(t);
   }

   /* Wait for the fence to decrease memory usage. */
   if (fence) {
      screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
      screen->fence_reference(screen, fence, NULL);
   }

   /* Flush and get a fence if we've exhausted memory usage for the current
    * slot.
    */
   if (t->ring[t->flush_index].mem_usage &&
       t->ring[t->flush_index].mem_usage + memory_size >
       t->max_mem_usage / (ring_size / 2)) {
      struct pipe_fence_handle **fence =
         &t->ring[t->flush_index].fence;

      /* Expect that the current flush slot doesn't have a fence yet. */
      assert(!*fence);

      pipe->flush(pipe, fence, PIPE_FLUSH_ASYNC);
      t->flush_index = (t->flush_index + 1) % ring_size;

      /* Vacate the next slot if it's occupied. This should be rare. */
      if (t->flush_index == t->wait_index) {
         struct pipe_fence_handle **fence =
            &t->ring[t->wait_index].fence;

         t->ring[t->wait_index].mem_usage = 0;
         t->wait_index = (t->wait_index + 1) % ring_size;

         assert(*fence);
         screen->fence_finish(screen, pipe, *fence, PIPE_TIMEOUT_INFINITE);
         screen->fence_reference(screen, fence, NULL);
      }

      assert(!t->ring[t->flush_index].mem_usage);
      assert(!t->ring[t->flush_index].fence);
   }

   t->ring[t->flush_index].mem_usage += memory_size;
}

bool
util_lower_clearsize_to_dword(const void *clearValue, int *clearValueSize,
                              uint32_t *clamped)
{
   /* Reduce a large clear value size if possible. */
   if (*clearValueSize > 4) {
      bool clear_dword_duplicated = true;
      const uint32_t *clear_value = clearValue;

      /* See if we can lower large fills to dword fills. */
      for (unsigned i = 1; i < *clearValueSize / 4; i++) {
         if (clear_value[0] != clear_value[i]) {
            clear_dword_duplicated = false;
            break;
         }
      }
      if (clear_dword_duplicated) {
         *clamped = *clear_value;
         *clearValueSize = 4;
      }
      return clear_dword_duplicated;
   }

   /* Expand a small clear value size. */
   if (*clearValueSize <= 2) {
      if (*clearValueSize == 1) {
         *clamped = *(uint8_t *)clearValue;
         *clamped |=
            (*clamped << 8) | (*clamped << 16) | (*clamped << 24);
      } else {
         *clamped = *(uint16_t *)clearValue;
         *clamped |= *clamped << 16;
      }
      *clearValueSize = 4;
      return true;
   }
   return false;
}
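
/* Usage sketch (illustrative): a clear_buffer implementation that only has
 * a dword fill path can first try to reduce or expand the clear value.
 * Note the helper also returns false for an already-dword-sized value, so
 * that case is handled separately. "my_fill_buffer_dwords" and the local
 * variables are hypothetical.
 *
 *    int size = clear_value_size;
 *    uint32_t dword;
 *
 *    if (size == 4) {
 *       memcpy(&dword, clear_value, 4);  // already a dword; the helper
 *                                        // would return false here
 *    } else if (!util_lower_clearsize_to_dword(clear_value, &size, &dword)) {
 *       return false;  // non-repeating pattern: needs a generic fallback
 *    }
 *    my_fill_buffer_dwords(ctx, res, offset, fill_size, dword);
 */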

void
util_init_pipe_vertex_state(struct pipe_screen *screen,
                            struct pipe_vertex_buffer *buffer,
                            const struct pipe_vertex_element *elements,
                            unsigned num_elements,
                            struct pipe_resource *indexbuf,
                            uint32_t full_velem_mask,
                            struct pipe_vertex_state *state)
{
   assert(num_elements == util_bitcount(full_velem_mask));

   pipe_reference_init(&state->reference, 1);
   state->screen = screen;

   pipe_vertex_buffer_reference(&state->input.vbuffer, buffer);
   pipe_resource_reference(&state->input.indexbuf, indexbuf);
   state->input.num_elements = num_elements;
   for (unsigned i = 0; i < num_elements; i++)
      state->input.elements[i] = elements[i];
   state->input.full_velem_mask = full_velem_mask;
}

/**
 * Clamp color value to format range.
 */
union pipe_color_union
util_clamp_color(enum pipe_format format,
                 const union pipe_color_union *color)
{
   union pipe_color_union clamp_color = *color;
   int i;

   for (i = 0; i < util_format_get_nr_components(format); i++) {
      uint8_t bits = util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, i);

      if (util_format_is_unorm(format))
         clamp_color.ui[i] = _mesa_unorm_to_unorm(clamp_color.ui[i], bits, bits);
      else if (util_format_is_snorm(format))
         clamp_color.i[i] = _mesa_snorm_to_snorm(clamp_color.i[i], bits, bits);
      else if (util_format_is_pure_uint(format))
         clamp_color.ui[i] = _mesa_unsigned_to_unsigned(clamp_color.ui[i], bits);
      else if (util_format_is_pure_sint(format))
         clamp_color.i[i] = _mesa_signed_to_signed(clamp_color.i[i], bits);
   }

   return clamp_color;
}
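
/* Usage sketch (illustrative): clamp a pure-integer clear color to the bit
 * range of the render-target format before programming it, e.g. for a
 * 16-bit-per-channel UINT target:
 *
 *    union pipe_color_union safe =
 *       util_clamp_color(PIPE_FORMAT_R16G16B16A16_UINT, color);
 *
 * Each ui[] component is then limited to the range representable in
 * 16 bits.
 */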