1/* 2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark@freedesktop.org> 25 */ 26 27#ifndef FREEDRENO_UTIL_H_ 28#define FREEDRENO_UTIL_H_ 29 30#include "common/freedreno_common.h" 31 32#include "drm/freedreno_drmif.h" 33#include "drm/freedreno_ringbuffer.h" 34 35#include "pipe/p_format.h" 36#include "pipe/p_state.h" 37#include "util/compiler.h" 38#include "util/half_float.h" 39#include "util/log.h" 40#include "util/u_debug.h" 41#include "util/u_dynarray.h" 42#include "util/u_math.h" 43#include "util/u_pack_color.h" 44 45#include "adreno_common.xml.h" 46#include "adreno_pm4.xml.h" 47#include "disasm.h" 48 49#ifdef __cplusplus 50extern "C" { 51#endif 52 53enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format); 54enum pc_di_index_size fd_pipe2index(enum pipe_format format); 55enum pipe_format fd_gmem_restore_format(enum pipe_format format); 56enum adreno_rb_blend_factor fd_blend_factor(unsigned factor); 57enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode); 58enum adreno_stencil_op fd_stencil_op(unsigned op); 59 60#define A3XX_MAX_MIP_LEVELS 14 61 62#define A2XX_MAX_RENDER_TARGETS 1 63#define A3XX_MAX_RENDER_TARGETS 4 64#define A4XX_MAX_RENDER_TARGETS 8 65#define A5XX_MAX_RENDER_TARGETS 8 66#define A6XX_MAX_RENDER_TARGETS 8 67 68#define MAX_RENDER_TARGETS A6XX_MAX_RENDER_TARGETS 69 70/* clang-format off */ 71enum fd_debug_flag { 72 FD_DBG_MSGS = BITFIELD_BIT(0), 73 FD_DBG_DISASM = BITFIELD_BIT(1), 74 FD_DBG_DCLEAR = BITFIELD_BIT(2), 75 FD_DBG_DDRAW = BITFIELD_BIT(3), 76 FD_DBG_NOSCIS = BITFIELD_BIT(4), 77 FD_DBG_DIRECT = BITFIELD_BIT(5), 78 FD_DBG_GMEM = BITFIELD_BIT(6), 79 FD_DBG_PERF = BITFIELD_BIT(7), 80 FD_DBG_NOBIN = BITFIELD_BIT(8), 81 FD_DBG_SYSMEM = BITFIELD_BIT(9), 82 FD_DBG_SERIALC = BITFIELD_BIT(10), 83 FD_DBG_SHADERDB = BITFIELD_BIT(11), 84 FD_DBG_FLUSH = BITFIELD_BIT(12), 85 FD_DBG_DEQP = BITFIELD_BIT(13), 86 FD_DBG_INORDER = BITFIELD_BIT(14), 87 FD_DBG_BSTAT = BITFIELD_BIT(15), 88 FD_DBG_NOGROW = BITFIELD_BIT(16), 89 FD_DBG_LRZ = BITFIELD_BIT(17), 90 FD_DBG_NOINDR = BITFIELD_BIT(18), 91 FD_DBG_NOBLIT = BITFIELD_BIT(19), 92 FD_DBG_HIPRIO = BITFIELD_BIT(20), 93 FD_DBG_TTILE = BITFIELD_BIT(21), 94 FD_DBG_PERFC = BITFIELD_BIT(22), 95 FD_DBG_NOUBWC = BITFIELD_BIT(23), 96 FD_DBG_NOLRZ = BITFIELD_BIT(24), 97 FD_DBG_NOTILE = BITFIELD_BIT(25), 98 FD_DBG_LAYOUT = BITFIELD_BIT(26), 99 FD_DBG_NOFP16 = BITFIELD_BIT(27), 100 FD_DBG_NOHW = BITFIELD_BIT(28), 101 FD_DBG_NOSBIN = BITFIELD_BIT(29), 102}; 103/* clang-format on */ 104 105extern int fd_mesa_debug; 106extern bool fd_binning_enabled; 107 108#define FD_DBG(category) unlikely(fd_mesa_debug &FD_DBG_##category) 109 110#include <unistd.h> 111#include <sys/types.h> 112#include <sys/syscall.h> 113 114#define DBG(fmt, ...) \ 115 do { \ 116 if (FD_DBG(MSGS)) \ 117 mesa_logi("%5d: %s:%d: " fmt, ((pid_t)syscall(SYS_gettid)), \ 118 __FUNCTION__, __LINE__, \ 119 ##__VA_ARGS__); \ 120 } while (0) 121 122#define perf_debug_message(debug, type, ...) \ 123 do { \ 124 if (FD_DBG(PERF)) \ 125 mesa_logw(__VA_ARGS__); \ 126 struct util_debug_callback *__d = (debug); \ 127 if (__d) \ 128 util_debug_message(__d, type, __VA_ARGS__); \ 129 } while (0) 130 131#define perf_debug_ctx(ctx, ...) \ 132 do { \ 133 struct fd_context *__c = (ctx); \ 134 perf_debug_message(__c ? &__c->debug : NULL, PERF_INFO, __VA_ARGS__); \ 135 } while (0) 136 137#define perf_debug(...) perf_debug_ctx(NULL, __VA_ARGS__) 138 139#define perf_time_ctx(ctx, limit_ns, fmt, ...) \ 140 for (struct __perf_time_state __s = \ 141 { \ 142 .t = -__perf_get_time(ctx), \ 143 }; \ 144 !__s.done; ({ \ 145 __s.t += __perf_get_time(ctx); \ 146 __s.done = true; \ 147 if (__s.t > (limit_ns)) { \ 148 perf_debug_ctx(ctx, fmt " (%.03f ms)", ##__VA_ARGS__, \ 149 (double)__s.t / 1000000.0); \ 150 } \ 151 })) 152 153#define perf_time(limit_ns, fmt, ...) \ 154 perf_time_ctx(NULL, limit_ns, fmt, ##__VA_ARGS__) 155 156struct __perf_time_state { 157 int64_t t; 158 bool done; 159}; 160 161/* static inline would be nice here, except 'struct fd_context' is not 162 * defined yet: 163 */ 164#define __perf_get_time(ctx) \ 165 ((FD_DBG(PERF) || ({ \ 166 struct fd_context *__c = (ctx); \ 167 unlikely(__c && __c->debug.debug_message); \ 168 })) \ 169 ? os_time_get_nano() \ 170 : 0) 171 172struct fd_context; 173 174/** 175 * A psuedo-variable for defining where various parts of the fd_context 176 * can be safely accessed. 177 * 178 * With threaded_context, certain pctx funcs are called from gallium 179 * front-end/state-tracker (eg. CSO creation), while others are called 180 * from the driver thread. Things called from driver thread can safely 181 * access anything in the ctx, while things called from the fe/st thread 182 * must limit themselves to "safe" things (ie. ctx->screen is safe as it 183 * is immutable, but the blitter_context is not). 184 */ 185extern lock_cap_t fd_context_access_cap; 186 187/** 188 * Make the annotation a bit less verbose.. mark fields which should only 189 * be accessed by driver-thread with 'dt' 190 */ 191#define dt guarded_by(fd_context_access_cap) 192 193/** 194 * Annotation for entry-point functions only called in driver thread. 195 * 196 * For static functions, apply the annotation to the function declaration. 197 * Otherwise apply to the function prototype. 198 */ 199#define in_dt assert_cap(fd_context_access_cap) 200 201/** 202 * Annotation for internal functions which are only called from entry- 203 * point functions (with 'in_dt' annotation) or other internal functions 204 * with the 'assert_dt' annotation. 205 * 206 * For static functions, apply the annotation to the function declaration. 207 * Otherwise apply to the function prototype. 208 */ 209#define assert_dt requires_cap(fd_context_access_cap) 210 211/** 212 * Special helpers for context access outside of driver thread. For ex, 213 * pctx->get_query_result() is not called on driver thread, but the 214 * query is guaranteed to be flushed, or the driver thread queue is 215 * guaranteed to be flushed. 216 * 217 * Use with caution! 218 */ 219static inline void 220fd_context_access_begin(struct fd_context *ctx) 221 acquire_cap(fd_context_access_cap) 222{ 223} 224 225static inline void 226fd_context_access_end(struct fd_context *ctx) release_cap(fd_context_access_cap) 227{ 228} 229 230#define CP_REG(reg) ((0x4 << 16) | ((unsigned int)((reg) - (0x2000)))) 231 232static inline uint32_t 233DRAW(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select, 234 enum pc_di_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode, 235 uint8_t instances) 236{ 237 return (prim_type << 0) | (source_select << 6) | ((index_size & 1) << 11) | 238 ((index_size >> 1) << 13) | (vis_cull_mode << 9) | (1 << 14) | 239 (instances << 24); 240} 241 242static inline uint32_t 243DRAW_A20X(enum pc_di_primtype prim_type, 244 enum pc_di_face_cull_sel faceness_cull_select, 245 enum pc_di_src_sel source_select, enum pc_di_index_size index_size, 246 bool pre_fetch_cull_enable, bool grp_cull_enable, uint16_t count) 247{ 248 return (prim_type << 0) | (source_select << 6) | 249 (faceness_cull_select << 8) | ((index_size & 1) << 11) | 250 ((index_size >> 1) << 13) | (pre_fetch_cull_enable << 14) | 251 (grp_cull_enable << 15) | (count << 16); 252} 253 254/* for tracking cmdstream positions that need to be patched: */ 255struct fd_cs_patch { 256 uint32_t *cs; 257 uint32_t val; 258}; 259#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch)) 260#define fd_patch_element(buf, i) \ 261 util_dynarray_element(buf, struct fd_cs_patch, i) 262 263static inline enum pipe_format 264pipe_surface_format(struct pipe_surface *psurf) 265{ 266 if (!psurf) 267 return PIPE_FORMAT_NONE; 268 return psurf->format; 269} 270 271static inline bool 272fd_surface_half_precision(const struct pipe_surface *psurf) 273{ 274 enum pipe_format format; 275 276 if (!psurf) 277 return true; 278 279 format = psurf->format; 280 281 /* colors are provided in consts, which go through cov.f32f16, which will 282 * break these values 283 */ 284 if (util_format_is_pure_integer(format)) 285 return false; 286 287 /* avoid losing precision on 32-bit float formats */ 288 if (util_format_is_float(format) && 289 util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) == 290 32) 291 return false; 292 293 return true; 294} 295 296static inline unsigned 297fd_sampler_first_level(const struct pipe_sampler_view *view) 298{ 299 if (view->target == PIPE_BUFFER) 300 return 0; 301 return view->u.tex.first_level; 302} 303 304static inline unsigned 305fd_sampler_last_level(const struct pipe_sampler_view *view) 306{ 307 if (view->target == PIPE_BUFFER) 308 return 0; 309 return view->u.tex.last_level; 310} 311 312static inline bool 313fd_half_precision(struct pipe_framebuffer_state *pfb) 314{ 315 unsigned i; 316 317 for (i = 0; i < pfb->nr_cbufs; i++) 318 if (!fd_surface_half_precision(pfb->cbufs[i])) 319 return false; 320 321 return true; 322} 323 324static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx); 325 326/* like OUT_RING() but appends a cmdstream patch point to 'buf' */ 327static inline void 328OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, struct util_dynarray *buf) 329{ 330 if (LOG_DWORDS) { 331 DBG("ring[%p]: OUT_RINGP %04x: %08x", ring, 332 (uint32_t)(ring->cur - ring->start), data); 333 } 334 util_dynarray_append(buf, struct fd_cs_patch, 335 ((struct fd_cs_patch){ 336 .cs = ring->cur++, 337 .val = data, 338 })); 339} 340 341static inline void 342__OUT_IB(struct fd_ringbuffer *ring, bool prefetch, 343 struct fd_ringbuffer *target) 344{ 345 if (target->cur == target->start) 346 return; 347 348 unsigned count = fd_ringbuffer_cmd_count(target); 349 350 /* for debug after a lock up, write a unique counter value 351 * to scratch6 for each IB, to make it easier to match up 352 * register dumps to cmdstream. The combination of IB and 353 * DRAW (scratch7) is enough to "triangulate" the particular 354 * draw that caused lockup. 355 */ 356 emit_marker(ring, 6); 357 358 for (unsigned i = 0; i < count; i++) { 359 uint32_t dwords; 360 OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD, 361 2); 362 dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; 363 assert(dwords > 0); 364 OUT_RING(ring, dwords); 365 OUT_PKT2(ring); 366 } 367 368 emit_marker(ring, 6); 369} 370 371static inline void 372__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target) 373{ 374 if (target->cur == target->start) 375 return; 376 377 unsigned count = fd_ringbuffer_cmd_count(target); 378 379 for (unsigned i = 0; i < count; i++) { 380 uint32_t dwords; 381 OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3); 382 dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4; 383 assert(dwords > 0); 384 OUT_RING(ring, dwords); 385 } 386} 387 388/* CP_SCRATCH_REG4 is used to hold base address for query results: */ 389// XXX annoyingly scratch regs move on a5xx.. and additionally different 390// packet types.. so freedreno_query_hw is going to need a bit of 391// rework.. 392#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4 393 394#ifdef DEBUG 395#define __EMIT_MARKER 1 396#else 397#define __EMIT_MARKER 0 398#endif 399 400static inline void 401emit_marker(struct fd_ringbuffer *ring, int scratch_idx) 402{ 403 extern int32_t marker_cnt; 404 unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx; 405 assert(reg != HW_QUERY_BASE_REG); 406 if (reg == HW_QUERY_BASE_REG) 407 return; 408 if (__EMIT_MARKER) { 409 OUT_WFI(ring); 410 OUT_PKT0(ring, reg, 1); 411 OUT_RING(ring, p_atomic_inc_return(&marker_cnt)); 412 } 413} 414 415static inline uint32_t 416pack_rgba(enum pipe_format format, const float *rgba) 417{ 418 union util_color uc; 419 util_pack_color(rgba, format, &uc); 420 return uc.ui[0]; 421} 422 423/* 424 * a3xx+ helpers: 425 */ 426 427static inline enum a3xx_msaa_samples 428fd_msaa_samples(unsigned samples) 429{ 430 switch (samples) { 431 default: 432 assert(0); 433 case 0: 434 case 1: 435 return MSAA_ONE; 436 case 2: 437 return MSAA_TWO; 438 case 4: 439 return MSAA_FOUR; 440 case 8: 441 return MSAA_EIGHT; 442 } 443} 444 445/* 446 * a4xx+ helpers: 447 */ 448 449static inline enum a4xx_state_block 450fd4_stage2shadersb(gl_shader_stage type) 451{ 452 switch (type) { 453 case MESA_SHADER_VERTEX: 454 return SB4_VS_SHADER; 455 case MESA_SHADER_FRAGMENT: 456 return SB4_FS_SHADER; 457 case MESA_SHADER_COMPUTE: 458 case MESA_SHADER_KERNEL: 459 return SB4_CS_SHADER; 460 default: 461 unreachable("bad shader type"); 462 return (enum a4xx_state_block) ~0; 463 } 464} 465 466static inline enum a4xx_index_size 467fd4_size2indextype(unsigned index_size) 468{ 469 switch (index_size) { 470 case 1: 471 return INDEX4_SIZE_8_BIT; 472 case 2: 473 return INDEX4_SIZE_16_BIT; 474 case 4: 475 return INDEX4_SIZE_32_BIT; 476 } 477 DBG("unsupported index size: %d", index_size); 478 assert(0); 479 return INDEX4_SIZE_32_BIT; 480} 481 482#ifdef __cplusplus 483} 484#endif 485 486#endif /* FREEDRENO_UTIL_H_ */ 487