1/*
2 * Copyright (C) 2012 Rob Clark <robclark@freedesktop.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 *    Rob Clark <robclark@freedesktop.org>
25 */
26
27#ifndef FREEDRENO_UTIL_H_
28#define FREEDRENO_UTIL_H_
29
30#include "common/freedreno_common.h"
31
32#include "drm/freedreno_drmif.h"
33#include "drm/freedreno_ringbuffer.h"
34
35#include "pipe/p_format.h"
36#include "pipe/p_state.h"
37#include "util/compiler.h"
38#include "util/half_float.h"
39#include "util/log.h"
40#include "util/u_debug.h"
41#include "util/u_dynarray.h"
42#include "util/u_math.h"
43#include "util/u_pack_color.h"
44
45#include "adreno_common.xml.h"
46#include "adreno_pm4.xml.h"
47#include "disasm.h"
48
49#ifdef __cplusplus
50extern "C" {
51#endif
52
53enum adreno_rb_depth_format fd_pipe2depth(enum pipe_format format);
54enum pc_di_index_size fd_pipe2index(enum pipe_format format);
55enum pipe_format fd_gmem_restore_format(enum pipe_format format);
56enum adreno_rb_blend_factor fd_blend_factor(unsigned factor);
57enum adreno_pa_su_sc_draw fd_polygon_mode(unsigned mode);
58enum adreno_stencil_op fd_stencil_op(unsigned op);
59
/* Mip-level limit (A3XX-prefixed). */
#define A3XX_MAX_MIP_LEVELS 14

/* Render-target limits, one per A2XX..A6XX prefix. */
#define A2XX_MAX_RENDER_TARGETS 1
#define A3XX_MAX_RENDER_TARGETS 4
#define A4XX_MAX_RENDER_TARGETS 8
#define A5XX_MAX_RENDER_TARGETS 8
#define A6XX_MAX_RENDER_TARGETS 8

/* Generic limit: aliases the A6XX value. */
#define MAX_RENDER_TARGETS A6XX_MAX_RENDER_TARGETS
69
70/* clang-format off */
71enum fd_debug_flag {
72   FD_DBG_MSGS         = BITFIELD_BIT(0),
73   FD_DBG_DISASM       = BITFIELD_BIT(1),
74   FD_DBG_DCLEAR       = BITFIELD_BIT(2),
75   FD_DBG_DDRAW        = BITFIELD_BIT(3),
76   FD_DBG_NOSCIS       = BITFIELD_BIT(4),
77   FD_DBG_DIRECT       = BITFIELD_BIT(5),
78   FD_DBG_GMEM         = BITFIELD_BIT(6),
79   FD_DBG_PERF         = BITFIELD_BIT(7),
80   FD_DBG_NOBIN        = BITFIELD_BIT(8),
81   FD_DBG_SYSMEM       = BITFIELD_BIT(9),
82   FD_DBG_SERIALC      = BITFIELD_BIT(10),
83   FD_DBG_SHADERDB     = BITFIELD_BIT(11),
84   FD_DBG_FLUSH        = BITFIELD_BIT(12),
85   FD_DBG_DEQP         = BITFIELD_BIT(13),
86   FD_DBG_INORDER      = BITFIELD_BIT(14),
87   FD_DBG_BSTAT        = BITFIELD_BIT(15),
88   FD_DBG_NOGROW       = BITFIELD_BIT(16),
89   FD_DBG_LRZ          = BITFIELD_BIT(17),
90   FD_DBG_NOINDR       = BITFIELD_BIT(18),
91   FD_DBG_NOBLIT       = BITFIELD_BIT(19),
92   FD_DBG_HIPRIO       = BITFIELD_BIT(20),
93   FD_DBG_TTILE        = BITFIELD_BIT(21),
94   FD_DBG_PERFC        = BITFIELD_BIT(22),
95   FD_DBG_NOUBWC       = BITFIELD_BIT(23),
96   FD_DBG_NOLRZ        = BITFIELD_BIT(24),
97   FD_DBG_NOTILE       = BITFIELD_BIT(25),
98   FD_DBG_LAYOUT       = BITFIELD_BIT(26),
99   FD_DBG_NOFP16       = BITFIELD_BIT(27),
100   FD_DBG_NOHW         = BITFIELD_BIT(28),
101   FD_DBG_NOSBIN       = BITFIELD_BIT(29),
102};
103/* clang-format on */
104
105extern int fd_mesa_debug;
106extern bool fd_binning_enabled;
107
/* Nonzero when the FD_DBG_<category> bit is set in fd_mesa_debug; wrapped in
 * unlikely() so the debug-enabled path is treated as the cold one.
 */
#define FD_DBG(category) unlikely(fd_mesa_debug & FD_DBG_##category)
109
110#include <unistd.h>
111#include <sys/types.h>
112#include <sys/syscall.h>
113
/* Log a printf-style message through mesa_logi() when the MSGS debug flag is
 * set.  Each message is prefixed with the calling thread's id (obtained via
 * the gettid syscall), the enclosing function name and the line number.
 */
#define DBG(fmt, ...)                                                          \
   do {                                                                        \
      if (FD_DBG(MSGS)) {                                                      \
         mesa_logi("%5d: %s:%d: " fmt, (pid_t)syscall(SYS_gettid),             \
                   __FUNCTION__, __LINE__, ##__VA_ARGS__);                     \
      }                                                                        \
   } while (0)
121
/* Report a performance message to up to two sinks:
 *  - mesa_logw(), when the PERF debug flag is set;
 *  - util_debug_message() on 'debug', when 'debug' is non-NULL.
 * 'debug' is evaluated exactly once; the variadic arguments are expanded
 * once per sink.
 */
#define perf_debug_message(debug, type, ...)                                   \
   do {                                                                        \
      if (FD_DBG(PERF)) {                                                      \
         mesa_logw(__VA_ARGS__);                                               \
      }                                                                        \
      struct util_debug_callback *__d = (debug);                               \
      if (__d) {                                                               \
         util_debug_message(__d, type, __VA_ARGS__);                           \
      }                                                                        \
   } while (0)
130
/* perf_debug_message() at PERF_INFO level, using the debug callback embedded
 * in 'ctx' (a struct fd_context pointer).  A NULL 'ctx' is allowed and simply
 * means that no callback is passed along.
 */
#define perf_debug_ctx(ctx, ...)                                               \
   do {                                                                        \
      struct fd_context *__c = (ctx);                                          \
      struct util_debug_callback *__cb = __c ? &__c->debug : NULL;             \
      perf_debug_message(__cb, PERF_INFO, __VA_ARGS__);                        \
   } while (0)

/* Context-less variant of perf_debug_ctx(). */
#define perf_debug(...) perf_debug_ctx(NULL, __VA_ARGS__)
138
/* Statement prefix that times the statement (or block) following it:
 *
 *    perf_time_ctx(ctx, limit_ns, "fmt", ...) { ...timed code... }
 *
 * It is a for loop that runs its body exactly once.  The start time is
 * stored negated in __s.t, so adding the end time afterwards leaves the
 * elapsed time (as reported by __perf_get_time()) in __s.t.  If that exceeds
 * 'limit_ns', the message is emitted through perf_debug_ctx() with the
 * elapsed time in milliseconds appended.
 */
#define perf_time_ctx(ctx, limit_ns, fmt, ...)                                 \
   for (struct __perf_time_state __s = {.t = -__perf_get_time(ctx)};           \
        !__s.done; ({                                                          \
           __s.t += __perf_get_time(ctx);                                      \
           __s.done = true;                                                    \
           if (__s.t > (limit_ns))                                             \
              perf_debug_ctx(ctx, fmt " (%.03f ms)", ##__VA_ARGS__,            \
                             (double)__s.t / 1000000.0);                       \
        }))

/* Context-less variant of perf_time_ctx(). */
#define perf_time(limit_ns, fmt, ...)                                          \
   perf_time_ctx(NULL, limit_ns, fmt, ##__VA_ARGS__)
155
/* Loop state used by perf_time_ctx(). */
struct __perf_time_state {
   int64_t t; /* negated start time, then elapsed time once the end is added */
   bool done; /* set after the timed body has run, terminating the loop */
};
160
/* These would ideally be static inline functions, but 'struct fd_context'
 * is not defined yet at this point, so they have to be macros.
 */

/* True when a timestamp is worth taking: either the PERF debug flag is set,
 * or 'ctx' is non-NULL and has a debug_message callback installed.  'ctx' is
 * only evaluated when the PERF flag is not set.
 */
#define __perf_timing_wanted(ctx)                                              \
   (FD_DBG(PERF) || ({                                                         \
       struct fd_context *__c = (ctx);                                         \
       unlikely(__c && __c->debug.debug_message);                              \
    }))

/* os_time_get_nano() when timing is wanted for 'ctx', otherwise 0. */
#define __perf_get_time(ctx)                                                   \
   (__perf_timing_wanted(ctx) ? os_time_get_nano() : 0)
171
172struct fd_context;
173
174/**
 * A pseudo-variable for defining where various parts of the fd_context
176 * can be safely accessed.
177 *
178 * With threaded_context, certain pctx funcs are called from gallium
179 * front-end/state-tracker (eg. CSO creation), while others are called
180 * from the driver thread.  Things called from driver thread can safely
181 * access anything in the ctx, while things called from the fe/st thread
182 * must limit themselves to "safe" things (ie. ctx->screen is safe as it
183 * is immutable, but the blitter_context is not).
184 */
185extern lock_cap_t fd_context_access_cap;
186
187/**
188 * Make the annotation a bit less verbose.. mark fields which should only
189 * be accessed by driver-thread with 'dt'
190 */
191#define dt guarded_by(fd_context_access_cap)
192
193/**
194 * Annotation for entry-point functions only called in driver thread.
195 *
196 * For static functions, apply the annotation to the function declaration.
197 * Otherwise apply to the function prototype.
198 */
199#define in_dt assert_cap(fd_context_access_cap)
200
201/**
202 * Annotation for internal functions which are only called from entry-
203 * point functions (with 'in_dt' annotation) or other internal functions
204 * with the 'assert_dt' annotation.
205 *
206 * For static functions, apply the annotation to the function declaration.
207 * Otherwise apply to the function prototype.
208 */
209#define assert_dt requires_cap(fd_context_access_cap)
210
211/**
212 * Special helpers for context access outside of driver thread.  For ex,
213 * pctx->get_query_result() is not called on driver thread, but the
214 * query is guaranteed to be flushed, or the driver thread queue is
215 * guaranteed to be flushed.
216 *
217 * Use with caution!
218 */
219static inline void
220fd_context_access_begin(struct fd_context *ctx)
221   acquire_cap(fd_context_access_cap)
222{
223}
224
225static inline void
226fd_context_access_end(struct fd_context *ctx) release_cap(fd_context_access_cap)
227{
228}
229
/* Encode a register offset: the offset of 'reg' from 0x2000 in the low bits,
 * OR'd with the constant 0x4 placed at bit 16.
 */
#define CP_REG(reg) (((unsigned int)((reg) - (0x2000))) | (0x4 << 16))
231
232static inline uint32_t
233DRAW(enum pc_di_primtype prim_type, enum pc_di_src_sel source_select,
234     enum pc_di_index_size index_size, enum pc_di_vis_cull_mode vis_cull_mode,
235     uint8_t instances)
236{
237   return (prim_type << 0) | (source_select << 6) | ((index_size & 1) << 11) |
238          ((index_size >> 1) << 13) | (vis_cull_mode << 9) | (1 << 14) |
239          (instances << 24);
240}
241
242static inline uint32_t
243DRAW_A20X(enum pc_di_primtype prim_type,
244          enum pc_di_face_cull_sel faceness_cull_select,
245          enum pc_di_src_sel source_select, enum pc_di_index_size index_size,
246          bool pre_fetch_cull_enable, bool grp_cull_enable, uint16_t count)
247{
248   return (prim_type << 0) | (source_select << 6) |
249          (faceness_cull_select << 8) | ((index_size & 1) << 11) |
250          ((index_size >> 1) << 13) | (pre_fetch_cull_enable << 14) |
251          (grp_cull_enable << 15) | (count << 16);
252}
253
/* A cmdstream position that needs to be patched, plus its associated value: */
struct fd_cs_patch {
   uint32_t *cs; /* location in the cmdstream */
   uint32_t val; /* value recorded for that location */
};

/* Number of fd_cs_patch entries held in 'buf', derived from its byte size. */
#define fd_patch_num_elements(buf) ((buf)->size / sizeof(struct fd_cs_patch))

/* Access the i'th fd_cs_patch entry of the dynarray 'buf'. */
#define fd_patch_element(buf, i)                                               \
   util_dynarray_element(buf, struct fd_cs_patch, i)
262
263static inline enum pipe_format
264pipe_surface_format(struct pipe_surface *psurf)
265{
266   if (!psurf)
267      return PIPE_FORMAT_NONE;
268   return psurf->format;
269}
270
271static inline bool
272fd_surface_half_precision(const struct pipe_surface *psurf)
273{
274   enum pipe_format format;
275
276   if (!psurf)
277      return true;
278
279   format = psurf->format;
280
281   /* colors are provided in consts, which go through cov.f32f16, which will
282    * break these values
283    */
284   if (util_format_is_pure_integer(format))
285      return false;
286
287   /* avoid losing precision on 32-bit float formats */
288   if (util_format_is_float(format) &&
289       util_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, 0) ==
290          32)
291      return false;
292
293   return true;
294}
295
296static inline unsigned
297fd_sampler_first_level(const struct pipe_sampler_view *view)
298{
299   if (view->target == PIPE_BUFFER)
300      return 0;
301   return view->u.tex.first_level;
302}
303
304static inline unsigned
305fd_sampler_last_level(const struct pipe_sampler_view *view)
306{
307   if (view->target == PIPE_BUFFER)
308      return 0;
309   return view->u.tex.last_level;
310}
311
312static inline bool
313fd_half_precision(struct pipe_framebuffer_state *pfb)
314{
315   unsigned i;
316
317   for (i = 0; i < pfb->nr_cbufs; i++)
318      if (!fd_surface_half_precision(pfb->cbufs[i]))
319         return false;
320
321   return true;
322}
323
324static inline void emit_marker(struct fd_ringbuffer *ring, int scratch_idx);
325
326/* like OUT_RING() but appends a cmdstream patch point to 'buf' */
327static inline void
328OUT_RINGP(struct fd_ringbuffer *ring, uint32_t data, struct util_dynarray *buf)
329{
330   if (LOG_DWORDS) {
331      DBG("ring[%p]: OUT_RINGP  %04x:  %08x", ring,
332          (uint32_t)(ring->cur - ring->start), data);
333   }
334   util_dynarray_append(buf, struct fd_cs_patch,
335                        ((struct fd_cs_patch){
336                           .cs = ring->cur++,
337                           .val = data,
338                        }));
339}
340
341static inline void
342__OUT_IB(struct fd_ringbuffer *ring, bool prefetch,
343         struct fd_ringbuffer *target)
344{
345   if (target->cur == target->start)
346      return;
347
348   unsigned count = fd_ringbuffer_cmd_count(target);
349
350   /* for debug after a lock up, write a unique counter value
351    * to scratch6 for each IB, to make it easier to match up
352    * register dumps to cmdstream.  The combination of IB and
353    * DRAW (scratch7) is enough to "triangulate" the particular
354    * draw that caused lockup.
355    */
356   emit_marker(ring, 6);
357
358   for (unsigned i = 0; i < count; i++) {
359      uint32_t dwords;
360      OUT_PKT3(ring, prefetch ? CP_INDIRECT_BUFFER_PFE : CP_INDIRECT_BUFFER_PFD,
361               2);
362      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
363      assert(dwords > 0);
364      OUT_RING(ring, dwords);
365      OUT_PKT2(ring);
366   }
367
368   emit_marker(ring, 6);
369}
370
371static inline void
372__OUT_IB5(struct fd_ringbuffer *ring, struct fd_ringbuffer *target)
373{
374   if (target->cur == target->start)
375      return;
376
377   unsigned count = fd_ringbuffer_cmd_count(target);
378
379   for (unsigned i = 0; i < count; i++) {
380      uint32_t dwords;
381      OUT_PKT7(ring, CP_INDIRECT_BUFFER, 3);
382      dwords = fd_ringbuffer_emit_reloc_ring_full(ring, target, i) / 4;
383      assert(dwords > 0);
384      OUT_RING(ring, dwords);
385   }
386}
387
388/* CP_SCRATCH_REG4 is used to hold base address for query results: */
389// XXX annoyingly scratch regs move on a5xx.. and additionally different
390// packet types.. so freedreno_query_hw is going to need a bit of
391// rework..
392#define HW_QUERY_BASE_REG REG_AXXX_CP_SCRATCH_REG4
393
/* __EMIT_MARKER is 1 when DEBUG is defined and 0 otherwise.  It is always
 * defined, so it can be tested from ordinary C code.
 */
#if defined(DEBUG)
#define __EMIT_MARKER 1
#else
#define __EMIT_MARKER 0
#endif
399
400static inline void
401emit_marker(struct fd_ringbuffer *ring, int scratch_idx)
402{
403   extern int32_t marker_cnt;
404   unsigned reg = REG_AXXX_CP_SCRATCH_REG0 + scratch_idx;
405   assert(reg != HW_QUERY_BASE_REG);
406   if (reg == HW_QUERY_BASE_REG)
407      return;
408   if (__EMIT_MARKER) {
409      OUT_WFI(ring);
410      OUT_PKT0(ring, reg, 1);
411      OUT_RING(ring, p_atomic_inc_return(&marker_cnt));
412   }
413}
414
415static inline uint32_t
416pack_rgba(enum pipe_format format, const float *rgba)
417{
418   union util_color uc;
419   util_pack_color(rgba, format, &uc);
420   return uc.ui[0];
421}
422
423/*
424 * a3xx+ helpers:
425 */
426
427static inline enum a3xx_msaa_samples
428fd_msaa_samples(unsigned samples)
429{
430   switch (samples) {
431   default:
432      assert(0);
433   case 0:
434   case 1:
435      return MSAA_ONE;
436   case 2:
437      return MSAA_TWO;
438   case 4:
439      return MSAA_FOUR;
440   case 8:
441      return MSAA_EIGHT;
442   }
443}
444
445/*
446 * a4xx+ helpers:
447 */
448
449static inline enum a4xx_state_block
450fd4_stage2shadersb(gl_shader_stage type)
451{
452   switch (type) {
453   case MESA_SHADER_VERTEX:
454      return SB4_VS_SHADER;
455   case MESA_SHADER_FRAGMENT:
456      return SB4_FS_SHADER;
457   case MESA_SHADER_COMPUTE:
458   case MESA_SHADER_KERNEL:
459      return SB4_CS_SHADER;
460   default:
461      unreachable("bad shader type");
462      return (enum a4xx_state_block) ~0;
463   }
464}
465
466static inline enum a4xx_index_size
467fd4_size2indextype(unsigned index_size)
468{
469   switch (index_size) {
470   case 1:
471      return INDEX4_SIZE_8_BIT;
472   case 2:
473      return INDEX4_SIZE_16_BIT;
474   case 4:
475      return INDEX4_SIZE_32_BIT;
476   }
477   DBG("unsupported index size: %d", index_size);
478   assert(0);
479   return INDEX4_SIZE_32_BIT;
480}
481
482#ifdef __cplusplus
483}
484#endif
485
486#endif /* FREEDRENO_UTIL_H_ */
487