1/*
2 * Copyright © 2020 Valve Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 *
23 */
24
25#ifndef FREEDRENO_DEVICE_INFO_H
26#define FREEDRENO_DEVICE_INFO_H
27
28#include <assert.h>
29#include <stdbool.h>
30#include <stdint.h>
31
32#ifdef __cplusplus
33extern "C" {
34#endif
35
36/**
37 * Freedreno hardware description and quirks
38 */
39
struct fd_dev_info {
   /* Tile dimensions must be aligned to these values. */
   uint32_t tile_align_w;
   uint32_t tile_align_h;

   /* Granularity of GMEM loads/stores. */
   uint32_t gmem_align_w;
   uint32_t gmem_align_h;

   /* Largest allowed tile size. */
   uint32_t tile_max_w;
   uint32_t tile_max_h;

   uint32_t num_vsc_pipes;

   /* There are always exactly as many CCUs as SPs, so both names share
    * the same storage.
    */
   union {
      uint32_t num_sp_cores;
      uint32_t num_ccu;
   };

   union {
      struct {
         /* Inputs to the private memory size calculations. */
         uint32_t fibers_per_sp;

         uint32_t reg_size_vec4;

         /* Size of the instruction cache into which a shader is preloaded,
          * in instrlen units (128 bytes).  Loading more than this could
          * trigger a hang on gen3 and later.
          */
         uint32_t instr_cache_size;

         /* True if the PC_MULTIVIEW_MASK register exists. */
         bool supports_multiview_mask;

         /* Used when programming RB_CCU_CNTL. */
         bool concurrent_resolve;
         bool has_z24uint_s8uint;

         bool tess_use_shared;

         /* True if the hw supports GL_QCOM_shading_rate. */
         bool has_shading_rate;

         /* Newer a6xx can use a 16-bit descriptor for both 16-bit and
          * 32-bit access.
          */
         bool storage_16bit;

         /* The latest known a630_sqe.fw does not wait for WFI before it
          * reads the indirect buffer for CP_DRAW_INDIRECT_MULTI, so we
          * have to fall back to CP_WAIT_FOR_ME, except on a650 whose
          * firmware is fixed.
          *
          * TODO: A newer a630_sqe.fw that fixes this may be released in
          * the future; if so we should detect it and skip this
          * workaround.  Once there is uapi to query the fw version, this
          * can be replaced with a minimum fw version.
          */
         bool indirect_draw_wfm_quirk;

         /* Some GPUs need the depth test enabled whenever the depth
          * bounds test is enabled and the depth attachment uses UBWC.
          */
         bool depth_bounds_require_depth_test_quirk;

         bool has_tex_filter_cubic;

         bool has_sample_locations;

         /* Firmware on newer a6xx drops CP_REG_WRITE support, since
          * direct register writes can now be used for these regs.
          */
         bool has_cp_reg_write;

         bool has_8bpp_ubwc;

         /* a650 seems to be affected by a bug where flushing CCU color
          * into depth (or vice-versa) requires a WFI.  In particular,
          * clearing a depth attachment (which writes to it as a color
          * attachment) and then using it as a normal depth attachment
          * needs a WFI on top of the expected CCU_FLUSH_COLOR +
          * CCU_INVALIDATE_DEPTH, even though all of those operations
          * happen in the same stage.  Since this is usually the only
          * scenario where a CCU flush doesn't require a WFI, we simply
          * insert a WFI after every CCU flush.
          *
          * Affected tests include
          * dEQP-VK.renderpass.suballocation.formats.d16_unorm.* in sysmem
          * mode (a few tests flake when the entire series is run).
          */
         bool has_ccu_flush_bug;

         bool has_lpac;

         bool has_getfiberid;

         bool has_dp2acc;
         bool has_dp4acc;

         /* LRZ fast-clear works on all gens, but the blob disables it on
          * gen1 and gen2.  We also disable it on those gens: it brings
          * next to no gain, adds complexity, and seems to behave a bit
          * differently from gen3+.  That difference creates at least one
          * edge case: if the first draw that uses LRZ fast-clear doesn't
          * lock the LRZ direction, the fast-clear value is undefined.
          * For details see
          * https://gitlab.freedesktop.org/mesa/mesa/-/issues/6829
          */
         bool enable_lrz_fast_clear;
         bool has_lrz_dir_tracking;
         bool lrz_track_quirk;

         /* Per-device values for the named registers. */
         struct {
            uint32_t RB_UNKNOWN_8E04_blit;
            uint32_t PC_POWER_CNTL;
            uint32_t TPL1_DBG_ECO_CNTL;
         } magic;
      } a6xx;
   };
};
156
/**
 * Identifies a device by gpu-id and/or chip-id.  fd_dev_gpu_id() asserts
 * that at least one of the two is non-zero.
 */
struct fd_dev_id {
   /* Deprecated numeric id; zero when the device has none. */
   uint32_t gpu_id;
   /* When gpu_id is zero, fd_dev_gpu_id() derives an id from bits 31:8. */
   uint64_t chip_id;
};
161
162/**
163 * Note that gpu-id should be considered deprecated.  For newer a6xx, if
164 * there is no gpu-id, this attempts to generate one from the chip-id.
165 * But that may not work forever, so avoid depending on this for newer
166 * gens
167 */
168static inline uint32_t
169fd_dev_gpu_id(const struct fd_dev_id *id)
170{
171   assert(id->gpu_id || id->chip_id);
172   if (!id->gpu_id) {
173      return ((id->chip_id >> 24) & 0xff) * 100 +
174             ((id->chip_id >> 16) & 0xff) * 10 +
175             ((id->chip_id >>  8) & 0xff);
176
177   }
178   return id->gpu_id;
179}
180
/**
 * Return the hardware generation of a device, computed as its gpu-id
 * (see fd_dev_gpu_id()) divided by 100.
 *
 * Declared `static inline` like the other helpers in this header, so that
 * translation units which include the header without calling it do not
 * get an unused static function.
 */
static inline uint8_t
fd_dev_gen(const struct fd_dev_id *id)
{
   return fd_dev_gpu_id(id) / 100;
}
186
/**
 * Return true when the device generation (see fd_dev_gen()) is 5 or
 * later.
 *
 * NOTE(review): the name suggests this marks 64-bit capable gens, but the
 * header does not say which capability is meant; confirm against callers.
 */
static inline bool
fd_dev_64b(const struct fd_dev_id *id)
{
   const uint8_t gen = fd_dev_gen(id);

   return gen >= 5;
}
192
/* Amount of GMEM, per CCU, reserved for the depth cache when doing direct
 * rendering.
 */
#define A6XX_CCU_DEPTH_SIZE (64 * 1024)

/* Amount of GMEM, per CCU, reserved for the color cache used by GMEM
 * resolves that require the color cache (the non-BLIT event case).
 *
 * This is smaller than what direct rendering normally uses; the
 * RB_CCU_CNTL.GMEM bit enables this smaller size.  If a GMEM resolve
 * requires the color cache, the driver must make sure it does not
 * overwrite pixel data in GMEM that is still needed.
 */
#define A6XX_CCU_GMEM_COLOR_SIZE (16 * 1024)
203
/* Return the hardware description for the device identified by id.
 * NOTE(review): defined outside this header; the result for an unknown id
 * is not visible here, so check the implementation before relying on it.
 */
const struct fd_dev_info *fd_dev_info(const struct fd_dev_id *id);

/* Return a name string for the device identified by id.
 * NOTE(review): ownership/lifetime of the returned string is not visible
 * here; confirm against the implementation.
 */
const char *fd_dev_name(const struct fd_dev_id *id);
206
207#ifdef __cplusplus
208} /* end of extern "C" */
209#endif
210
211#endif /* FREEDRENO_DEVICE_INFO_H */
212