1/**************************************************************************
2 *
3 * Copyright 2018-2019 Alyssa Rosenzweig
4 * Copyright 2018-2019 Collabora, Ltd.
5 * Copyright © 2015 Intel Corporation
6 * All Rights Reserved.
7 *
8 * Permission is hereby granted, free of charge, to any person obtaining a
9 * copy of this software and associated documentation files (the
10 * "Software"), to deal in the Software without restriction, including
11 * without limitation the rights to use, copy, modify, merge, publish,
12 * distribute, sub license, and/or sell copies of the Software, and to
13 * permit persons to whom the Software is furnished to do so, subject to
14 * the following conditions:
15 *
16 * The above copyright notice and this permission notice (including the
17 * next paragraph) shall be included in all copies or substantial portions
18 * of the Software.
19 *
20 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
21 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
22 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
23 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
24 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
25 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
26 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
27 *
28 **************************************************************************/
29
30#ifndef PAN_DEVICE_H
31#define PAN_DEVICE_H
32
33#include <xf86drm.h>
34#include "renderonly/renderonly.h"
35#include "util/u_dynarray.h"
36#include "util/bitset.h"
37#include "util/list.h"
38#include "util/sparse_array.h"
39
40#include "panfrost/util/pan_ir.h"
41#include "pan_pool.h"
42#include "pan_util.h"
43
44#include <genxml/gen_macros.h>
45
46#if defined(__cplusplus)
47extern "C" {
48#endif
49
/* Driver limits */
#define PAN_MAX_CONST_BUFFERS 16

/* The BO cache keeps one bucket per power-of-two size class. The smallest
 * class is 2^12 bytes, chosen because that is the page size all allocations
 * are rounded to; the largest class is 2^22 bytes. */

#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */

/* Both endpoints are inclusive (fencepost problem), hence the off-by-one */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
62
/* Blitter state embedded in struct panfrost_device. It holds two groups,
 * each made of a pool, one or more hash tables and a mutex.
 *
 * NOTE(review): presumably each mutex guards the pool and tables of its own
 * group -- confirm against the blitter implementation.
 */
struct pan_blitter {
        /* Blit and blend shader tables */
        struct {
                struct pan_pool *pool;
                struct hash_table *blit;
                struct hash_table *blend;
                pthread_mutex_t lock;
        } shaders;

        /* Table of RSDs (presumably renderer state descriptors -- confirm) */
        struct {
                struct pan_pool *pool;
                struct hash_table *rsds;
                pthread_mutex_t lock;
        } rsds;
};
76
/* Blend shader table embedded in struct panfrost_device.
 *
 * NOTE(review): presumably `lock` must be held while accessing `shaders` --
 * confirm against the blend shader implementation.
 */
struct pan_blend_shaders {
        struct hash_table *shaders;
        pthread_mutex_t lock;
};
81
/* Identifiers for the indirect draw helper shaders.
 *
 * Values up to PAN_INDIRECT_DRAW_FLAGS_MASK are a bitfield: the two low bits
 * encode the index size (none, 1, 2 or 4 bytes) and the bits above them are
 * independent flags. Values past the mask are a plain enumeration of the
 * min/max index search shaders. PAN_INDIRECT_DRAW_NUM_SHADERS is one past the
 * last valid value.
 */
enum pan_indirect_draw_flags {
        PAN_INDIRECT_DRAW_NO_INDEX = 0 << 0,
        PAN_INDIRECT_DRAW_1B_INDEX = 1 << 0,
        PAN_INDIRECT_DRAW_2B_INDEX = 2 << 0,
        PAN_INDIRECT_DRAW_4B_INDEX = 3 << 0,
        PAN_INDIRECT_DRAW_INDEX_SIZE_MASK = 3 << 0,
        PAN_INDIRECT_DRAW_HAS_PSIZ = 1 << 2,
        PAN_INDIRECT_DRAW_PRIMITIVE_RESTART = 1 << 3,
        PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE = 1 << 4,
        PAN_INDIRECT_DRAW_IDVS = 1 << 5,
        PAN_INDIRECT_DRAW_LAST_FLAG = PAN_INDIRECT_DRAW_IDVS,
        PAN_INDIRECT_DRAW_FLAGS_MASK = (PAN_INDIRECT_DRAW_LAST_FLAG << 1) - 1,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX = PAN_INDIRECT_DRAW_LAST_FLAG << 1,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX_PRIM_RESTART,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX_PRIM_RESTART,
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX_PRIM_RESTART,

        /* Historical misspelling ("3B") of the 4-byte variant, kept as an
         * alias with the same value so existing users keep compiling. */
        PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_3B_INDEX_PRIM_RESTART =
                PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX_PRIM_RESTART,

        PAN_INDIRECT_DRAW_NUM_SHADERS,
};
102
103struct pan_indirect_draw_shader {
104        struct panfrost_ubo_push push;
105        mali_ptr rsd;
106};
107
108struct pan_indirect_draw_shaders {
109        struct pan_indirect_draw_shader shaders[PAN_INDIRECT_DRAW_NUM_SHADERS];
110
111        /* Take the lock when initializing the draw shaders context or when
112         * allocating from the binary pool.
113         */
114        pthread_mutex_t lock;
115
116        /* A memory pool for shader binaries. We currently don't allocate a
117         * single BO for all shaders up-front because estimating shader size
118         * is not trivial, and changes to the compiler might influence this
119         * estimation.
120         */
121        struct pan_pool *bin_pool;
122
123        /* BO containing all renderer states attached to the compute shaders.
124         * Those are built at shader compilation time and re-used every time
125         * panfrost_emit_indirect_draw() is called.
126         */
127        struct panfrost_bo *states;
128
129        /* Varying memory is allocated dynamically by compute jobs from this
130         * heap.
131         */
132        struct panfrost_bo *varying_heap;
133};
134
135struct pan_indirect_dispatch {
136        struct panfrost_ubo_push push;
137        struct panfrost_bo *bin;
138        struct panfrost_bo *descs;
139};
140
/** Implementation-defined tiler features */
struct panfrost_tiler_features {
        /** Number of bytes per tiler bin */
        unsigned bin_size;

        /** Maximum number of levels that may be simultaneously enabled.
         * Invariant: bitcount(hierarchy_mask) <= max_levels */
        unsigned max_levels;
};
150
/* Static description of one GPU model. */
struct panfrost_model {
        /* GPU ID */
        uint32_t gpu_id;

        /* Marketing name for the GPU, used as the GL_RENDERER */
        const char *name;

        /* Set of associated performance counters */
        const char *performance_counters;

        /* Minimum GPU revision required for anisotropic filtering. ~0 and 0
         * mean "no revisions support anisotropy" and "all revisions support
         * anisotropy" respectively -- so checking for anisotropy is simply
         * comparing the revision.
         */
        uint32_t min_rev_anisotropic;

        /* Default tilebuffer size in bytes for the model. */
        unsigned tilebuffer_size;

        struct {
                /* The GPU lacks the capability for hierarchical tiling, without
                 * an "Advanced Tiling Unit", instead requiring a single bin
                 * size for the entire framebuffer be selected by the driver
                 */
                bool no_hierarchical_tiling;
        } quirks;
};
179
180struct panfrost_device {
181        /* For ralloc */
182        void *memctx;
183
184        int fd;
185
186        /* Properties of the GPU in use */
187        unsigned arch;
188        unsigned gpu_id;
189        unsigned revision;
190
191        /* Number of shader cores */
192        unsigned core_count;
193
194        /* Range of core IDs, equal to the maximum core ID + 1. Satisfies
195         * core_id_range >= core_count.
196         */
197        unsigned core_id_range;
198
199        /* Maximum tilebuffer size in bytes for optimal performance. */
200        unsigned optimal_tib_size;
201
202        unsigned thread_tls_alloc;
203        struct panfrost_tiler_features tiler_features;
204        const struct panfrost_model *model;
205        bool has_afbc;
206
207        /* Table of formats, indexed by a PIPE format */
208        const struct panfrost_format *formats;
209
210        /* Bitmask of supported compressed texture formats */
211        uint32_t compressed_formats;
212
213        /* debug flags, see pan_util.h how to interpret */
214        unsigned debug;
215
216        drmVersionPtr kernel_version;
217
218        struct renderonly *ro;
219
220        pthread_mutex_t bo_map_lock;
221        struct util_sparse_array bo_map;
222
223        struct {
224                pthread_mutex_t lock;
225
226                /* List containing all cached BOs sorted in LRU (Least
227                 * Recently Used) order. This allows us to quickly evict BOs
228                 * that are more than 1 second old.
229                 */
230                struct list_head lru;
231
232                /* The BO cache is a set of buckets with power-of-two sizes
233                 * ranging from 2^12 (4096, the page size) to
234                 * 2^(12 + MAX_BO_CACHE_BUCKETS).
235                 * Each bucket is a linked list of free panfrost_bo objects. */
236
237                struct list_head buckets[NR_BO_CACHE_BUCKETS];
238        } bo_cache;
239
240        struct pan_blitter blitter;
241        struct pan_blend_shaders blend_shaders;
242        struct pan_indirect_draw_shaders indirect_draw_shaders;
243        struct pan_indirect_dispatch indirect_dispatch;
244
245        /* Tiler heap shared across all tiler jobs, allocated against the
246         * device since there's only a single tiler. Since this is invisible to
247         * the CPU, it's okay for multiple contexts to reference it
248         * simultaneously; by keeping on the device struct, we eliminate a
249         * costly per-context allocation. */
250
251        struct panfrost_bo *tiler_heap;
252
253        /* The tiler heap is shared by all contexts, and is written by tiler
254         * jobs and read by fragment job. We need to ensure that a
255         * vertex/tiler job chain from one context is not inserted between
256         * the vertex/tiler and fragment job of another context, otherwise
257         * we end up with tiler heap corruption.
258         */
259        pthread_mutex_t submit_lock;
260
261        /* Sample positions are preloaded into a write-once constant buffer,
262         * such that they can be referenced fore free later. Needed
263         * unconditionally on Bifrost, and useful for sharing with Midgard */
264
265        struct panfrost_bo *sample_positions;
266};
267
268void
269panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev);
270
271void
272panfrost_close_device(struct panfrost_device *dev);
273
274bool
275panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt);
276
277void
278panfrost_upload_sample_positions(struct panfrost_device *dev);
279
280mali_ptr
281panfrost_sample_positions(const struct panfrost_device *dev,
282                enum mali_sample_pattern pattern);
283void
284panfrost_query_sample_position(
285                enum mali_sample_pattern pattern,
286                unsigned sample_idx,
287                float *out);
288
289unsigned
290panfrost_query_l2_slices(const struct panfrost_device *dev);
291
292static inline struct panfrost_bo *
293pan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle)
294{
295        return (struct panfrost_bo *)util_sparse_array_get(&dev->bo_map, gem_handle);
296}
297
298static inline bool
299pan_is_bifrost(const struct panfrost_device *dev)
300{
301        return dev->arch >= 6 && dev->arch <= 7;
302}
303
/* Look up the model description for `gpu_id`.
 * NOTE(review): behaviour for an unknown ID is not visible from this header
 * -- confirm before relying on a non-NULL result. */
const struct panfrost_model *panfrost_get_model(uint32_t gpu_id);
305
306#if defined(__cplusplus)
307} // extern "C"
308#endif
309
310#endif
311