/**************************************************************************
 *
 * Copyright 2018-2019 Alyssa Rosenzweig
 * Copyright 2018-2019 Collabora, Ltd.
 * Copyright © 2015 Intel Corporation
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/

#ifndef PAN_DEVICE_H
#define PAN_DEVICE_H

#include <xf86drm.h>
#include "renderonly/renderonly.h"
#include "util/u_dynarray.h"
#include "util/bitset.h"
#include "util/list.h"
#include "util/sparse_array.h"

#include "panfrost/util/pan_ir.h"
#include "pan_pool.h"
#include "pan_util.h"

#include <genxml/gen_macros.h>

#if defined(__cplusplus)
extern "C" {
#endif

/* Driver limits */
#define PAN_MAX_CONST_BUFFERS 16

/* How many power-of-two levels in the BO cache do we want? 2^12
 * minimum chosen as it is the page size that all allocations are
 * rounded to */

#define MIN_BO_CACHE_BUCKET (12) /* 2^12 = 4KB */
#define MAX_BO_CACHE_BUCKET (22) /* 2^22 = 4MB */

/* Fencepost problem, hence the off-by-one */
#define NR_BO_CACHE_BUCKETS (MAX_BO_CACHE_BUCKET - MIN_BO_CACHE_BUCKET + 1)
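
/* A minimal sketch of how an allocation size could map onto the buckets
 * above. This is a hypothetical helper for illustration only -- the real
 * lookup lives with the BO cache implementation, not in this header -- and
 * it assumes oversized allocations clamp to the last bucket, where the
 * actual cache may instead bypass caching for them entirely. */

static inline unsigned
pan_bo_cache_bucket_index_example(unsigned size)
{
   /* Find the smallest power-of-two bucket that fits the allocation,
    * clamped to the configured range */
   unsigned bucket = MIN_BO_CACHE_BUCKET;

   while (bucket < MAX_BO_CACHE_BUCKET && (1u << bucket) < size)
      bucket++;

   /* Reindex so the 4KB bucket lands at buckets[0] */
   return bucket - MIN_BO_CACHE_BUCKET;
}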
struct pan_blitter {
   struct {
      struct pan_pool *pool;
      struct hash_table *blit;
      struct hash_table *blend;
      pthread_mutex_t lock;
   } shaders;
   struct {
      struct pan_pool *pool;
      struct hash_table *rsds;
      pthread_mutex_t lock;
   } rsds;
};

struct pan_blend_shaders {
   struct hash_table *shaders;
   pthread_mutex_t lock;
};

enum pan_indirect_draw_flags {
   PAN_INDIRECT_DRAW_NO_INDEX = 0 << 0,
   PAN_INDIRECT_DRAW_1B_INDEX = 1 << 0,
   PAN_INDIRECT_DRAW_2B_INDEX = 2 << 0,
   PAN_INDIRECT_DRAW_4B_INDEX = 3 << 0,
   PAN_INDIRECT_DRAW_INDEX_SIZE_MASK = 3 << 0,
   PAN_INDIRECT_DRAW_HAS_PSIZ = 1 << 2,
   PAN_INDIRECT_DRAW_PRIMITIVE_RESTART = 1 << 3,
   PAN_INDIRECT_DRAW_UPDATE_PRIM_SIZE = 1 << 4,
   PAN_INDIRECT_DRAW_IDVS = 1 << 5,
   PAN_INDIRECT_DRAW_LAST_FLAG = PAN_INDIRECT_DRAW_IDVS,
   PAN_INDIRECT_DRAW_FLAGS_MASK = (PAN_INDIRECT_DRAW_LAST_FLAG << 1) - 1,
   PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX = PAN_INDIRECT_DRAW_LAST_FLAG << 1,
   PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX,
   PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX,
   PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_1B_INDEX_PRIM_RESTART,
   PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_2B_INDEX_PRIM_RESTART,
   PAN_INDIRECT_DRAW_MIN_MAX_SEARCH_4B_INDEX_PRIM_RESTART,
   PAN_INDIRECT_DRAW_NUM_SHADERS,
};
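
/* A minimal sketch of how a flag word for an indexed indirect draw could
 * be assembled from the encoding above: the two low bits carry the index
 * size (per PAN_INDIRECT_DRAW_INDEX_SIZE_MASK), and the remaining flags OR
 * in on top. Hypothetical helper, not part of this header's API. */

static inline enum pan_indirect_draw_flags
pan_indirect_draw_flags_example(unsigned index_size_B, bool primitive_restart)
{
   unsigned flags;

   switch (index_size_B) {
   case 0:  flags = PAN_INDIRECT_DRAW_NO_INDEX; break;
   case 1:  flags = PAN_INDIRECT_DRAW_1B_INDEX; break;
   case 2:  flags = PAN_INDIRECT_DRAW_2B_INDEX; break;
   case 4:  flags = PAN_INDIRECT_DRAW_4B_INDEX; break;
   default: flags = PAN_INDIRECT_DRAW_NO_INDEX; break; /* unreachable for valid sizes */
   }

   if (primitive_restart)
      flags |= PAN_INDIRECT_DRAW_PRIMITIVE_RESTART;

   return (enum pan_indirect_draw_flags)flags;
}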
struct pan_indirect_draw_shader {
   struct panfrost_ubo_push push;
   mali_ptr rsd;
};

struct pan_indirect_draw_shaders {
   struct pan_indirect_draw_shader shaders[PAN_INDIRECT_DRAW_NUM_SHADERS];

   /* Take the lock when initializing the draw shaders context or when
    * allocating from the binary pool.
    */
   pthread_mutex_t lock;

   /* A memory pool for shader binaries. We currently don't allocate a
    * single BO for all shaders up-front because estimating shader size
    * is not trivial, and changes to the compiler might influence this
    * estimation.
    */
   struct pan_pool *bin_pool;

   /* BO containing all renderer states attached to the compute shaders.
    * These are built at shader compilation time and re-used every time
    * panfrost_emit_indirect_draw() is called.
    */
   struct panfrost_bo *states;

   /* Varying memory is allocated dynamically by compute jobs from this
    * heap.
    */
   struct panfrost_bo *varying_heap;
};

struct pan_indirect_dispatch {
   struct panfrost_ubo_push push;
   struct panfrost_bo *bin;
   struct panfrost_bo *descs;
};

/** Implementation-defined tiler features */
struct panfrost_tiler_features {
   /** Number of bytes per tiler bin */
   unsigned bin_size;

   /** Maximum number of levels that may be simultaneously enabled.
    * Invariant: bitcount(hierarchy_mask) <= max_levels */
   unsigned max_levels;
};

struct panfrost_model {
   /* GPU ID */
   uint32_t gpu_id;

   /* Marketing name for the GPU, used as the GL_RENDERER */
   const char *name;

   /* Set of associated performance counters */
   const char *performance_counters;

   /* Minimum GPU revision required for anisotropic filtering. ~0 means
    * "no revision supports anisotropy" and 0 means "all revisions
    * support anisotropy" -- so checking for anisotropy is simply a
    * revision comparison.
    */
   uint32_t min_rev_anisotropic;

   /* Default tilebuffer size in bytes for the model. */
   unsigned tilebuffer_size;

   struct {
      /* The GPU lacks the capability for hierarchical tiling, without
       * an "Advanced Tiling Unit", instead requiring that the driver
       * select a single bin size for the entire framebuffer.
       */
      bool no_hierarchical_tiling;
   } quirks;
};
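
/* A minimal sketch, assuming the convention documented on
 * min_rev_anisotropic above: the support check reduces to a single
 * comparison. Hypothetical helper, for illustration only. */

static inline bool
panfrost_model_supports_aniso_example(const struct panfrost_model *model,
                                      uint32_t revision)
{
   /* ~0 compares greater than every real revision (none supported);
    * 0 compares less-or-equal to every revision (all supported). */
   return revision >= model->min_rev_anisotropic;
}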
struct panfrost_device {
   /* For ralloc */
   void *memctx;

   int fd;

   /* Properties of the GPU in use */
   unsigned arch;
   unsigned gpu_id;
   unsigned revision;

   /* Number of shader cores */
   unsigned core_count;

   /* Range of core IDs, equal to the maximum core ID + 1. Satisfies
    * core_id_range >= core_count.
    */
   unsigned core_id_range;

   /* Maximum tilebuffer size in bytes for optimal performance. */
   unsigned optimal_tib_size;

   unsigned thread_tls_alloc;
   struct panfrost_tiler_features tiler_features;
   const struct panfrost_model *model;
   bool has_afbc;

   /* Table of formats, indexed by a PIPE format */
   const struct panfrost_format *formats;

   /* Bitmask of supported compressed texture formats */
   uint32_t compressed_formats;

   /* Debug flags; see pan_util.h for how to interpret them */
   unsigned debug;

   drmVersionPtr kernel_version;

   struct renderonly *ro;

   pthread_mutex_t bo_map_lock;
   struct util_sparse_array bo_map;

   struct {
      pthread_mutex_t lock;

      /* List containing all cached BOs sorted in LRU (Least
       * Recently Used) order. This allows us to quickly evict BOs
       * that are more than 1 second old.
       */
      struct list_head lru;

      /* The BO cache is a set of buckets with power-of-two sizes
       * ranging from 2^MIN_BO_CACHE_BUCKET (4096, the page size) to
       * 2^MAX_BO_CACHE_BUCKET (4MB). Each bucket is a linked list
       * of free panfrost_bo objects. */

      struct list_head buckets[NR_BO_CACHE_BUCKETS];
   } bo_cache;

   struct pan_blitter blitter;
   struct pan_blend_shaders blend_shaders;
   struct pan_indirect_draw_shaders indirect_draw_shaders;
   struct pan_indirect_dispatch indirect_dispatch;

   /* Tiler heap shared across all tiler jobs, allocated against the
    * device since there's only a single tiler. Since this is invisible
    * to the CPU, it's okay for multiple contexts to reference it
    * simultaneously; by keeping it on the device struct, we eliminate a
    * costly per-context allocation. */

   struct panfrost_bo *tiler_heap;

   /* The tiler heap is shared by all contexts, and is written by tiler
    * jobs and read by fragment jobs. We need to ensure that a
    * vertex/tiler job chain from one context is not inserted between
    * the vertex/tiler and fragment jobs of another context, otherwise
    * we end up with tiler heap corruption.
    */
   pthread_mutex_t submit_lock;

   /* Sample positions are preloaded into a write-once constant buffer,
    * such that they can be referenced for free later. Needed
    * unconditionally on Bifrost, and useful for sharing with Midgard */

   struct panfrost_bo *sample_positions;
};

void
panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev);

void
panfrost_close_device(struct panfrost_device *dev);

bool
panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt);

void
panfrost_upload_sample_positions(struct panfrost_device *dev);

mali_ptr
panfrost_sample_positions(const struct panfrost_device *dev,
                          enum mali_sample_pattern pattern);
void
panfrost_query_sample_position(enum mali_sample_pattern pattern,
                               unsigned sample_idx,
                               float *out);

unsigned
panfrost_query_l2_slices(const struct panfrost_device *dev);

static inline struct panfrost_bo *
pan_lookup_bo(struct panfrost_device *dev, uint32_t gem_handle)
{
   return (struct panfrost_bo *)util_sparse_array_get(&dev->bo_map, gem_handle);
}

static inline bool
pan_is_bifrost(const struct panfrost_device *dev)
{
   return dev->arch >= 6 && dev->arch <= 7;
}

const struct panfrost_model *panfrost_get_model(uint32_t gpu_id);

#if defined(__cplusplus)
} // extern "C"
#endif

#endif /* PAN_DEVICE_H */