/*
 * Copyright (C) 2019 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *   Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include <xf86drm.h>

#include "util/u_math.h"
#include "util/macros.h"
#include "util/hash_table.h"
#include "util/u_thread.h"
#include "drm-uapi/panfrost_drm.h"
#include "pan_encoder.h"
#include "pan_device.h"
#include "pan_bo.h"
#include "pan_texture.h"
#include "wrap.h"
#include "pan_util.h"

/* Fixed "minimum revisions" for anisotropic filtering support */
#define NO_ANISO (~0)
#define HAS_ANISO (0)

#define MODEL(gpu_id_, shortname, counters_, min_rev_anisotropic_, tib_size_, quirks_) \
        { \
                .gpu_id = gpu_id_, \
                .name = "Mali-" shortname " (Panfrost)", \
                .performance_counters = counters_, \
                .min_rev_anisotropic = min_rev_anisotropic_, \
                .tilebuffer_size = tib_size_, \
                .quirks = quirks_, \
        }

/* Table of supported Mali GPUs */
const struct panfrost_model panfrost_model_list[] = {
        MODEL(0x720,  "T720",   "T72x", NO_ANISO,          8192,  { .no_hierarchical_tiling = true }),
        MODEL(0x750,  "T760",   "T76x", NO_ANISO,          8192,  {}),
        MODEL(0x820,  "T820",   "T82x", NO_ANISO,          8192,  { .no_hierarchical_tiling = true }),
        MODEL(0x830,  "T830",   "T83x", NO_ANISO,          8192,  { .no_hierarchical_tiling = true }),
        MODEL(0x860,  "T860",   "T86x", NO_ANISO,          8192,  {}),
        MODEL(0x880,  "T880",   "T88x", NO_ANISO,          8192,  {}),

        MODEL(0x6000, "G71",    "TMIx", NO_ANISO,          8192,  {}),
        MODEL(0x6221, "G72",    "THEx", 0x0030 /* r0p3 */, 16384, {}),
        MODEL(0x7090, "G51",    "TSIx", 0x1010 /* r1p1 */, 16384, {}),
        MODEL(0x7093, "G31",    "TDVx", HAS_ANISO,         16384, {}),
        MODEL(0x7211, "G76",    "TNOx", HAS_ANISO,         16384, {}),
        MODEL(0x7212, "G52",    "TGOx", HAS_ANISO,         16384, {}),
        MODEL(0x7402, "G52 r1", "TGOx", HAS_ANISO,         16384, {}),
        MODEL(0x9093, "G57",    "TNAx", HAS_ANISO,         16384, {}),
};

#undef NO_ANISO
#undef HAS_ANISO
#undef MODEL

/*
 * Look up a supported model by its GPU ID, or return NULL if the model is not
 * supported at this time.
 */
const struct panfrost_model *
panfrost_get_model(uint32_t gpu_id)
{
        for (unsigned i = 0; i < ARRAY_SIZE(panfrost_model_list); ++i) {
                if (panfrost_model_list[i].gpu_id == gpu_id)
                        return &panfrost_model_list[i];
        }

        return NULL;
}

/* Abstraction over the raw drm_panfrost_get_param ioctl for fetching
 * information about devices */

static __u64
panfrost_query_raw(
                int fd,
                enum drm_panfrost_param param,
                bool required,
                unsigned default_value)
{
        struct drm_panfrost_get_param get_param = {0,};
        ASSERTED int ret;

        get_param.param = param;
        ret = drmIoctl(fd, DRM_IOCTL_PANFROST_GET_PARAM, &get_param);

        if (ret) {
                assert(!required);
                return default_value;
        }

        return get_param.value;
}

static unsigned
panfrost_query_gpu_version(int fd)
{
        return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_PROD_ID, true, 0);
}

static unsigned
panfrost_query_gpu_revision(int fd)
{
        return panfrost_query_raw(fd, DRM_PANFROST_PARAM_GPU_REVISION, true, 0);
}

unsigned
panfrost_query_l2_slices(const struct panfrost_device *dev)
{
        /* Query MEM_FEATURES register */
        uint32_t mem_features =
                panfrost_query_raw(dev->fd, DRM_PANFROST_PARAM_MEM_FEATURES,
                                   true, 0);

        /* The L2_SLICES field (MEM_FEATURES[11:8]) encodes the slice count
         * minus 1, so add 1 to decode */
        return ((mem_features >> 8) & 0xF) + 1;
}

static struct panfrost_tiler_features
panfrost_query_tiler_features(int fd)
{
        /* Default value (2^9 bytes and 8 levels) to match old behaviour */
        uint32_t raw = panfrost_query_raw(fd, DRM_PANFROST_PARAM_TILER_FEATURES,
                                          false, 0x809);

        /* Bin size is log2 in the first byte, max levels in the second byte */
        return (struct panfrost_tiler_features) {
                .bin_size = (1 << (raw & BITFIELD_MASK(5))),
                .max_levels = (raw >> 8) & BITFIELD_MASK(4)
        };
}
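/*
 * Worked example of the decode above, expressed as compile-time checks.
 * These asserts are illustrative additions rather than upstream code, and
 * assume a C11 compiler for _Static_assert. The fallback TILER_FEATURES
 * value 0x809 decodes to 2^9 = 512-byte bins and 8 hierarchy levels,
 * matching the "old behaviour" default documented in
 * panfrost_query_tiler_features.
 */
_Static_assert((1 << (0x809 & BITFIELD_MASK(5))) == 512,
               "0x809 encodes 512-byte tiler bins");
_Static_assert(((0x809 >> 8) & BITFIELD_MASK(4)) == 8,
               "0x809 encodes 8 hierarchy levels");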
static unsigned
panfrost_query_core_count(int fd, unsigned *core_id_range)
{
        /* On older kernels that lack the query, assume a worst case of 16
         * cores */
        unsigned mask = panfrost_query_raw(fd,
                        DRM_PANFROST_PARAM_SHADER_PRESENT, false, 0xffff);

        /* Some cores might be absent. In some cases, we care about the
         * range of core IDs (that is, the greatest core ID + 1). If the
         * core mask is contiguous, this equals the core count.
         */
        *core_id_range = util_last_bit(mask);

        /* The actual core count skips over the gaps */
        return util_bitcount(mask);
}

/* Architectural maximums, used as a fallback when the THREAD_TLS_ALLOC
 * parameter is not implemented or reports zero. */

static unsigned
panfrost_max_thread_count(unsigned arch)
{
        switch (arch) {
        /* Midgard */
        case 4:
        case 5:
                return 256;

        /* Bifrost, first generation */
        case 6:
                return 384;

        /* Bifrost, second generation (G31 is actually 512, but the
         * difference doesn't matter here) */
        case 7:
                return 768;

        /* Valhall (for completeness) */
        default:
                return 1024;
        }
}

static unsigned
panfrost_query_thread_tls_alloc(int fd, unsigned arch)
{
        unsigned tls = panfrost_query_raw(fd,
                        DRM_PANFROST_PARAM_THREAD_TLS_ALLOC, false, 0);

        return (tls > 0) ? tls : panfrost_max_thread_count(arch);
}

static uint32_t
panfrost_query_compressed_formats(int fd)
{
        /* If the kernel lacks the query, assume ETC2/ASTC only: that is the
         * factory default for Juno, and should exist on any Mali
         * configuration. All hardware should report these texture formats,
         * but the kernel might not be new enough to expose them. */
        uint32_t default_set =
                (1 << MALI_ETC2_RGB8) |
                (1 << MALI_ETC2_R11_UNORM) |
                (1 << MALI_ETC2_RGBA8) |
                (1 << MALI_ETC2_RG11_UNORM) |
                (1 << MALI_ETC2_R11_SNORM) |
                (1 << MALI_ETC2_RG11_SNORM) |
                (1 << MALI_ETC2_RGB8A1) |
                (1 << MALI_ASTC_3D_LDR) |
                (1 << MALI_ASTC_3D_HDR) |
                (1 << MALI_ASTC_2D_LDR) |
                (1 << MALI_ASTC_2D_HDR);

        return panfrost_query_raw(fd, DRM_PANFROST_PARAM_TEXTURE_FEATURES0,
                                  false, default_set);
}

/* DRM_PANFROST_PARAM_TEXTURE_FEATURES0 returns a bitmask of supported
 * compressed formats, so we offer a helper to test whether a given format is
 * supported */

bool
panfrost_supports_compressed_format(struct panfrost_device *dev, unsigned fmt)
{
        if (MALI_EXTRACT_TYPE(fmt) != MALI_FORMAT_COMPRESSED)
                return true;

        unsigned idx = fmt & ~MALI_FORMAT_COMPRESSED;
        assert(idx < 32);

        return dev->compressed_formats & (1 << idx);
}

/* Check for AFBC hardware support. AFBC was introduced in v5.
 * Implementations may omit it, which is signalled by a nonzero value in the
 * AFBC_FEATURES property. */

static bool
panfrost_query_afbc(int fd, unsigned arch)
{
        unsigned reg = panfrost_query_raw(fd,
                                          DRM_PANFROST_PARAM_AFBC_FEATURES,
                                          false, 0);

        return (arch >= 5) && (reg == 0);
}

/*
 * To pipeline multiple tiles, a given tile may use at most half of the tile
 * buffer. This function returns the optimal size (assuming pipelining).
 *
 * For Mali-G510 and Mali-G310, we will need extra logic to query the
 * tilebuffer size for the particular variant. The CORE_FEATURES register
 * might help.
 */
static unsigned
panfrost_query_optimal_tib_size(const struct panfrost_device *dev)
{
        /* Preconditions ensure the returned value is a multiple of 1 KiB,
         * the granularity of the colour buffer allocation field.
         */
        assert(dev->model->tilebuffer_size >= 2048);
        assert(util_is_power_of_two_nonzero(dev->model->tilebuffer_size));

        return dev->model->tilebuffer_size / 2;
}

void
panfrost_open_device(void *memctx, int fd, struct panfrost_device *dev)
{
        dev->fd = fd;
        dev->memctx = memctx;
        dev->gpu_id = panfrost_query_gpu_version(fd);
        dev->arch = pan_arch(dev->gpu_id);
        dev->kernel_version = drmGetVersion(fd);
        dev->revision = panfrost_query_gpu_revision(fd);
        dev->model = panfrost_get_model(dev->gpu_id);

        /* If we don't recognize the model, bail early */
        if (!dev->model)
                return;

        dev->core_count = panfrost_query_core_count(fd, &dev->core_id_range);
        dev->thread_tls_alloc = panfrost_query_thread_tls_alloc(fd, dev->arch);
        dev->optimal_tib_size = panfrost_query_optimal_tib_size(dev);
        dev->compressed_formats = panfrost_query_compressed_formats(fd);
        dev->tiler_features = panfrost_query_tiler_features(fd);
        dev->has_afbc = panfrost_query_afbc(fd, dev->arch);

        if (dev->arch <= 6)
                dev->formats = panfrost_pipe_format_v6;
        else if (dev->arch <= 7)
                dev->formats = panfrost_pipe_format_v7;
        else
                dev->formats = panfrost_pipe_format_v9;

        util_sparse_array_init(&dev->bo_map, sizeof(struct panfrost_bo), 512);

        pthread_mutex_init(&dev->bo_cache.lock, NULL);
        list_inithead(&dev->bo_cache.lru);

        for (unsigned i = 0; i < ARRAY_SIZE(dev->bo_cache.buckets); ++i)
                list_inithead(&dev->bo_cache.buckets[i]);

        /* Initialize pandecode before we start allocating */
        if (dev->debug & (PAN_DBG_TRACE | PAN_DBG_SYNC))
                pandecode_initialize(!(dev->debug & PAN_DBG_TRACE));

        /* The tiler heap is internally required by the tiler, which can only
         * be active for a single job chain at once, so a single heap can be
         * shared across batches/contexts */
        dev->tiler_heap = panfrost_bo_create(dev, 128 * 1024 * 1024,
                        PAN_BO_INVISIBLE | PAN_BO_GROWABLE, "Tiler heap");

        pthread_mutex_init(&dev->submit_lock, NULL);

        /* Done once on init */
        panfrost_upload_sample_positions(dev);
}

void
panfrost_close_device(struct panfrost_device *dev)
{
        /* If we don't recognize the model, the rest of the device never
         * existed, since we early-exited panfrost_open_device.
         */
        if (dev->model) {
                pthread_mutex_destroy(&dev->submit_lock);
                panfrost_bo_unreference(dev->tiler_heap);
                panfrost_bo_cache_evict_all(dev);
                pthread_mutex_destroy(&dev->bo_cache.lock);
                util_sparse_array_finish(&dev->bo_map);
        }

        drmFreeVersion(dev->kernel_version);
        close(dev->fd);
}