1# 2# Copyright © 2021 Google, Inc. 3# 4# Permission is hereby granted, free of charge, to any person obtaining a 5# copy of this software and associated documentation files (the "Software"), 6# to deal in the Software without restriction, including without limitation 7# the rights to use, copy, modify, merge, publish, distribute, sublicense, 8# and/or sell copies of the Software, and to permit persons to whom the 9# Software is furnished to do so, subject to the following conditions: 10# 11# The above copyright notice and this permission notice (including the next 12# paragraph) shall be included in all copies or substantial portions of the 13# Software. 14# 15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21# IN THE SOFTWARE. 
from mako.template import Template
import sys


class Error(Exception):
    """Raised when a device-info lookup cannot be satisfied."""


def max_bitfield_val(high, low, shift):
    """Return an all-ones value (high - low) bits wide, shifted left by
    shift bits.

    NOTE(review): the width is computed as high - low, ie. high is treated
    as an exclusive bound.  Confirm that this matches the bit positions
    callers pass in before changing it, the results are hardware limits.
    """
    return ((1 << (high - low)) - 1) << shift


class State(object):
    """Accumulates everything that gets emitted into the generated table."""

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of gpu_info within self.gpu_infos.

        Raises Error if gpu_info was never added to the list.
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        raise Error("invalid info")


s = State()


def add_gpus(ids, info):
    """Map every GPUId in ids to the (shared) info struct in the global
    state table.
    """
    for gpu in ids:
        s.gpus[gpu] = info


class GPUId(object):
    """Identifies a GPU by gpu_id and/or chip_id, plus a printable name.

    If chip_id is not given it is derived from the decimal digits of
    gpu_id: hundreds -> core, tens -> major, ones -> minor, packed as
    (core << 24) | (major << 16) | (minor << 8) | 0xff.

    If gpu_id is not given it is stored as 0.  If name is not given it
    defaults to "FD<gpu_id>", which requires a non-zero gpu_id.
    """
    def __init__(self, gpu_id = None, chip_id = None, name=None):
        if chip_id is None:
            assert gpu_id is not None
            # Split the decimal gpu_id into its core/major/minor digits
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id
        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id
        if name is None:
            # No way to build a default name without a gpu_id
            assert gpu_id != 0
            name = "FD%d" % gpu_id
        self.name = name


class Struct(object):
    """A helper class that stringifies itself to a 'C' struct initializer
    """
    def __str__(self):
        # One ".name=value," designated initializer per instance attribute,
        # in attribute-creation order.  Nested Structs recurse via str().
        fields = "".join(".%s=%s," % (name, value)
                         for name, value in vars(self).items())
        return "{" + fields + "}"


class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift

       Constructing an instance appends it to the global list of unique
       device-info structs.
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        self.gmem_align_w = gmem_align_w
        self.gmem_align_h = gmem_align_h
        self.tile_align_w = tile_align_w
        self.tile_align_h = tile_align_h
        self.tile_max_w = tile_max_w
        self.tile_max_h = tile_max_h
        self.num_vsc_pipes = num_vsc_pipes

        s.gpu_infos.append(self)


class A6xxGPUInfo(GPUInfo):
    """The a6xx generation has a lot more parameters, and is broken down
       into distinct sub-generations.  The template parameter avoids
       duplication of parameters that are unique to the sub-generation.

       template is a dict of a6xx fields; its "magic" entry is itself a
       dict of magic register values.  num_sp_cores and num_ccu must be
       equal.
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_POWER_CNTL):
        super().__init__(gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = 32, tile_align_h = 32,
                         tile_max_w = 1024, # max_bitfield_val(5, 0, 5)
                         tile_max_h = max_bitfield_val(14, 8, 4),
                         num_vsc_pipes = 32)
        assert num_sp_cores == num_ccu

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU
        if num_ccu == 3:
            self.tile_align_w = 96

        self.a6xx = Struct()
        self.a6xx.magic = Struct()

        for name, val in template["magic"].items():
            setattr(self.a6xx.magic, name, val)

        # Various "magic" register values:
        self.a6xx.magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        self.a6xx.magic.PC_POWER_CNTL = PC_POWER_CNTL

        # Things that earlier gens have and later gens remove, provide
        # defaults here and let them be overridden by sub-gen template:
        self.a6xx.has_cp_reg_write = True
        self.a6xx.has_8bpp_ubwc = True

        for name, val in template.items():
            if name == "magic": # handled above
                continue
            setattr(self.a6xx, name, val)

# a2xx is really two sub-generations,
# a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus([
        GPUId(200),
        GPUId(201),
        GPUId(205),
        GPUId(220),
    ], GPUInfo(
        gmem_align_w = 32, gmem_align_h = 32,
        tile_align_w = 32, tile_align_h = 32,
        tile_max_w = 512,
        tile_max_h = ~0, # TODO
        num_vsc_pipes = 8,
    ))

# a3xx:
add_gpus([
        GPUId(305),
        GPUId(307),
        GPUId(320),
        GPUId(330),
    ], GPUInfo(
        gmem_align_w = 32, gmem_align_h = 32,
        tile_align_w = 32, tile_align_h = 32,
        tile_max_w = 992, # max_bitfield_val(4, 0, 5)
        tile_max_h = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
    ))

# a4xx:
add_gpus([
        GPUId(405),
        GPUId(420),
        GPUId(430),
    ], GPUInfo(
        gmem_align_w = 32, gmem_align_h = 32,
        tile_align_w = 32, tile_align_h = 32,
        tile_max_w = 1024, # max_bitfield_val(4, 0, 5)
        tile_max_h = max_bitfield_val(9, 5, 5),
        num_vsc_pipes = 8,
    ))

# a5xx:
add_gpus([
        GPUId(508),
        GPUId(509),
        GPUId(510),
        GPUId(512),
        GPUId(530),
        GPUId(540),
    ], GPUInfo(
        gmem_align_w = 64, gmem_align_h = 32,
        tile_align_w = 64, tile_align_h = 32,
        tile_max_w = 1024, # max_bitfield_val(7, 0, 5)
        tile_max_h = max_bitfield_val(16, 9, 5),
        num_vsc_pipes = 16,
    ))

# a6xx can be divided into distinct sub-generations, where certain device-
# info parameters are keyed to the sub-generation. These templates reduce
# the copypaste

# a615, a616, a618, a619, a620 and a630:
a6xx_gen1 = dict(
        fibers_per_sp = 128 * 16,
        reg_size_vec4 = 96,
        instr_cache_size = 64,
        concurrent_resolve = True,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True,
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0x100000,
        )
    )

# a640, a680:
a6xx_gen2 = dict(
        fibers_per_sp = 128 * 4 * 16,
        reg_size_vec4 = 96,
        instr_cache_size = 64, # TODO
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        indirect_draw_wfm_quirk = True,
        depth_bounds_require_depth_test_quirk = True, # TODO: check if true
        has_dp2acc = False, # TODO: check if true
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0,
        ),
    )

# a650:
a6xx_gen3 = dict(
        fibers_per_sp = 128 * 2 * 16,
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
        instr_cache_size = 127,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_sample_locations = True,
        has_ccu_flush_bug = True,
        has_8bpp_ubwc = False,
        has_dp2acc = True,
        has_lrz_dir_tracking = True,
        enable_lrz_fast_clear = True,
        lrz_track_quirk = True,
        magic = dict(
            # this seems to be a chicken bit that fixes cubic filtering:
            TPL1_DBG_ECO_CNTL = 0x1000000,
        ),
    )

# a635, a660:
a6xx_gen4 = dict(
        fibers_per_sp = 128 * 2 * 16,
        reg_size_vec4 = 64,
        # Blob limits it to 128 but we hang with 128
        instr_cache_size = 127,
        supports_multiview_mask = True,
        has_z24uint_s8uint = True,
        tess_use_shared = True,
        storage_16bit = True,
        has_tex_filter_cubic = True,
        has_sample_locations = True,
        has_ccu_flush_bug = True,
        has_cp_reg_write = False,
        has_8bpp_ubwc = False,
        has_lpac = True,
        has_shading_rate = True,
        has_getfiberid = True,
        has_dp2acc = True,
        has_dp4acc = True,
        enable_lrz_fast_clear = True,
        has_lrz_dir_tracking = True,
        magic = dict(
            TPL1_DBG_ECO_CNTL = 0x5008000,
        ),
    )

# Each A6xxGPUInfo() below registers one info struct built from a
# sub-generation template plus the per-GPU parameters.  The order of these
# calls determines the order of the generated tables, so keep it stable.
add_gpus([
        GPUId(615),
        GPUId(616),
        GPUId(618),
        GPUId(619),
        GPUId(620),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 1,
        num_ccu = 1,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 0,
    ))

add_gpus([
        GPUId(630),
    ], A6xxGPUInfo(
        a6xx_gen1,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x01000000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(640),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(680),
    ], A6xxGPUInfo(
        a6xx_gen2,
        num_sp_cores = 4,
        num_ccu = 4,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 3,
    ))

add_gpus([
        GPUId(650),
    ], A6xxGPUInfo(
        a6xx_gen3,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))

# These entries are identified by chip_id only (no gpu_id), so each one
# has to supply its name explicitly:
add_gpus([
        GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"),
        GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"),
        GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"),
        # fallback wildcard entry should be last:
        GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 2,
        num_ccu = 2,
        RB_UNKNOWN_8E04_blit = 0x00100000,
        PC_POWER_CNTL = 1,
    ))

add_gpus([
        GPUId(660),
    ], A6xxGPUInfo(
        a6xx_gen4,
        num_sp_cores = 3,
        num_ccu = 3,
        RB_UNKNOWN_8E04_blit = 0x04100000,
        PC_POWER_CNTL = 2,
    ))

# Mako template for the generated C source: one fd_dev_info initializer per
# unique info struct, followed by a table with one fd_dev_rec per GPU id
# that points at the matching info struct.
template = """\
/* Copyright (C) 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "freedreno_dev_info.h"

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};
"""

# Render the template against the accumulated state and write the
# generated C source to stdout.
print(Template(template).render(s=s))