1#
2# Copyright © 2021 Google, Inc.
3#
4# Permission is hereby granted, free of charge, to any person obtaining a
5# copy of this software and associated documentation files (the "Software"),
6# to deal in the Software without restriction, including without limitation
7# the rights to use, copy, modify, merge, publish, distribute, sublicense,
8# and/or sell copies of the Software, and to permit persons to whom the
9# Software is furnished to do so, subject to the following conditions:
10#
11# The above copyright notice and this permission notice (including the next
12# paragraph) shall be included in all copies or substantial portions of the
13# Software.
14#
15# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21# IN THE SOFTWARE.
22
23from mako.template import Template
24import sys
25
def max_bitfield_val(high, low, shift):
    """Return ``high - low`` one-bits shifted left by ``shift``.

    Used by the tables in this file to derive a tile-size limit from the
    position of a register bitfield.

    NOTE(review): the width used here is ``high - low``, not
    ``high - low + 1``.  Some inline comments next to literal limits (e.g.
    ``992`` annotated as ``max_bitfield_val(4, 0, 5)``) look like they assume
    the inclusive width -- confirm which is intended before changing this,
    since every generated tile limit depends on it.
    """
    width = high - low
    all_ones = (1 << width) - 1
    return all_ones << shift
28
class State(object):
    """Registry of the device-info structs and GPU ids declared in this file.

    The table definitions fill it in, and the output template iterates over
    it to emit the C source.
    """

    def __init__(self):
        # List of unique device-info structs, multiple different GPU ids
        # can map to a single info struct in cases where the differences
        # are not sw visible, or the only differences are parameters
        # queried from the kernel (like GMEM size)
        self.gpu_infos = []

        # Table mapping GPU id to device-info struct
        self.gpus = {}

    def info_index(self, gpu_info):
        """Return the position of ``gpu_info`` within ``self.gpu_infos``.

        The output template uses this index to name the generated
        ``__info<N>`` symbols.

        Raises:
            ValueError: if ``gpu_info`` was never registered.  (This used to
                raise an undefined name, which surfaced as ``NameError``.)
        """
        for index, info in enumerate(self.gpu_infos):
            if gpu_info == info:
                return index
        raise ValueError("invalid info")
47
# Global registry: populated by the table definitions below and read by the
# output template at the end of the file.
s = State()


def add_gpus(ids, info):
    """Register every id in ``ids`` as mapping to the single ``info`` struct.

    Entries are stored in ``s.gpus`` in the order they appear in ``ids``.
    """
    s.gpus.update((gpu, info) for gpu in ids)
53
class GPUId(object):
    """Identifies one GPU by numeric gpu id, chip id and display name.

    Args:
        gpu_id: decimal id such as 630.  May be omitted when ``chip_id`` is
            given, in which case it is stored as 0.
        chip_id: packed chip id.  When omitted it is derived from the
            decimal digits of ``gpu_id`` as
            ``(core << 24) | (major << 16) | (minor << 8) | 0xff``.
        name: display name.  Defaults to ``"FD<gpu_id>"``, which requires a
            non-zero ``gpu_id``.

    Raises:
        AssertionError: if neither ``gpu_id`` nor ``chip_id`` is given, or if
            ``name`` is omitted while ``gpu_id`` is zero/omitted.
    """

    def __init__(self, gpu_id=None, chip_id=None, name=None):
        if chip_id is None:
            assert gpu_id is not None, "either gpu_id or chip_id is required"
            # Split e.g. 630 into core=6, major=3, minor=0 using integer
            # arithmetic (no round-trip through float).
            core, remainder = divmod(gpu_id, 100)
            major, minor = divmod(remainder, 10)
            # NOTE(review): the low byte is always 0xff here -- presumably a
            # "match any" patch level; confirm against the consumer of
            # fd_dev_recs.
            chip_id = (core << 24) | (major << 16) | (minor << 8) | 0xff
        self.chip_id = chip_id

        if gpu_id is None:
            gpu_id = 0
        self.gpu_id = gpu_id

        if name is None:
            assert gpu_id != 0, "name is required when gpu_id is not given"
            name = "FD%d" % gpu_id
        self.name = name
73
class Struct(object):
    """Helper whose string form is a 'C' designated-initializer.

    Every instance attribute becomes a ``.name=value,`` entry, in attribute
    insertion order; values are rendered with ``str()``, so nested Struct
    instances expand recursively.
    """

    def __str__(self):
        members = "".join(
            ".%s=%s," % (field, str(contents))
            for field, contents in vars(self).items()
        )
        return "{" + members + "}"
82
class GPUInfo(Struct):
    """Base class for any generation of adreno, consists of GMEM layout
       related parameters

       Note that tile_max_h is normally only constrained by the corresponding
       bitfield size/shift (ie. VSC_BIN_SIZE, or similar), but tile_max_w
       tends to have lower limits, in which case a comment will describe
       the bitfield size/shift

       Constructing an instance also appends it to the global list of
       device-info structs (s.gpu_infos).
    """
    def __init__(self, gmem_align_w, gmem_align_h,
                 tile_align_w, tile_align_h,
                 tile_max_w, tile_max_h, num_vsc_pipes):
        # Attribute order is significant: Struct.__str__ emits the fields
        # in insertion order.
        layout = (
            ("gmem_align_w", gmem_align_w),
            ("gmem_align_h", gmem_align_h),
            ("tile_align_w", tile_align_w),
            ("tile_align_h", tile_align_h),
            ("tile_max_w", tile_max_w),
            ("tile_max_h", tile_max_h),
            ("num_vsc_pipes", num_vsc_pipes),
        )
        for field, setting in layout:
            setattr(self, field, setting)

        s.gpu_infos.append(self)
104
105
class A6xxGPUInfo(GPUInfo):
    """Device info for the a6xx generation.

       a6xx has a lot more parameters than earlier generations and is broken
       down into distinct sub-generations.  The ``template`` dict carries the
       parameters shared by one sub-generation so they are not repeated for
       every GPU; its "magic" entry is a nested dict of register values.
    """
    def __init__(self, template, num_sp_cores, num_ccu,
                 RB_UNKNOWN_8E04_blit, PC_POWER_CNTL):
        super().__init__(gmem_align_w = 16, gmem_align_h = 4,
                         tile_align_w = 32, tile_align_h = 32,
                         tile_max_w   = 1024, # max_bitfield_val(5, 0, 5)
                         tile_max_h   = max_bitfield_val(14, 8, 4),
                         num_vsc_pipes = 32)
        assert(num_sp_cores == num_ccu)

        self.num_sp_cores = num_sp_cores

        # 96 tile alignment seems correlated to 3 CCU
        if num_ccu == 3:
            self.tile_align_w = 96

        # Various "magic" register values: the sub-generation ones first,
        # then the per-GPU ones passed in explicitly.
        magic = Struct()
        vars(magic).update(template["magic"])
        magic.RB_UNKNOWN_8E04_blit = RB_UNKNOWN_8E04_blit
        magic.PC_POWER_CNTL = PC_POWER_CNTL

        a6xx = Struct()
        a6xx.magic = magic

        # Things that earlier gens have and later gens remove, provide
        # defaults here and let them be overridden by sub-gen template:
        a6xx.has_cp_reg_write = True
        a6xx.has_8bpp_ubwc = True

        # Everything else in the template is copied over verbatim ("magic"
        # was handled above).
        vars(a6xx).update(
            (key, setting) for key, setting in template.items()
            if key != "magic"
        )

        self.a6xx = a6xx
145
# a2xx is really two sub-generations, a20x and a22x, but we don't currently
# capture that in the device-info tables
add_gpus(
    ids=[GPUId(num) for num in (200, 201, 205, 220)],
    info=GPUInfo(
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=512,
        tile_max_h=~0,  # TODO
        num_vsc_pipes=8,
    ),
)

# a3xx
add_gpus(
    ids=[GPUId(num) for num in (305, 307, 320, 330)],
    info=GPUInfo(
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=992,  # max_bitfield_val(4, 0, 5)
        tile_max_h=max_bitfield_val(9, 5, 5),
        num_vsc_pipes=8,
    ),
)

# a4xx
add_gpus(
    ids=[GPUId(num) for num in (405, 420, 430)],
    info=GPUInfo(
        gmem_align_w=32, gmem_align_h=32,
        tile_align_w=32, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(4, 0, 5)
        tile_max_h=max_bitfield_val(9, 5, 5),
        num_vsc_pipes=8,
    ),
)

# a5xx
add_gpus(
    ids=[GPUId(num) for num in (508, 509, 510, 512, 530, 540)],
    info=GPUInfo(
        gmem_align_w=64, gmem_align_h=32,
        tile_align_w=64, tile_align_h=32,
        tile_max_w=1024,  # max_bitfield_val(7, 0, 5)
        tile_max_h=max_bitfield_val(16, 9, 5),
        num_vsc_pipes=16,
    ),
)
200
# a6xx can be divided into distinct sub-generations, where certain device-
# info parameters are keyed to the sub-generation.  These templates reduce
# the copy-paste.

# a615, a616, a618, a619, a620 and a630:
a6xx_gen1 = {
    "fibers_per_sp": 128 * 16,
    "reg_size_vec4": 96,
    "instr_cache_size": 64,
    "concurrent_resolve": True,
    "indirect_draw_wfm_quirk": True,
    "depth_bounds_require_depth_test_quirk": True,
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0x100000,
    },
}
217
# a640, a680:
a6xx_gen2 = {
    "fibers_per_sp": 128 * 4 * 16,
    "reg_size_vec4": 96,
    "instr_cache_size": 64,  # TODO
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "indirect_draw_wfm_quirk": True,
    "depth_bounds_require_depth_test_quirk": True,  # TODO: check if true
    "has_dp2acc": False,  # TODO: check if true
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0,
    },
}
232
# a650:
a6xx_gen3 = {
    "fibers_per_sp": 128 * 2 * 16,
    "reg_size_vec4": 64,
    # Blob limits it to 128 but we hang with 128
    "instr_cache_size": 127,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "tess_use_shared": True,
    "storage_16bit": True,
    "has_tex_filter_cubic": True,
    "has_sample_locations": True,
    "has_ccu_flush_bug": True,
    "has_8bpp_ubwc": False,
    "has_dp2acc": True,
    "has_lrz_dir_tracking": True,
    "enable_lrz_fast_clear": True,
    "lrz_track_quirk": True,
    "magic": {
        # this seems to be a chicken bit that fixes cubic filtering:
        "TPL1_DBG_ECO_CNTL": 0x1000000,
    },
}
256
# a635, a660:
a6xx_gen4 = {
    "fibers_per_sp": 128 * 2 * 16,
    "reg_size_vec4": 64,
    # Blob limits it to 128 but we hang with 128
    "instr_cache_size": 127,
    "supports_multiview_mask": True,
    "has_z24uint_s8uint": True,
    "tess_use_shared": True,
    "storage_16bit": True,
    "has_tex_filter_cubic": True,
    "has_sample_locations": True,
    "has_ccu_flush_bug": True,
    "has_cp_reg_write": False,
    "has_8bpp_ubwc": False,
    "has_lpac": True,
    "has_shading_rate": True,
    "has_getfiberid": True,
    "has_dp2acc": True,
    "has_dp4acc": True,
    "enable_lrz_fast_clear": True,
    "has_lrz_dir_tracking": True,
    "magic": {
        "TPL1_DBG_ECO_CNTL": 0x5008000,
    },
}
283
# a6xx gen1, single SP core / CCU
add_gpus(
    ids=[GPUId(num) for num in (615, 616, 618, 619, 620)],
    info=A6xxGPUInfo(
        template=a6xx_gen1,
        num_sp_cores=1,
        num_ccu=1,
        RB_UNKNOWN_8E04_blit=0x00100000,
        PC_POWER_CNTL=0,
    ),
)

add_gpus(
    ids=[GPUId(630)],
    info=A6xxGPUInfo(
        template=a6xx_gen1,
        num_sp_cores=2,
        num_ccu=2,
        RB_UNKNOWN_8E04_blit=0x01000000,
        PC_POWER_CNTL=1,
    ),
)

add_gpus(
    ids=[GPUId(640)],
    info=A6xxGPUInfo(
        template=a6xx_gen2,
        num_sp_cores=2,
        num_ccu=2,
        RB_UNKNOWN_8E04_blit=0x00100000,
        PC_POWER_CNTL=1,
    ),
)

add_gpus(
    ids=[GPUId(680)],
    info=A6xxGPUInfo(
        template=a6xx_gen2,
        num_sp_cores=4,
        num_ccu=4,
        RB_UNKNOWN_8E04_blit=0x04100000,
        PC_POWER_CNTL=3,
    ),
)

add_gpus(
    ids=[GPUId(650)],
    info=A6xxGPUInfo(
        template=a6xx_gen3,
        num_sp_cores=3,
        num_ccu=3,
        RB_UNKNOWN_8E04_blit=0x04100000,
        PC_POWER_CNTL=2,
    ),
)

# These entries are identified by explicit chip id rather than by a
# decimal gpu id, so each one must supply its own name.
add_gpus(
    ids=[
        GPUId(chip_id=0x00be06030500, name="Adreno 8c Gen 3"),
        GPUId(chip_id=0x007506030500, name="Adreno 7c+ Gen 3"),
        GPUId(chip_id=0x006006030500, name="Adreno 7c+ Gen 3 Lite"),
        # fallback wildcard entry should be last:
        GPUId(chip_id=0xffff06030500, name="Adreno 7c+ Gen 3"),
    ],
    info=A6xxGPUInfo(
        template=a6xx_gen4,
        num_sp_cores=2,
        num_ccu=2,
        RB_UNKNOWN_8E04_blit=0x00100000,
        PC_POWER_CNTL=1,
    ),
)

add_gpus(
    ids=[GPUId(660)],
    info=A6xxGPUInfo(
        template=a6xx_gen4,
        num_sp_cores=3,
        num_ccu=3,
        RB_UNKNOWN_8E04_blit=0x04100000,
        PC_POWER_CNTL=2,
    ),
)
361
# Mako template for the generated C source.  It emits one
# `static const struct fd_dev_info __info<N>` initializer per entry of
# s.gpu_infos (N being the entry's index in that list), followed by the
# fd_dev_recs[] table, which has one row per id registered in s.gpus pointing
# at the matching __info<N>.  Struct.__str__ renders Python booleans as
# True/False, hence the #defines mapping them to C.
template = """\
/* Copyright (C) 2021 Google, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "freedreno_dev_info.h"

/* Map python to C: */
#define True true
#define False false

%for info in s.gpu_infos:
static const struct fd_dev_info __info${s.info_index(info)} = ${str(info)};
%endfor

static const struct fd_dev_rec fd_dev_recs[] = {
%for id, info in s.gpus.items():
   { {${id.gpu_id}, ${hex(id.chip_id)}}, "${id.name}", &__info${s.info_index(info)} },
%endfor
};
"""

# Render the template against the global registry and write the generated C
# source to stdout.
print(Template(template).render(s=s))
403
404