xref: /third_party/mesa3d/src/amd/common/ac_surface.c (revision bf215546)
1/*
2 * Copyright © 2011 Red Hat All Rights Reserved.
3 * Copyright © 2017 Advanced Micro Devices, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining
7 * a copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
16 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
18 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
21 * USE OR OTHER DEALINGS IN THE SOFTWARE.
22 *
23 * The above copyright notice and this permission notice (including the
24 * next paragraph) shall be included in all copies or substantial portions
25 * of the Software.
26 */
27
28#define AC_SURFACE_INCLUDE_NIR
29#include "ac_surface.h"
30
31#include "ac_drm_fourcc.h"
32#include "ac_gpu_info.h"
33#include "addrlib/inc/addrinterface.h"
34#include "addrlib/src/amdgpu_asic_addr.h"
35#include "amd_family.h"
36#include "sid.h"
37#include "util/hash_table.h"
38#include "util/macros.h"
39#include "util/simple_mtx.h"
40#include "util/u_atomic.h"
41#include "util/format/u_format.h"
42#include "util/u_math.h"
43#include "util/u_memory.h"
44
45#include <errno.h>
46#include <stdio.h>
47#include <stdlib.h>
48
/* The AMDGPU_TILING_* bit-field layout normally comes from
 * drm-uapi/amdgpu_drm.h. That header is not included on Windows builds, so
 * the fields are defined locally there.
 *
 * Each field is described by a <FIELD>_SHIFT (bit position) and a
 * <FIELD>_MASK (applied before shifting in SET, after shifting in GET).
 *
 * NOTE(review): these values presumably mirror amdgpu_drm.h exactly - keep
 * them in sync when that header changes.
 */
#ifdef _WIN32
#define AMDGPU_TILING_ARRAY_MODE_SHIFT			0
#define AMDGPU_TILING_ARRAY_MODE_MASK			0xf
#define AMDGPU_TILING_PIPE_CONFIG_SHIFT			4
#define AMDGPU_TILING_PIPE_CONFIG_MASK			0x1f
#define AMDGPU_TILING_TILE_SPLIT_SHIFT			9
#define AMDGPU_TILING_TILE_SPLIT_MASK			0x7
#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT		12
#define AMDGPU_TILING_MICRO_TILE_MODE_MASK		0x7
#define AMDGPU_TILING_BANK_WIDTH_SHIFT			15
#define AMDGPU_TILING_BANK_WIDTH_MASK			0x3
#define AMDGPU_TILING_BANK_HEIGHT_SHIFT			17
#define AMDGPU_TILING_BANK_HEIGHT_MASK			0x3
#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT		19
#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK		0x3
#define AMDGPU_TILING_NUM_BANKS_SHIFT			21
#define AMDGPU_TILING_NUM_BANKS_MASK			0x3
/* NOTE(review): SWIZZLE_MODE starts at bit 0 like ARRAY_MODE, so the fields
 * from here on overlap the ones above; presumably the two groups are
 * alternative encodings for different GPU generations - confirm against
 * amdgpu_drm.h.
 */
#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT		0
#define AMDGPU_TILING_SWIZZLE_MODE_MASK			0x1f
#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT		5
#define AMDGPU_TILING_DCC_OFFSET_256B_MASK		0xFFFFFF
#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT		29
#define AMDGPU_TILING_DCC_PITCH_MAX_MASK		0x3FFF
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT		43
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK		0x1
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT	44
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK		0x1
#define AMDGPU_TILING_SCANOUT_SHIFT			63
#define AMDGPU_TILING_SCANOUT_MASK			0x1
/* Build the bits of one field: mask the value, then shift it into place. */
#define AMDGPU_TILING_SET(field, value) \
	(((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
/* Extract one field: shift it down to bit 0, then mask it. */
#define AMDGPU_TILING_GET(value, field) \
	(((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
#else
#include "drm-uapi/amdgpu_drm.h"
#endif
85
/* Fallback values for the addrlib "chip engine" identifiers, used only when
 * the headers included above do not already define them.
 * ac_addrlib_create() stores one of them in ADDR_CREATE_INPUT::chipEngine.
 */
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif

#ifndef CIASICIDGFXENGINE_ARCTICISLAND
#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
#endif
93
/* Wrapper around an addrlib instance, created by ac_addrlib_create() and
 * released by ac_addrlib_destroy().
 */
struct ac_addrlib {
   /* Library handle returned by AddrCreate(). */
   ADDR_HANDLE handle;
   /* Initialized as a plain mutex in ac_addrlib_create().
    * NOTE(review): presumably serializes addrlib calls on "handle" - the
    * users are outside this part of the file, confirm there.
    */
   simple_mtx_t lock;
};
98
99bool ac_modifier_has_dcc(uint64_t modifier)
100{
101   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
102}
103
104bool ac_modifier_has_dcc_retile(uint64_t modifier)
105{
106   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier);
107}
108
109bool ac_modifier_supports_dcc_image_stores(uint64_t modifier)
110{
111   if (!ac_modifier_has_dcc(modifier))
112      return false;
113
114   return (!AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
115            AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
116            AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_128B) ||
117           (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && /* gfx10.3 */
118            AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
119            AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
120            AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_64B);
121
122}
123
124
125bool ac_surface_supports_dcc_image_stores(enum amd_gfx_level gfx_level,
126                                          const struct radeon_surf *surf)
127{
128   /* DCC image stores is only available for GFX10+. */
129   if (gfx_level < GFX10)
130      return false;
131
132   /* DCC image stores support the following settings:
133    * - INDEPENDENT_64B_BLOCKS = 0
134    * - INDEPENDENT_128B_BLOCKS = 1
135    * - MAX_COMPRESSED_BLOCK_SIZE = 128B
136    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
137    *
138    * gfx10.3 also supports the following setting:
139    * - INDEPENDENT_64B_BLOCKS = 1
140    * - INDEPENDENT_128B_BLOCKS = 1
141    * - MAX_COMPRESSED_BLOCK_SIZE = 64B
142    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
143    *
144    * The compressor only looks at MAX_COMPRESSED_BLOCK_SIZE to determine
145    * the INDEPENDENT_xx_BLOCKS settings. 128B implies INDEP_128B, while 64B
146    * implies INDEP_64B && INDEP_128B.
147    *
148    * The same limitations apply to SDMA compressed stores because
149    * SDMA uses the same DCC codec.
150    */
151   return (!surf->u.gfx9.color.dcc.independent_64B_blocks &&
152            surf->u.gfx9.color.dcc.independent_128B_blocks &&
153            surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B) ||
154           (gfx_level >= GFX10_3 && /* gfx10.3 */
155            surf->u.gfx9.color.dcc.independent_64B_blocks &&
156            surf->u.gfx9.color.dcc.independent_128B_blocks &&
157            surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
158}
159
160static
161AddrSwizzleMode ac_modifier_gfx9_swizzle_mode(uint64_t modifier)
162{
163   if (modifier == DRM_FORMAT_MOD_LINEAR)
164      return ADDR_SW_LINEAR;
165
166   return AMD_FMT_MOD_GET(TILE, modifier);
167}
168static void
169ac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf,
170             ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info)
171{
172   assert(ac_modifier_has_dcc(modifier));
173
174   if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
175      surf_info->flags.metaPipeUnaligned = 0;
176   } else {
177      surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
178   }
179
180   /* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on
181    * non-displayable DCC surfaces just because num_render_backends = 1 */
182   surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
183                                      AMD_FMT_MOD_GET(RB, modifier) == 0 &&
184                                      surf_info->flags.metaPipeUnaligned;
185
186   surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
187   surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
188   surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
189}
190
191bool ac_is_modifier_supported(const struct radeon_info *info,
192                              const struct ac_modifier_options *options,
193                              enum pipe_format format,
194                              uint64_t modifier)
195{
196
197   if (util_format_is_compressed(format) ||
198       util_format_is_depth_or_stencil(format) ||
199       util_format_get_blocksizebits(format) > 64)
200      return false;
201
202   if (info->gfx_level < GFX9)
203      return false;
204
205   if(modifier == DRM_FORMAT_MOD_LINEAR)
206      return true;
207
208   /* GFX8 may need a different modifier for each plane */
209   if (info->gfx_level < GFX9 && util_format_get_num_planes(format) > 1)
210      return false;
211
212   uint32_t allowed_swizzles = 0xFFFFFFFF;
213   switch(info->gfx_level) {
214   case GFX9:
215      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;
216      break;
217   case GFX10:
218   case GFX10_3:
219      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x08000000 : 0x0E660660;
220      break;
221   case GFX11:
222      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x88000000 : 0xCC440440;
223      break;
224   default:
225      return false;
226   }
227
228   if (!((1u << ac_modifier_gfx9_swizzle_mode(modifier)) & allowed_swizzles))
229      return false;
230
231   if (ac_modifier_has_dcc(modifier)) {
232      /* TODO: support multi-planar formats with DCC */
233      if (util_format_get_num_planes(format) > 1)
234         return false;
235
236      if (!info->has_graphics)
237         return false;
238
239      if (!options->dcc)
240         return false;
241
242      if (ac_modifier_has_dcc_retile(modifier) && !options->dcc_retile)
243         return false;
244   }
245
246   return true;
247}
248
249bool ac_get_supported_modifiers(const struct radeon_info *info,
250                                const struct ac_modifier_options *options,
251                                enum pipe_format format,
252                                unsigned *mod_count,
253                                uint64_t *mods)
254{
255   unsigned current_mod = 0;
256
257#define ADD_MOD(name)                                                   \
258   if (ac_is_modifier_supported(info, options, format, (name))) {  \
259      if (mods && current_mod < *mod_count)                  \
260         mods[current_mod] = (name);                    \
261      ++current_mod;                                         \
262   }
263
264   /* The modifiers have to be added in descending order of estimated
265    * performance. The drivers will prefer modifiers that come earlier
266    * in the list. */
267   switch (info->gfx_level) {
268   case GFX9: {
269      unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +
270                                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);
271      unsigned bank_xor_bits =  MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits);
272      unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);
273      unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +
274                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config);
275
276      uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) |
277                            AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
278                            AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
279                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) |
280                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
281                            AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);
282
283      ADD_MOD(AMD_FMT_MOD |
284              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
285              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
286              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
287              common_dcc |
288              AMD_FMT_MOD_SET(PIPE, pipes) |
289              AMD_FMT_MOD_SET(RB, rb))
290
291      ADD_MOD(AMD_FMT_MOD |
292              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
293              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
294              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
295              common_dcc |
296              AMD_FMT_MOD_SET(PIPE, pipes) |
297              AMD_FMT_MOD_SET(RB, rb))
298
299      if (util_format_get_blocksizebits(format) == 32) {
300         if (info->max_render_backends == 1) {
301            ADD_MOD(AMD_FMT_MOD |
302                    AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
303                    AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
304                    common_dcc);
305         }
306
307
308         ADD_MOD(AMD_FMT_MOD |
309                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
310                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
311                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
312                 common_dcc |
313                 AMD_FMT_MOD_SET(PIPE, pipes) |
314                 AMD_FMT_MOD_SET(RB, rb))
315      }
316
317
318      ADD_MOD(AMD_FMT_MOD |
319              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
320              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
321              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
322              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
323
324      ADD_MOD(AMD_FMT_MOD |
325              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
326              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
327              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
328              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));
329
330      ADD_MOD(AMD_FMT_MOD |
331              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
332              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
333
334      ADD_MOD(AMD_FMT_MOD |
335              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
336              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
337
338      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
339      break;
340   }
341   case GFX10:
342   case GFX10_3: {
343      bool rbplus = info->gfx_level >= GFX10_3;
344      unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
345      unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;
346
347      unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10;
348      uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) |
349                            AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
350                            AMD_FMT_MOD_SET(DCC, 1) |
351                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
352                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
353                            AMD_FMT_MOD_SET(PACKERS, pkrs);
354
355      ADD_MOD(AMD_FMT_MOD | common_dcc |
356              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
357              AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
358              AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
359
360      if (info->gfx_level >= GFX10_3) {
361         ADD_MOD(AMD_FMT_MOD | common_dcc |
362                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
363                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
364                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))
365
366         ADD_MOD(AMD_FMT_MOD | common_dcc |
367                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
368                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
369                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
370                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
371      }
372
373      ADD_MOD(AMD_FMT_MOD |
374              AMD_FMT_MOD_SET(TILE_VERSION, version) |
375              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
376              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
377              AMD_FMT_MOD_SET(PACKERS, pkrs))
378
379      ADD_MOD(AMD_FMT_MOD |
380              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
381              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
382              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits))
383
384      if (util_format_get_blocksizebits(format) != 32) {
385         ADD_MOD(AMD_FMT_MOD |
386                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
387                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
388      }
389
390      ADD_MOD(AMD_FMT_MOD |
391              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
392              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
393
394      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
395      break;
396   }
397   case GFX11: {
398      /* GFX11 has new microblock organization. No S modes for 2D. */
399      unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
400      unsigned pkrs = G_0098F8_NUM_PKRS(info->gb_addr_config);
401      unsigned num_pipes = 1 << pipe_xor_bits;
402
403      /* R_X swizzle modes are the best for rendering and DCC requires them. */
404      for (unsigned i = 0; i < 2; i++) {
405         unsigned swizzle_r_x;
406
407         /* Insert the best one first. */
408         if (num_pipes > 16)
409            swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X;
410         else
411            swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X;
412
413         /* Disable 256K on APUs because it doesn't work with DAL. */
414         if (!info->has_dedicated_vram && swizzle_r_x == AMD_FMT_MOD_TILE_GFX11_256K_R_X)
415            continue;
416
417         uint64_t modifier_r_x = AMD_FMT_MOD |
418                                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
419                                 AMD_FMT_MOD_SET(TILE, swizzle_r_x) |
420                                 AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
421                                 AMD_FMT_MOD_SET(PACKERS, pkrs);
422
423         /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */
424         uint64_t modifier_dcc_best = modifier_r_x |
425                                      AMD_FMT_MOD_SET(DCC, 1) |
426                                      AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) |
427                                      AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
428                                      AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);
429
430         /* DCC settings for 4K and greater resolutions. (required by display hw) */
431         uint64_t modifier_dcc_4k = modifier_r_x |
432                                    AMD_FMT_MOD_SET(DCC, 1) |
433                                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
434                                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
435                                    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);
436
437         /* Modifiers have to be sorted from best to worst.
438          *
439          * Top level order:
440          *   1. The best chip-specific modifiers with DCC, potentially non-displayable.
441          *   2. Chip-specific displayable modifiers with DCC.
442          *   3. Chip-specific displayable modifiers without DCC.
443          *   4. Chip-independent modifiers without DCC.
444          *   5. Linear.
445          */
446
447         /* Add the best non-displayable modifier first. */
448         ADD_MOD(modifier_dcc_best | AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1));
449
450         /* Displayable modifiers are next. */
451         /* Add other displayable DCC settings. (DCC_RETILE implies displayable on all chips) */
452         ADD_MOD(modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1))
453         ADD_MOD(modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1))
454
455         /* Add one without DCC that is displayable (it's also optimal for non-displayable cases). */
456         ADD_MOD(modifier_r_x)
457      }
458
459      /* Add one that is compatible with other gfx11 chips. */
460      ADD_MOD(AMD_FMT_MOD |
461              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
462              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D))
463
464      /* Linear must be last. */
465      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
466      break;
467   }
468   default:
469      break;
470   }
471
472#undef ADD_MOD
473
474   if (!mods) {
475      *mod_count = current_mod;
476      return true;
477   }
478
479   bool complete = current_mod <= *mod_count;
480   *mod_count = MIN2(*mod_count, current_mod);
481   return complete;
482}
483
484static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)
485{
486   return malloc(pInput->sizeInBytes);
487}
488
489static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)
490{
491   free(pInput->pVirtAddr);
492   return ADDR_OK;
493}
494
495struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
496                                     uint64_t *max_alignment)
497{
498   ADDR_CREATE_INPUT addrCreateInput = {0};
499   ADDR_CREATE_OUTPUT addrCreateOutput = {0};
500   ADDR_REGISTER_VALUE regValue = {0};
501   ADDR_CREATE_FLAGS createFlags = {{0}};
502   ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
503   ADDR_E_RETURNCODE addrRet;
504
505   addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
506   addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);
507
508   regValue.gbAddrConfig = info->gb_addr_config;
509   createFlags.value = 0;
510
511   addrCreateInput.chipFamily = info->family_id;
512   addrCreateInput.chipRevision = info->chip_external_rev;
513
514   if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
515      return NULL;
516
517   if (addrCreateInput.chipFamily >= FAMILY_AI) {
518      addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
519   } else {
520      regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;
521      regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;
522
523      regValue.backendDisables = info->enabled_rb_mask;
524      regValue.pTileConfig = info->si_tile_mode_array;
525      regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array);
526      if (addrCreateInput.chipFamily == FAMILY_SI) {
527         regValue.pMacroTileConfig = NULL;
528         regValue.noOfMacroEntries = 0;
529      } else {
530         regValue.pMacroTileConfig = info->cik_macrotile_mode_array;
531         regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array);
532      }
533
534      createFlags.useTileIndex = 1;
535      createFlags.useHtileSliceAlign = 1;
536
537      addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
538   }
539
540   addrCreateInput.callbacks.allocSysMem = allocSysMem;
541   addrCreateInput.callbacks.freeSysMem = freeSysMem;
542   addrCreateInput.callbacks.debugPrint = 0;
543   addrCreateInput.createFlags = createFlags;
544   addrCreateInput.regValue = regValue;
545
546   addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
547   if (addrRet != ADDR_OK)
548      return NULL;
549
550   if (max_alignment) {
551      addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
552      if (addrRet == ADDR_OK) {
553         *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
554      }
555   }
556
557   struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
558   if (!addrlib) {
559      AddrDestroy(addrCreateOutput.hLib);
560      return NULL;
561   }
562
563   addrlib->handle = addrCreateOutput.hLib;
564   simple_mtx_init(&addrlib->lock, mtx_plain);
565   return addrlib;
566}
567
568void ac_addrlib_destroy(struct ac_addrlib *addrlib)
569{
570   simple_mtx_destroy(&addrlib->lock);
571   AddrDestroy(addrlib->handle);
572   free(addrlib);
573}
574
575void *ac_addrlib_get_handle(struct ac_addrlib *addrlib)
576{
577   return addrlib->handle;
578}
579
580static int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)
581{
582   /* FMASK is allocated together with the color surface and can't be
583    * allocated separately.
584    */
585   assert(!(flags & RADEON_SURF_FMASK));
586   if (flags & RADEON_SURF_FMASK)
587      return -EINVAL;
588
589   /* all dimension must be at least 1 ! */
590   if (!config->info.width || !config->info.height || !config->info.depth ||
591       !config->info.array_size || !config->info.levels)
592      return -EINVAL;
593
594   switch (config->info.samples) {
595   case 0:
596   case 1:
597   case 2:
598   case 4:
599   case 8:
600      break;
601   case 16:
602      if (flags & RADEON_SURF_Z_OR_SBUFFER)
603         return -EINVAL;
604      break;
605   default:
606      return -EINVAL;
607   }
608
609   if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
610      switch (config->info.storage_samples) {
611      case 0:
612      case 1:
613      case 2:
614      case 4:
615      case 8:
616         break;
617      default:
618         return -EINVAL;
619      }
620   }
621
622   if (config->is_3d && config->info.array_size > 1)
623      return -EINVAL;
624   if (config->is_cube && config->info.depth > 1)
625      return -EINVAL;
626
627   return 0;
628}
629
630static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,
631                              struct radeon_surf *surf, bool is_stencil, unsigned level,
632                              bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
633                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
634                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
635                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
636                              ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
637                              ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
638{
639   struct legacy_surf_level *surf_level;
640   struct legacy_surf_dcc_level *dcc_level;
641   ADDR_E_RETURNCODE ret;
642
643   AddrSurfInfoIn->mipLevel = level;
644   AddrSurfInfoIn->width = u_minify(config->info.width, level);
645   AddrSurfInfoIn->height = u_minify(config->info.height, level);
646
647   /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
648    * because GFX9 needs linear alignment of 256 bytes.
649    */
650   if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
651       AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
652      unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);
653
654      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
655   }
656
657   /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
658    * true for r32g32b32 formats. */
659   if (AddrSurfInfoIn->bpp == 96) {
660      assert(config->info.levels == 1);
661      assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);
662
663      /* The least common multiple of 64 bytes and 12 bytes/pixel is
664       * 192 bytes, or 16 pixels. */
665      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
666   }
667
668   if (config->is_3d)
669      AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
670   else if (config->is_cube)
671      AddrSurfInfoIn->numSlices = 6;
672   else
673      AddrSurfInfoIn->numSlices = config->info.array_size;
674
675   if (level > 0) {
676      /* Set the base level pitch. This is needed for calculation
677       * of non-zero levels. */
678      if (is_stencil)
679         AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x;
680      else
681         AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;
682
683      /* Convert blocks to pixels for compressed formats. */
684      if (compressed)
685         AddrSurfInfoIn->basePitch *= surf->blk_w;
686   }
687
688   ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);
689   if (ret != ADDR_OK) {
690      return ret;
691   }
692
693   surf_level = is_stencil ? &surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level];
694   dcc_level = &surf->u.legacy.color.dcc_level[level];
695   surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256;
696   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
697   surf_level->nblk_x = AddrSurfInfoOut->pitch;
698   surf_level->nblk_y = AddrSurfInfoOut->height;
699
700   switch (AddrSurfInfoOut->tileMode) {
701   case ADDR_TM_LINEAR_ALIGNED:
702      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
703      break;
704   case ADDR_TM_1D_TILED_THIN1:
705   case ADDR_TM_PRT_TILED_THIN1:
706      surf_level->mode = RADEON_SURF_MODE_1D;
707      break;
708   case ADDR_TM_2D_TILED_THIN1:
709   case ADDR_TM_PRT_2D_TILED_THIN1:
710      surf_level->mode = RADEON_SURF_MODE_2D;
711      break;
712   default:
713      assert(0);
714   }
715
716   if (is_stencil)
717      surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
718   else
719      surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;
720
721   if (AddrSurfInfoIn->flags.prt) {
722      if (level == 0) {
723         surf->prt_tile_width = AddrSurfInfoOut->pitchAlign;
724         surf->prt_tile_height = AddrSurfInfoOut->heightAlign;
725         surf->prt_tile_depth = AddrSurfInfoOut->depthAlign;
726      }
727      if (surf_level->nblk_x >= surf->prt_tile_width &&
728          surf_level->nblk_y >= surf->prt_tile_height) {
729         /* +1 because the current level is not in the miptail */
730         surf->first_mip_tail_level = level + 1;
731      }
732   }
733
734   surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize;
735
736   /* Clear DCC fields at the beginning. */
737   if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil)
738      dcc_level->dcc_offset = 0;
739
740   /* The previous level's flag tells us if we can use DCC for this level. */
741   if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {
742      bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;
743
744      AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
745      AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
746      AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
747      AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
748      AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
749
750      ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
751
752      if (ret == ADDR_OK) {
753         dcc_level->dcc_offset = surf->meta_size;
754         surf->num_meta_levels = level + 1;
755         surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize;
756         surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign));
757
758         /* If the DCC size of a subresource (1 mip level or 1 slice)
759          * is not aligned, the DCC memory layout is not contiguous for
760          * that subresource, which means we can't use fast clear.
761          *
762          * We only do fast clears for whole mipmap levels. If we did
763          * per-slice fast clears, the same restriction would apply.
764          * (i.e. only compute the slice size and see if it's aligned)
765          *
766          * The last level can be non-contiguous and still be clearable
767          * if it's interleaved with the next level that doesn't exist.
768          */
769         if (AddrDccOut->dccRamSizeAligned ||
770             (prev_level_clearable && level == config->info.levels - 1))
771            dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
772         else
773            dcc_level->dcc_fast_clear_size = 0;
774
775         /* Compute the DCC slice size because addrlib doesn't
776          * provide this info. As DCC memory is linear (each
777          * slice is the same size) it's easy to compute.
778          */
779         surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size;
780
781         /* For arrays, we have to compute the DCC info again
782          * with one slice size to get a correct fast clear
783          * size.
784          */
785         if (config->info.array_size > 1) {
786            AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize;
787            AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
788            AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
789            AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
790            AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
791
792            ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);
793            if (ret == ADDR_OK) {
794               /* If the DCC memory isn't properly
795                * aligned, the data are interleaved
796                * accross slices.
797                */
798               if (AddrDccOut->dccRamSizeAligned)
799                  dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize;
800               else
801                  dcc_level->dcc_slice_fast_clear_size = 0;
802            }
803
804            if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS &&
805                surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) {
806               surf->meta_size = 0;
807               surf->num_meta_levels = 0;
808               AddrDccOut->subLvlCompressible = false;
809            }
810         } else {
811            dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size;
812         }
813      }
814   }
815
816   /* HTILE. */
817   if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D &&
818       level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) {
819      AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible;
820      AddrHtileIn->pitch = AddrSurfInfoOut->pitch;
821      AddrHtileIn->height = AddrSurfInfoOut->height;
822      AddrHtileIn->numSlices = AddrSurfInfoOut->depth;
823      AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8;
824      AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8;
825      AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo;
826      AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex;
827      AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;
828
829      ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut);
830
831      if (ret == ADDR_OK) {
832         surf->meta_size = AddrHtileOut->htileBytes;
833         surf->meta_slice_size = AddrHtileOut->sliceSize;
834         surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign);
835         surf->meta_pitch = AddrHtileOut->pitch;
836         surf->num_meta_levels = level + 1;
837      }
838   }
839
840   return 0;
841}
842
843static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info)
844{
845   uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]];
846
847   if (info->gfx_level >= GFX7)
848      surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode);
849   else
850      surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode);
851}
852
853static unsigned cik_get_macro_tile_index(struct radeon_surf *surf)
854{
855   unsigned index, tileb;
856
857   tileb = 8 * 8 * surf->bpe;
858   tileb = MIN2(surf->u.legacy.tile_split, tileb);
859
860   for (index = 0; tileb > 64; index++)
861      tileb >>= 1;
862
863   assert(index < 16);
864   return index;
865}
866
867static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)
868{
869   unsigned num_channels = config->info.num_channels;
870   unsigned bpe = surf->bpe;
871
872   /* With modifiers the kernel is in charge of whether it is displayable.
873    * We need to ensure at least 32 pixels pitch alignment, but this is
874    * always the case when the blocksize >= 4K.
875    */
876   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
877      return false;
878
879   if (!config->is_1d && !config->is_3d && !config->is_cube &&
880       !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
881       surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&
882       surf->blk_h == 1) {
883      /* subsampled */
884      if (surf->blk_w == 2 && surf->blk_h == 1)
885         return true;
886
887      if (/* RGBA8 or RGBA16F */
888          (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
889          /* R5G6B5 or R5G5B5A1 */
890          (bpe == 2 && num_channels >= 3) ||
891          /* C8 palette */
892          (bpe == 1 && num_channels == 1))
893         return true;
894   }
895   return false;
896}
897
898/**
899 * This must be called after the first level is computed.
900 *
901 * Copy surface-global settings like pipe/bank config from level 0 surface
902 * computation, and compute tile swizzle.
903 */
904static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,
905                                 const struct ac_surf_config *config,
906                                 ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)
907{
908   surf->surf_alignment_log2 = util_logbase2(csio->baseAlign);
909   surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
910   gfx6_set_micro_tile_mode(surf, info);
911
912   /* For 2D modes only. */
913   if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
914      surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
915      surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
916      surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
917      surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
918      surf->u.legacy.num_banks = csio->pTileInfo->banks;
919      surf->u.legacy.macro_tile_index = csio->macroModeIndex;
920   } else {
921      surf->u.legacy.macro_tile_index = 0;
922   }
923
924   /* Compute tile swizzle. */
925   /* TODO: fix tile swizzle with mipmapping for GFX6 */
926   if ((info->gfx_level >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
927       surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
928       !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
929       !get_display_flag(config, surf)) {
930      ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
931      ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};
932
933      AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
934      AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
935
936      AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
937      AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
938      AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
939      AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
940      AddrBaseSwizzleIn.tileMode = csio->tileMode;
941
942      int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
943      if (r != ADDR_OK)
944         return r;
945
946      assert(AddrBaseSwizzleOut.tileSwizzle <=
947             u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
948      surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
949   }
950   return 0;
951}
952
953static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
954                             struct radeon_surf *surf)
955{
956   unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
957   unsigned num_pipes = info->num_tile_pipes;
958   unsigned cl_width, cl_height;
959
960   if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
961       (config->info.samples >= 2 && !surf->fmask_size))
962      return;
963
964   assert(info->gfx_level <= GFX8);
965
966   switch (num_pipes) {
967   case 2:
968      cl_width = 32;
969      cl_height = 16;
970      break;
971   case 4:
972      cl_width = 32;
973      cl_height = 32;
974      break;
975   case 8:
976      cl_width = 64;
977      cl_height = 32;
978      break;
979   case 16: /* Hawaii */
980      cl_width = 64;
981      cl_height = 64;
982      break;
983   default:
984      assert(0);
985      return;
986   }
987
988   unsigned base_align = num_pipes * pipe_interleave_bytes;
989
990   unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
991   unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
992   unsigned slice_elements = (width * height) / (8 * 8);
993
994   /* Each element of CMASK is a nibble. */
995   unsigned slice_bytes = slice_elements / 2;
996
997   surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128);
998   if (surf->u.legacy.color.cmask_slice_tile_max)
999      surf->u.legacy.color.cmask_slice_tile_max -= 1;
1000
1001   unsigned num_layers;
1002   if (config->is_3d)
1003      num_layers = config->info.depth;
1004   else if (config->is_cube)
1005      num_layers = 6;
1006   else
1007      num_layers = config->info.array_size;
1008
1009   surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align));
1010   surf->cmask_slice_size = align(slice_bytes, base_align);
1011   surf->cmask_size = surf->cmask_slice_size * num_layers;
1012}
1013
1014/**
1015 * Fill in the tiling information in \p surf based on the given surface config.
1016 *
1017 * The following fields of \p surf must be initialized by the caller:
1018 * blk_w, blk_h, bpe, flags.
1019 */
1020static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
1021                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
1022                                struct radeon_surf *surf)
1023{
1024   unsigned level;
1025   bool compressed;
1026   ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
1027   ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
1028   ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
1029   ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
1030   ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
1031   ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
1032   ADDR_TILEINFO AddrTileInfoIn = {0};
1033   ADDR_TILEINFO AddrTileInfoOut = {0};
1034   int r;
1035
1036   AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
1037   AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
1038   AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
1039   AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
1040   AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
1041   AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
1042   AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;
1043
1044   compressed = surf->blk_w == 4 && surf->blk_h == 4;
1045
1046   /* MSAA requires 2D tiling. */
1047   if (config->info.samples > 1)
1048      mode = RADEON_SURF_MODE_2D;
1049
1050   /* DB doesn't support linear layouts. */
1051   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D)
1052      mode = RADEON_SURF_MODE_1D;
1053
1054   /* Set the requested tiling mode. */
1055   switch (mode) {
1056   case RADEON_SURF_MODE_LINEAR_ALIGNED:
1057      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
1058      break;
1059   case RADEON_SURF_MODE_1D:
1060      if (surf->flags & RADEON_SURF_PRT)
1061         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1;
1062      else
1063         AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
1064      break;
1065   case RADEON_SURF_MODE_2D:
1066      if (surf->flags & RADEON_SURF_PRT)
1067         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1;
1068      else
1069         AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
1070      break;
1071   default:
1072      assert(0);
1073   }
1074
1075   /* The format must be set correctly for the allocation of compressed
1076    * textures to work. In other cases, setting the bpp is sufficient.
1077    */
1078   if (compressed) {
1079      switch (surf->bpe) {
1080      case 8:
1081         AddrSurfInfoIn.format = ADDR_FMT_BC1;
1082         break;
1083      case 16:
1084         AddrSurfInfoIn.format = ADDR_FMT_BC3;
1085         break;
1086      default:
1087         assert(0);
1088      }
1089   } else {
1090      AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
1091   }
1092
1093   AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
1094   AddrSurfInfoIn.tileIndex = -1;
1095
1096   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
1097      AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
1098   }
1099
1100   /* Set the micro tile type. */
1101   if (surf->flags & RADEON_SURF_SCANOUT)
1102      AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
1103   else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
1104      AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
1105   else
1106      AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;
1107
1108   AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
1109   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
1110   AddrSurfInfoIn.flags.cube = config->is_cube;
1111   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
1112   AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
1113   AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
1114   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;
1115
1116   /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
1117    * requested, because TC-compatible HTILE requires 2D tiling.
1118    */
1119   AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
1120                                    !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 &&
1121                                    !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);
1122
1123   /* DCC notes:
1124    * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
1125    *   with samples >= 4.
1126    * - Mipmapped array textures have low performance (discovered by a closed
1127    *   driver team).
1128    */
1129   AddrSurfInfoIn.flags.dccCompatible =
1130      info->gfx_level >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
1131      !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
1132      !compressed &&
1133      ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);
1134
1135   AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
1136   AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
1137
1138   /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
1139    * for Z and stencil. This can cause a number of problems which we work
1140    * around here:
1141    *
1142    * - a depth part that is incompatible with mipmapped texturing
1143    * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
1144    *   incorrect tiling applied to the stencil part, stencil buffer
1145    *   memory accesses that go out of bounds) even without mipmapping
1146    *
1147    * Some piglit tests that are prone to different types of related
1148    * failures:
1149    *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
1150    *  ./bin/framebuffer-blit-levels {draw,read} stencil
1151    *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
1152    *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
1153    *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
1154    */
1155   int stencil_tile_idx = -1;
1156
1157   if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
1158       (config->info.levels > 1 || info->family == CHIP_STONEY)) {
1159      /* Compute stencilTileIdx that is compatible with the (depth)
1160       * tileIdx. This degrades the depth surface if necessary to
1161       * ensure that a matching stencilTileIdx exists. */
1162      AddrSurfInfoIn.flags.matchStencilTileCfg = 1;
1163
1164      /* Keep the depth mip-tail compatible with texturing. */
1165      AddrSurfInfoIn.flags.noStencil = 1;
1166   }
1167
1168   /* Set preferred macrotile parameters. This is usually required
1169    * for shared resources. This is for 2D tiling only. */
1170   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
1171       AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw &&
1172       surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
1173      /* If any of these parameters are incorrect, the calculation
1174       * will fail. */
1175      AddrTileInfoIn.banks = surf->u.legacy.num_banks;
1176      AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
1177      AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
1178      AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
1179      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
1180      AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
1181      AddrSurfInfoIn.flags.opt4Space = 0;
1182      AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;
1183
1184      /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
1185       * the tile index, because we are expected to know it if
1186       * we know the other parameters.
1187       *
1188       * This is something that can easily be fixed in Addrlib.
1189       * For now, just figure it out here.
1190       * Note that only 2D_TILE_THIN1 is handled here.
1191       */
1192      assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
1193      assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);
1194
1195      if (info->gfx_level == GFX6) {
1196         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
1197            if (surf->bpe == 2)
1198               AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
1199            else
1200               AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
1201         } else {
1202            if (surf->bpe == 1)
1203               AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
1204            else if (surf->bpe == 2)
1205               AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
1206            else if (surf->bpe == 4)
1207               AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
1208            else
1209               AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
1210         }
1211      } else {
1212         /* GFX7 - GFX8 */
1213         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
1214            AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
1215         else
1216            AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */
1217
1218         /* Addrlib doesn't set this if tileIndex is forced like above. */
1219         AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
1220      }
1221   }
1222
1223   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
1224   surf->num_meta_levels = 0;
1225   surf->surf_size = 0;
1226   surf->meta_size = 0;
1227   surf->meta_slice_size = 0;
1228   surf->meta_alignment_log2 = 0;
1229
1230   const bool only_stencil =
1231      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
1232
1233   /* Calculate texture layout information. */
1234   if (!only_stencil) {
1235      for (level = 0; level < config->info.levels; level++) {
1236         r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn,
1237                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn,
1238                                &AddrHtileOut);
1239         if (r)
1240            return r;
1241
1242         if (level > 0)
1243            continue;
1244
1245         if (!AddrSurfInfoOut.tcCompatible) {
1246            AddrSurfInfoIn.flags.tcCompatible = 0;
1247            surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
1248         }
1249
1250         if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
1251            AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
1252            AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
1253            stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;
1254
1255            assert(stencil_tile_idx >= 0);
1256         }
1257
1258         r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
1259         if (r)
1260            return r;
1261      }
1262   }
1263
1264   /* Calculate texture layout information for stencil. */
1265   if (surf->flags & RADEON_SURF_SBUFFER) {
1266      AddrSurfInfoIn.tileIndex = stencil_tile_idx;
1267      AddrSurfInfoIn.bpp = 8;
1268      AddrSurfInfoIn.flags.depth = 0;
1269      AddrSurfInfoIn.flags.stencil = 1;
1270      AddrSurfInfoIn.flags.tcCompatible = 0;
1271      /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
1272      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;
1273
1274      for (level = 0; level < config->info.levels; level++) {
1275         r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn,
1276                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL);
1277         if (r)
1278            return r;
1279
1280         /* DB uses the depth pitch for both stencil and depth. */
1281         if (!only_stencil) {
1282            if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x)
1283               surf->u.legacy.stencil_adjusted = true;
1284         } else {
1285            surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x;
1286         }
1287
1288         if (level == 0) {
1289            if (only_stencil) {
1290               r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
1291               if (r)
1292                  return r;
1293            }
1294
1295            /* For 2D modes only. */
1296            if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
1297               surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
1298            }
1299         }
1300      }
1301   }
1302
1303   /* Compute FMASK. */
1304   if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics &&
1305       !(surf->flags & RADEON_SURF_NO_FMASK)) {
1306      ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
1307      ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
1308      ADDR_TILEINFO fmask_tile_info = {0};
1309
1310      fin.size = sizeof(fin);
1311      fout.size = sizeof(fout);
1312
1313      fin.tileMode = AddrSurfInfoOut.tileMode;
1314      fin.pitch = AddrSurfInfoOut.pitch;
1315      fin.height = config->info.height;
1316      fin.numSlices = AddrSurfInfoIn.numSlices;
1317      fin.numSamples = AddrSurfInfoIn.numSamples;
1318      fin.numFrags = AddrSurfInfoIn.numFrags;
1319      fin.tileIndex = -1;
1320      fout.pTileInfo = &fmask_tile_info;
1321
1322      r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
1323      if (r)
1324         return r;
1325
1326      surf->fmask_size = fout.fmaskBytes;
1327      surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
1328      surf->fmask_slice_size = fout.sliceSize;
1329      surf->fmask_tile_swizzle = 0;
1330
1331      surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64;
1332      if (surf->u.legacy.color.fmask.slice_tile_max)
1333         surf->u.legacy.color.fmask.slice_tile_max -= 1;
1334
1335      surf->u.legacy.color.fmask.tiling_index = fout.tileIndex;
1336      surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight;
1337      surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch;
1338
1339      /* Compute tile swizzle for FMASK. */
1340      if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) {
1341         ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
1342         ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};
1343
1344         xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
1345         xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);
1346
1347         /* This counter starts from 1 instead of 0. */
1348         xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
1349         xin.tileIndex = fout.tileIndex;
1350         xin.macroModeIndex = fout.macroModeIndex;
1351         xin.pTileInfo = fout.pTileInfo;
1352         xin.tileMode = fin.tileMode;
1353
1354         int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
1355         if (r != ADDR_OK)
1356            return r;
1357
1358         assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
1359         surf->fmask_tile_swizzle = xout.tileSwizzle;
1360      }
1361   }
1362
1363   /* Recalculate the whole DCC miptree size including disabled levels.
1364    * This is what addrlib does, but calling addrlib would be a lot more
1365    * complicated.
1366    */
1367   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
1368      /* The smallest miplevels that are never compressed by DCC
1369       * still read the DCC buffer via TC if the base level uses DCC,
1370       * and for some reason the DCC buffer needs to be larger if
1371       * the miptree uses non-zero tile_swizzle. Otherwise there are
1372       * VM faults.
1373       *
1374       * "dcc_alignment * 4" was determined by trial and error.
1375       */
1376      surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4);
1377   }
1378
1379   /* Make sure HTILE covers the whole miptree, because the shader reads
1380    * TC-compatible HTILE even for levels where it's disabled by DB.
1381    */
1382   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) &&
1383       surf->meta_size && config->info.levels > 1) {
1384      /* MSAA can't occur with levels > 1, so ignore the sample count. */
1385      const unsigned total_pixels = surf->surf_size / surf->bpe;
1386      const unsigned htile_block_size = 8 * 8;
1387      const unsigned htile_element_size = 4;
1388
1389      surf->meta_size = (total_pixels / htile_block_size) * htile_element_size;
1390      surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2);
1391   } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) {
1392      /* Unset this if HTILE is not present. */
1393      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
1394   }
1395
1396   surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
1397   surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
1398                          surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;
1399
1400   /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
1401    * used at the same time. This case is not currently expected to occur
1402    * because we don't use rotated. Enforce this restriction on all chips
1403    * to facilitate testing.
1404    */
1405   if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
1406      assert(!"rotate micro tile mode is unsupported");
1407      return ADDR_ERROR;
1408   }
1409
1410   ac_compute_cmask(info, config, surf);
1411   return 0;
1412}
1413
1414/* This is only called when expecting a tiled layout. */
1415static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info,
1416                                           struct radeon_surf *surf,
1417                                           ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,
1418                                           AddrSwizzleMode *swizzle_mode)
1419{
1420   ADDR_E_RETURNCODE ret;
1421   ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
1422   ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};
1423
1424   sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
1425   sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);
1426
1427   sin.flags = in->flags;
1428   sin.resourceType = in->resourceType;
1429   sin.format = in->format;
1430   sin.resourceLoction = ADDR_RSRC_LOC_INVIS;
1431
1432   /* TODO: We could allow some of these: */
1433   sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */
1434
1435   if (info->gfx_level >= GFX11) {
1436      /* Disable 256K on APUs because it doesn't work with DAL. */
1437      if (!info->has_dedicated_vram) {
1438         sin.forbiddenBlock.gfx11.thin256KB = 1;
1439         sin.forbiddenBlock.gfx11.thick256KB = 1;
1440      }
1441   } else {
1442      sin.forbiddenBlock.var = 1;   /* don't allow the variable-sized swizzle modes */
1443   }
1444
1445   sin.bpp = in->bpp;
1446   sin.width = in->width;
1447   sin.height = in->height;
1448   sin.numSlices = in->numSlices;
1449   sin.numMipLevels = in->numMipLevels;
1450   sin.numSamples = in->numSamples;
1451   sin.numFrags = in->numFrags;
1452
1453   if (is_fmask) {
1454      sin.flags.display = 0;
1455      sin.flags.color = 0;
1456      sin.flags.fmask = 1;
1457   }
1458
1459   /* With PRT images we want to force 64 KiB block size so that the image
1460    * created is consistent with the format properties returned in Vulkan
1461    * independent of the image. */
1462   if (sin.flags.prt) {
1463      sin.forbiddenBlock.macroThin4KB = 1;
1464      sin.forbiddenBlock.macroThick4KB = 1;
1465      if (info->gfx_level >= GFX11) {
1466         sin.forbiddenBlock.gfx11.thin256KB = 1;
1467         sin.forbiddenBlock.gfx11.thick256KB = 1;
1468      }
1469      sin.forbiddenBlock.linear = 1;
1470   }
1471
1472   if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
1473      sin.forbiddenBlock.linear = 1;
1474
1475      if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
1476         sin.preferredSwSet.sw_D = 1;
1477      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
1478         sin.preferredSwSet.sw_S = 1;
1479      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
1480         sin.preferredSwSet.sw_Z = 1;
1481      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
1482         sin.preferredSwSet.sw_R = 1;
1483   }
1484
1485   if (info->gfx_level >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
1486      /* 3D textures should use S swizzle modes for the best performance.
1487       * THe only exception is 3D render targets, which prefer 64KB_D_X.
1488       *
1489       * 3D texture sampler performance with a very large 3D texture:
1490       *   ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off)
1491       *   ADDR_SW_64KB_Z_X = 25 FPS
1492       *   ADDR_SW_64KB_D_X = 53 FPS
1493       *   ADDR_SW_4KB_S    = 53 FPS
1494       *   ADDR_SW_64KB_S   = 53 FPS
1495       *   ADDR_SW_64KB_S_T = 61 FPS
1496       *   ADDR_SW_4KB_S_X  = 63 FPS
1497       *   ADDR_SW_64KB_S_X = 62 FPS
1498       */
1499      sin.preferredSwSet.sw_S = 1;
1500   }
1501
1502   ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
1503   if (ret != ADDR_OK)
1504      return ret;
1505
1506   *swizzle_mode = sout.swizzleMode;
1507   return 0;
1508}
1509
1510static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
1511{
1512   if (info->gfx_level >= GFX11)
1513      return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X ||
1514             sw_mode == ADDR_SW_256KB_Z_X || sw_mode == ADDR_SW_256KB_R_X;
1515
1516   if (info->gfx_level >= GFX10)
1517      return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;
1518
1519   return sw_mode != ADDR_SW_LINEAR;
1520}
1521
1522ASSERTED static bool is_dcc_supported_by_L2(const struct radeon_info *info,
1523                                            const struct radeon_surf *surf)
1524{
1525   if (info->gfx_level <= GFX9) {
1526      /* Only independent 64B blocks are supported. */
1527      return surf->u.gfx9.color.dcc.independent_64B_blocks && !surf->u.gfx9.color.dcc.independent_128B_blocks &&
1528             surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
1529   }
1530
1531   if (info->family == CHIP_NAVI10) {
1532      /* Only independent 128B blocks are supported. */
1533      return !surf->u.gfx9.color.dcc.independent_64B_blocks && surf->u.gfx9.color.dcc.independent_128B_blocks &&
1534             surf->u.gfx9.color.dcc.max_compressed_block_size <= V_028C78_MAX_BLOCK_SIZE_128B;
1535   }
1536
1537   bool valid_64b = surf->u.gfx9.color.dcc.independent_64B_blocks &&
1538                    surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B;
1539   bool valid_128b = surf->u.gfx9.color.dcc.independent_128B_blocks &&
1540                     surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B;
1541
1542   if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
1543      /* Either 64B or 128B can be used, but not both.
1544       * If 64B is used, DCC image stores are unsupported.
1545       */
1546      return surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks &&
1547             (valid_64b || valid_128b);
1548   }
1549
1550   /* Valid settings are the same as NAVI14 + (64B && 128B && max_compressed_block_size == 64B) */
1551   return (surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks &&
1552           (valid_64b || valid_128b)) ||
1553          (surf->u.gfx9.color.dcc.independent_64B_blocks &&
1554           surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
1555}
1556
1557static bool gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info *info,
1558                                                      const struct ac_surf_config *config)
1559{
1560   assert(info->gfx_level >= GFX10);
1561
1562   /* Older kernels have buggy DAL. */
1563   if (info->drm_minor <= 43)
1564      return true;
1565
1566   /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
1567   return config->info.width > 2560 || config->info.height > 2560;
1568}
1569
1570void ac_modifier_max_extent(const struct radeon_info *info,
1571                            uint64_t modifier, uint32_t *width, uint32_t *height)
1572{
1573   /* DCC is supported with any size. The maximum width per display pipe is 5760, but multiple
1574    * display pipes can be used to drive the display.
1575    */
1576   *width = 16384;
1577   *height = 16384;
1578
1579   if (ac_modifier_has_dcc(modifier)) {
1580      bool independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
1581
1582      if (info->gfx_level >= GFX10 && !independent_64B_blocks) {
1583         /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */
1584         *width = 2560;
1585         *height = 2560;
1586      }
1587   }
1588}
1589
1590static bool is_dcc_supported_by_DCN(const struct radeon_info *info,
1591                                    const struct ac_surf_config *config,
1592                                    const struct radeon_surf *surf, bool rb_aligned,
1593                                    bool pipe_aligned)
1594{
1595   if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit)
1596      return false;
1597
1598   /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */
1599   if (surf->bpe != 4)
1600      return false;
1601
1602   /* Handle unaligned DCC. */
1603   if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned))
1604      return false;
1605
1606   switch (info->gfx_level) {
1607   case GFX6:
1608   case GFX7:
1609   case GFX8:
1610      /* We can get here due to SI_FORCE_FAMILY. */
1611      return false;
1612   case GFX9:
1613      /* There are more constraints, but we always set
1614       * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B,
1615       * which always works.
1616       */
1617      assert(surf->u.gfx9.color.dcc.independent_64B_blocks &&
1618             surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
1619      return true;
1620   case GFX10:
1621   case GFX10_3:
1622   case GFX11:
1623      /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. */
1624      if (info->gfx_level == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks)
1625         return false;
1626
1627      return (!gfx10_DCN_requires_independent_64B_blocks(info, config) ||
1628              (surf->u.gfx9.color.dcc.independent_64B_blocks &&
1629               surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B));
1630   default:
1631      unreachable("unhandled chip");
1632      return false;
1633   }
1634}
1635
1636static void ac_copy_dcc_equation(const struct radeon_info *info,
1637                                 ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc,
1638                                 struct gfx9_meta_equation *equation)
1639{
1640   equation->meta_block_width = dcc->metaBlkWidth;
1641   equation->meta_block_height = dcc->metaBlkHeight;
1642   equation->meta_block_depth = dcc->metaBlkDepth;
1643
1644   if (info->gfx_level >= GFX10) {
1645      /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */
1646      for (unsigned i = 0; i < 4; i++)
1647         assert(dcc->equation.gfx10_bits[i] == 0);
1648
1649      for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 4; i < 68; i++)
1650         assert(dcc->equation.gfx10_bits[i] == 0);
1651
1652      memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4,
1653             sizeof(equation->u.gfx10_bits));
1654   } else {
1655      assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));
1656
1657      equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits;
1658      equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits;
1659      for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {
1660         for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {
1661            equation->u.gfx9.bit[b].coord[c].dim = dcc->equation.gfx9.bit[b].coord[c].dim;
1662            equation->u.gfx9.bit[b].coord[c].ord = dcc->equation.gfx9.bit[b].coord[c].ord;
1663         }
1664      }
1665   }
1666}
1667
1668static void ac_copy_cmask_equation(const struct radeon_info *info,
1669                                   ADDR2_COMPUTE_CMASK_INFO_OUTPUT *cmask,
1670                                   struct gfx9_meta_equation *equation)
1671{
1672   equation->meta_block_width = cmask->metaBlkWidth;
1673   equation->meta_block_height = cmask->metaBlkHeight;
1674   equation->meta_block_depth = 1;
1675
1676   if (info->gfx_level == GFX9) {
1677      assert(cmask->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit));
1678
1679      equation->u.gfx9.num_bits = cmask->equation.gfx9.num_bits;
1680      equation->u.gfx9.num_pipe_bits = cmask->equation.gfx9.numPipeBits;
1681      for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) {
1682         for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) {
1683            equation->u.gfx9.bit[b].coord[c].dim = cmask->equation.gfx9.bit[b].coord[c].dim;
1684            equation->u.gfx9.bit[b].coord[c].ord = cmask->equation.gfx9.bit[b].coord[c].ord;
1685         }
1686      }
1687   }
1688}
1689
1690static void ac_copy_htile_equation(const struct radeon_info *info,
1691                                   ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile,
1692                                   struct gfx9_meta_equation *equation)
1693{
1694   equation->meta_block_width = htile->metaBlkWidth;
1695   equation->meta_block_height = htile->metaBlkHeight;
1696
1697   /* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. */
1698   for (unsigned i = 0; i < 8; i++)
1699      assert(htile->equation.gfx10_bits[i] == 0);
1700
1701   for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 8; i < 72; i++)
1702      assert(htile->equation.gfx10_bits[i] == 0);
1703
1704   memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8,
1705          sizeof(equation->u.gfx10_bits));
1706}
1707
/*
 * Query addrlib for the layout described by *in and record the result in *surf.
 *
 * Parameters:
 *   addrlib    - supplies the addrlib handle and the lock taken around some GFX9 calls.
 *   info       - GPU description; gfx_level, has_graphics and the display DCC flags are read.
 *   config     - surface configuration; the counters behind info.surf_index and
 *                info.fmask_surf_index are atomically incremented when a tile swizzle
 *                is computed.
 *   surf       - in/out surface description that receives the computed layout.
 *   compressed - consulted for the linear pitch adjustment and the DCC decision
 *                (NOTE(review): expected to mean "4x4 block-compressed format", as
 *                derived in gfx9_compute_surface - confirm at the call site).
 *   in         - initialized addrlib input; in->flags selects which plane is computed.
 *
 * Behaviour by in->flags:
 *   prt     - additionally records the PRT tile dimensions, the first mip tail level
 *             and per-level offsets/pitches.
 *   stencil - the plane is appended after the current surf->surf_size; the function
 *             returns right after updating the stencil fields, surf_size and
 *             surf_alignment_log2.
 *   depth   - the main layout is recorded, followed by HTILE unless
 *             RADEON_SURF_NO_HTILE is set.
 *   else    - the main layout is recorded, followed by the tile swizzle, DCC (plus
 *             displayable DCC where applicable), FMASK and CMASK.
 *
 * Returns 0 on success, otherwise the non-ADDR_OK code of the first failing call.
 */
1708static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info,
1709                                const struct ac_surf_config *config, struct radeon_surf *surf,
1710                                bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in)
1711{
1712   ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1713   ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0};
1714   ADDR_E_RETURNCODE ret;
1715
1716   out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT);
1717   out.pMipInfo = mip_info;
1718
1719   ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out);
1720   if (ret != ADDR_OK)
1721      return ret;
1722
       /* PRT: record the block dimensions reported by addrlib as the PRT tile size,
        * plus where each mip level starts and its pitch.
        */
1723   if (in->flags.prt) {
1724      surf->prt_tile_width = out.blockWidth;
1725      surf->prt_tile_height = out.blockHeight;
1726      surf->prt_tile_depth = out.blockSlices;
1727
1728      surf->first_mip_tail_level = out.firstMipIdInTail;
1729
1730      for (unsigned i = 0; i < in->numMipLevels; i++) {
1731         surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset;
1732
               /* GFX10+ uses the per-level pitch; older chips use the mip chain pitch. */
1733         if (info->gfx_level >= GFX10)
1734            surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch;
1735         else
1736            surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch;
1737      }
1738   }
1739
       /* Stencil plane: it is placed after whatever surf_size already covers, aligned
        * to its own base alignment, and nothing below this block runs for it.
        */
1740   if (in->flags.stencil) {
1741      surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode;
1742      surf->u.gfx9.zs.stencil_epitch =
1743         out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
1744      surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign));
1745      surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign);
1746      surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize;
1747      return 0;
1748   }
1749
1750   surf->u.gfx9.swizzle_mode = in->swizzleMode;
1751   surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1;
1752
1753   /* CMASK fast clear uses these even if FMASK isn't allocated.
1754    * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4.
1755    */
1756   if (!in->flags.depth) {
1757      surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3;
1758      surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch;
1759   }
1760
1761   surf->u.gfx9.surf_slice_size = out.sliceSize;
1762   surf->u.gfx9.surf_pitch = out.pitch;
1763   surf->u.gfx9.surf_height = out.height;
1764   surf->surf_size = out.surfSize;
1765   surf->surf_alignment_log2 = util_logbase2(out.baseAlign);
1766
1767   if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch &&
1768       surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) {
1769      /* Adjust surf_pitch to be in elements units not in pixels */
1770      surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe);
1771      surf->u.gfx9.epitch =
1772         MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1);
1773      /* The surface is really a surf->bpe bytes per pixel surface even if we
1774       * use it as a surf->bpe bytes per element one.
1775       * Adjust surf_slice_size and surf_size to reflect the change
1776       * made to surf_pitch.
1777       */
1778      surf->u.gfx9.surf_slice_size =
1779         MAX2(surf->u.gfx9.surf_slice_size,
1780              surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w);
1781      surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices;
1782   }
1783
       /* Per-level offsets and pitches are only recorded for linear surfaces. */
1784   if (in->swizzleMode == ADDR_SW_LINEAR) {
1785      int alignment = 256 / surf->bpe;
1786      for (unsigned i = 0; i < in->numMipLevels; i++) {
1787         surf->u.gfx9.offset[i] = mip_info[i].offset;
1788         /* Adjust pitch like we did for surf_pitch */
1789         surf->u.gfx9.pitch[i] = align(mip_info[i].pitch / surf->blk_w, alignment);
1790      }
1791   }
1792
1793   surf->u.gfx9.base_mip_width = mip_info[0].pitch;
1794   surf->u.gfx9.base_mip_height = mip_info[0].height;
1795
       /* Depth plane: compute HTILE (unless disabled) and return; the color-only
        * metadata below is never computed for depth.
        */
1796   if (in->flags.depth) {
1797      assert(in->swizzleMode != ADDR_SW_LINEAR);
1798
1799      if (surf->flags & RADEON_SURF_NO_HTILE)
1800         return 0;
1801
1802      /* HTILE */
1803      ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0};
1804      ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0};
1805      ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1806
1807      hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT);
1808      hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT);
1809      hout.pMipInfo = meta_mip_info;
1810
1811      assert(in->flags.metaPipeUnaligned == 0);
1812      assert(in->flags.metaRbUnaligned == 0);
1813
1814      hin.hTileFlags.pipeAligned = 1;
1815      hin.hTileFlags.rbAligned = 1;
1816      hin.depthFlags = in->flags;
1817      hin.swizzleMode = in->swizzleMode;
1818      hin.unalignedWidth = in->width;
1819      hin.unalignedHeight = in->height;
1820      hin.numSlices = in->numSlices;
1821      hin.numMipLevels = in->numMipLevels;
1822      hin.firstMipIdInTail = out.firstMipIdInTail;
1823
1824      ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout);
1825      if (ret != ADDR_OK)
1826         return ret;
1827
1828      surf->meta_size = hout.htileBytes;
1829      surf->meta_slice_size = hout.sliceSize;
1830      surf->meta_alignment_log2 = util_logbase2(hout.baseAlign);
1831      surf->meta_pitch = hout.pitch;
1832      surf->num_meta_levels = in->numMipLevels;
1833
1834      for (unsigned i = 0; i < in->numMipLevels; i++) {
1835         surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
1836         surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
1837
1838         if (meta_mip_info[i].inMiptail) {
1839            /* GFX10 can only compress the first level
1840             * in the mip tail.
1841             */
1842            surf->num_meta_levels = i + 1;
1843            break;
1844         }
1845      }
1846
1847      if (!surf->num_meta_levels)
1848         surf->meta_size = 0;
1849
1850      if (info->gfx_level >= GFX10)
1851         ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation);
1852      return 0;
1853   }
1854
       /* Color plane: tile swizzle, DCC, FMASK and CMASK. */
1855   {
1856      /* Compute tile swizzle for the color surface.
1857       * All *_X and *_T modes can use the swizzle.
1858       */
1859      if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail &&
1860          !(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) {
1861         ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
1862         ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
1863
1864         xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
1865         xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
1866
               /* The counter is bumped and 1 is subtracted from the returned value.
                * NOTE(review): presumably this makes color surface indices start at 0,
                * unlike the FMASK counter below - confirm against p_atomic_inc_return.
                */
1867         xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
1868         xin.flags = in->flags;
1869         xin.swizzleMode = in->swizzleMode;
1870         xin.resourceType = in->resourceType;
1871         xin.format = in->format;
1872         xin.numSamples = in->numSamples;
1873         xin.numFrags = in->numFrags;
1874
1875         ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
1876         if (ret != ADDR_OK)
1877            return ret;
1878
1879         assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
1880         surf->tile_swizzle = xout.pipeBankXor;
1881      }
1882
            /* With an explicit modifier, the modifier alone decides whether DCC is used.
             * Otherwise DCC is used when graphics is available, DCC is not disabled by a
             * surface flag, compressed is false, the swizzle mode passes
             * is_dcc_supported_by_CB and, for displayable surfaces, is_dcc_supported_by_DCN
             * accepts it as well.
             */
1883      bool use_dcc = false;
1884      if (surf->modifier != DRM_FORMAT_MOD_INVALID) {
1885         use_dcc = ac_modifier_has_dcc(surf->modifier);
1886      } else {
1887         use_dcc = info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed &&
1888                   is_dcc_supported_by_CB(info, in->swizzleMode) &&
1889                   (!in->flags.display ||
1890                    is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned,
1891                                            !in->flags.metaPipeUnaligned));
1892      }
1893
1894      /* DCC */
1895      if (use_dcc) {
1896         ADDR2_COMPUTE_DCCINFO_INPUT din = {0};
1897         ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0};
1898         ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
1899
1900         din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT);
1901         dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT);
1902         dout.pMipInfo = meta_mip_info;
1903
1904         din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned;
1905         din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned;
1906         din.resourceType = in->resourceType;
1907         din.swizzleMode = in->swizzleMode;
1908         din.bpp = in->bpp;
1909         din.unalignedWidth = in->width;
1910         din.unalignedHeight = in->height;
1911         din.numSlices = in->numSlices;
1912         din.numFrags = in->numFrags;
1913         din.numMipLevels = in->numMipLevels;
1914         din.dataSurfaceSize = out.surfSize;
1915         din.firstMipIdInTail = out.firstMipIdInTail;
1916
               /* The addrlib lock is only taken on GFX9.
                * NOTE(review): presumably the GFX9 metadata computation in addrlib is not
                * thread-safe - confirm.
                */
1917         if (info->gfx_level == GFX9)
1918            simple_mtx_lock(&addrlib->lock);
1919         ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
1920         if (info->gfx_level == GFX9)
1921            simple_mtx_unlock(&addrlib->lock);
1922
1923         if (ret != ADDR_OK)
1924            return ret;
1925
1926         surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned;
1927         surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned;
1928         surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth;
1929         surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight;
1930         surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth;
1931         surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1;
1932         surf->u.gfx9.color.dcc_height = dout.height;
1933         surf->meta_size = dout.dccRamSize;
1934         surf->meta_slice_size = dout.dccRamSliceSize;
1935         surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
1936         surf->num_meta_levels = in->numMipLevels;
1937
1938         /* Disable DCC for levels that are in the mip tail.
1939          *
1940          * There are two issues that this is intended to
1941          * address:
1942          *
1943          * 1. Multiple mip levels may share a cache line. This
1944          *    can lead to corruption when switching between
1945          *    rendering to different mip levels because the
1946          *    RBs don't maintain coherency.
1947          *
1948          * 2. Texturing with metadata after rendering sometimes
1949          *    fails with corruption, probably for a similar
1950          *    reason.
1951          *
1952          * Working around these issues for all levels in the
1953          * mip tail may be overly conservative, but it's what
1954          * Vulkan does.
1955          *
1956          * Alternative solutions that also work but are worse:
1957          * - Disable DCC entirely.
1958          * - Flush TC L2 after rendering.
1959          */
1960         for (unsigned i = 0; i < in->numMipLevels; i++) {
1961            surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset;
1962            surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize;
1963
1964            if (meta_mip_info[i].inMiptail) {
1965               /* GFX10 can only compress the first level
1966                * in the mip tail.
1967                *
1968                * TODO: Try to do the same thing for gfx9
1969                *       if there are no regressions.
1970                */
1971               if (info->gfx_level >= GFX10)
1972                  surf->num_meta_levels = i + 1;
1973               else
1974                  surf->num_meta_levels = i;
1975               break;
1976            }
1977         }
1978
1979         if (!surf->num_meta_levels)
1980            surf->meta_size = 0;
1981
               /* Default: the displayable DCC parameters mirror the regular DCC ones.
                * They are overwritten below when a separate displayable DCC is computed.
                */
1982         surf->u.gfx9.color.display_dcc_size = surf->meta_size;
1983         surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2;
1984         surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max;
1985         surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height;
1986
               /* The DCC equation is only recorded for 2D resources. */
1987         if (in->resourceType == ADDR_RSRC_TEX_2D)
1988            ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation);
1989
1990         /* Compute displayable DCC. */
1991         if (((in->flags.display && info->use_display_dcc_with_retile_blit) ||
1992              ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) {
1993            /* Compute displayable DCC info. */
                  /* Same query as above, but with pipeAligned = rbAligned = 0; din and dout
                   * are reused, so dout no longer describes the regular DCC afterwards.
                   */
1994            din.dccKeyFlags.pipeAligned = 0;
1995            din.dccKeyFlags.rbAligned = 0;
1996
1997            assert(din.numSlices == 1);
1998            assert(din.numMipLevels == 1);
1999            assert(din.numFrags == 1);
2000            assert(surf->tile_swizzle == 0);
2001            assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned);
2002
2003            if (info->gfx_level == GFX9)
2004               simple_mtx_lock(&addrlib->lock);
2005            ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout);
2006            if (info->gfx_level == GFX9)
2007               simple_mtx_unlock(&addrlib->lock);
2008
2009            if (ret != ADDR_OK)
2010               return ret;
2011
2012            surf->u.gfx9.color.display_dcc_size = dout.dccRamSize;
2013            surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign);
2014            surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1;
2015            surf->u.gfx9.color.display_dcc_height = dout.height;
2016            assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size);
2017
2018            ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation);
2019            surf->u.gfx9.color.dcc.display_equation_valid = true;
2020         }
2021      }
2022
2023      /* FMASK (it doesn't exist on GFX11) */
2024      if (info->gfx_level <= GFX10_3 && info->has_graphics &&
2025          in->numSamples > 1 && !(surf->flags & RADEON_SURF_NO_FMASK)) {
2026         ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0};
2027         ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
2028
2029         fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT);
2030         fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT);
2031
               /* This replaces the provisional fmask_swizzle_mode / fmask_epitch values
                * derived from the color swizzle mode earlier in this function.
                */
2032         ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode);
2033         if (ret != ADDR_OK)
2034            return ret;
2035
2036         fin.unalignedWidth = in->width;
2037         fin.unalignedHeight = in->height;
2038         fin.numSlices = in->numSlices;
2039         fin.numSamples = in->numSamples;
2040         fin.numFrags = in->numFrags;
2041
2042         ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout);
2043         if (ret != ADDR_OK)
2044            return ret;
2045
2046         surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode;
2047         surf->u.gfx9.color.fmask_epitch = fout.pitch - 1;
2048         surf->fmask_size = fout.fmaskBytes;
2049         surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
2050         surf->fmask_slice_size = fout.sliceSize;
2051
2052         /* Compute tile swizzle for the FMASK surface. */
2053         if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T &&
2054             !(surf->flags & RADEON_SURF_SHAREABLE)) {
2055            ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0};
2056            ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0};
2057
2058            xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT);
2059            xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT);
2060
2061            /* This counter starts from 1 instead of 0. */
2062            xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
2063            xin.flags = in->flags;
2064            xin.swizzleMode = fin.swizzleMode;
2065            xin.resourceType = in->resourceType;
2066            xin.format = in->format;
2067            xin.numSamples = in->numSamples;
2068            xin.numFrags = in->numFrags;
2069
2070            ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout);
2071            if (ret != ADDR_OK)
2072               return ret;
2073
2074            assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8));
2075            surf->fmask_tile_swizzle = xout.pipeBankXor;
2076         }
2077      }
2078
2079      /* CMASK -- on GFX10 only for FMASK (and it doesn't exist on GFX11) */
            /* Computed for non-linear 2D resources when either (a) the chip is GFX9 or
             * older, the surface is single-sampled and the metadata is pipe/RB aligned,
             * or (b) the surface is multisampled and FMASK was allocated above.
             */
2080      if (info->gfx_level <= GFX10_3 && info->has_graphics &&
2081          in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D &&
2082          ((info->gfx_level <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 &&
2083            in->flags.metaRbUnaligned == 0) ||
2084           (surf->fmask_size && in->numSamples >= 2))) {
2085         ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0};
2086         ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0};
2087         ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0};
2088
2089         cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT);
2090         cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT);
2091         cout.pMipInfo = meta_mip_info;
2092
2093         assert(in->flags.metaPipeUnaligned == 0);
2094         assert(in->flags.metaRbUnaligned == 0);
2095
2096         cin.cMaskFlags.pipeAligned = 1;
2097         cin.cMaskFlags.rbAligned = 1;
2098         cin.resourceType = in->resourceType;
2099         cin.unalignedWidth = in->width;
2100         cin.unalignedHeight = in->height;
2101         cin.numSlices = in->numSlices;
2102         cin.numMipLevels = in->numMipLevels;
2103         cin.firstMipIdInTail = out.firstMipIdInTail;
2104
               /* Multisampled surfaces use the FMASK swizzle mode for CMASK. */
2105         if (in->numSamples > 1)
2106            cin.swizzleMode = surf->u.gfx9.color.fmask_swizzle_mode;
2107         else
2108            cin.swizzleMode = in->swizzleMode;
2109
2110         if (info->gfx_level == GFX9)
2111            simple_mtx_lock(&addrlib->lock);
2112         ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout);
2113         if (info->gfx_level == GFX9)
2114            simple_mtx_unlock(&addrlib->lock);
2115
2116         if (ret != ADDR_OK)
2117            return ret;
2118
2119         surf->cmask_size = cout.cmaskBytes;
2120         surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign);
2121         surf->cmask_slice_size = cout.sliceSize;
2122         surf->cmask_pitch = cout.pitch;
2123         surf->cmask_height = cout.height;
2124         surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset;
2125         surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize;
2126
2127         ac_copy_cmask_equation(info, &cout, &surf->u.gfx9.color.cmask_equation);
2128      }
2129   }
2130
2131   return 0;
2132}
2133
2134static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
2135                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
2136                                struct radeon_surf *surf)
2137{
2138   bool compressed;
2139   ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
2140   int r;
2141
2142   AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT);
2143
2144   compressed = surf->blk_w == 4 && surf->blk_h == 4;
2145
2146   /* The format must be set correctly for the allocation of compressed
2147    * textures to work. In other cases, setting the bpp is sufficient. */
2148   if (compressed) {
2149      switch (surf->bpe) {
2150      case 8:
2151         AddrSurfInfoIn.format = ADDR_FMT_BC1;
2152         break;
2153      case 16:
2154         AddrSurfInfoIn.format = ADDR_FMT_BC3;
2155         break;
2156      default:
2157         assert(0);
2158      }
2159   } else {
2160      switch (surf->bpe) {
2161      case 1:
2162         assert(!(surf->flags & RADEON_SURF_ZBUFFER));
2163         AddrSurfInfoIn.format = ADDR_FMT_8;
2164         break;
2165      case 2:
2166         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
2167         AddrSurfInfoIn.format = ADDR_FMT_16;
2168         break;
2169      case 4:
2170         assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER));
2171         AddrSurfInfoIn.format = ADDR_FMT_32;
2172         break;
2173      case 8:
2174         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
2175         AddrSurfInfoIn.format = ADDR_FMT_32_32;
2176         break;
2177      case 12:
2178         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
2179         AddrSurfInfoIn.format = ADDR_FMT_32_32_32;
2180         break;
2181      case 16:
2182         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
2183         AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32;
2184         break;
2185      default:
2186         assert(0);
2187      }
2188      AddrSurfInfoIn.bpp = surf->bpe * 8;
2189   }
2190
2191   bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
2192   AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET);
2193   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
2194   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
2195   /* flags.texture currently refers to TC-compatible HTILE */
2196   AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE;
2197   AddrSurfInfoIn.flags.opt4space = 1;
2198   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;
2199
2200   AddrSurfInfoIn.numMipLevels = config->info.levels;
2201   AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
2202   AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples;
2203
2204   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER))
2205      AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
2206
2207   /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures
2208    * as 2D to avoid having shader variants for 1D vs 2D, so all shaders
2209    * must sample 1D textures as 2D. */
2210   if (config->is_3d)
2211      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D;
2212   else if (info->gfx_level != GFX9 && config->is_1d)
2213      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D;
2214   else
2215      AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D;
2216
2217   AddrSurfInfoIn.width = config->info.width;
2218   AddrSurfInfoIn.height = config->info.height;
2219
2220   if (config->is_3d)
2221      AddrSurfInfoIn.numSlices = config->info.depth;
2222   else if (config->is_cube)
2223      AddrSurfInfoIn.numSlices = 6;
2224   else
2225      AddrSurfInfoIn.numSlices = config->info.array_size;
2226
2227   /* This is propagated to DCC. It must be 0 for HTILE and CMASK. */
2228   AddrSurfInfoIn.flags.metaPipeUnaligned = 0;
2229   AddrSurfInfoIn.flags.metaRbUnaligned = 0;
2230
2231   if (ac_modifier_has_dcc(surf->modifier)) {
2232      ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn);
2233   } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) {
2234      /* Optimal values for the L2 cache. */
2235      /* Don't change the DCC settings for imported buffers - they might differ. */
2236      if (!(surf->flags & RADEON_SURF_IMPORTED)) {
2237         if (info->gfx_level == GFX9) {
2238            surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
2239            surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
2240            surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2241         } else if (info->gfx_level >= GFX10) {
2242            surf->u.gfx9.color.dcc.independent_64B_blocks = 0;
2243            surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
2244            surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B;
2245         }
2246      }
2247
2248      if (AddrSurfInfoIn.flags.display) {
2249         /* The display hardware can only read DCC with RB_ALIGNED=0 and
2250          * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED.
2251          *
2252          * The CB block requires RB_ALIGNED=1 except 1 RB chips.
2253          * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes
2254          * after rendering, so PIPE_ALIGNED=1 is recommended.
2255          */
2256         if (info->use_display_dcc_unaligned) {
2257            AddrSurfInfoIn.flags.metaPipeUnaligned = 1;
2258            AddrSurfInfoIn.flags.metaRbUnaligned = 1;
2259         }
2260
2261         /* Adjust DCC settings to meet DCN requirements. */
2262         /* Don't change the DCC settings for imported buffers - they might differ. */
2263         if (!(surf->flags & RADEON_SURF_IMPORTED) &&
2264             (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit)) {
2265            /* Only Navi12/14 support independent 64B blocks in L2,
2266             * but without DCC image stores.
2267             */
2268            if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) {
2269               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
2270               surf->u.gfx9.color.dcc.independent_128B_blocks = 0;
2271               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2272            }
2273
2274            if ((info->gfx_level >= GFX10_3 && info->family <= CHIP_REMBRANDT) ||
2275                /* Newer chips will skip this when possible to get better performance.
2276                 * This is also possible for other gfx10.3 chips, but is disabled for
2277                 * interoperability between different Mesa versions.
2278                 */
2279                (info->family > CHIP_REMBRANDT &&
2280                 gfx10_DCN_requires_independent_64B_blocks(info, config))) {
2281               surf->u.gfx9.color.dcc.independent_64B_blocks = 1;
2282               surf->u.gfx9.color.dcc.independent_128B_blocks = 1;
2283               surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
2284            }
2285         }
2286      }
2287   }
2288
2289   if (surf->modifier == DRM_FORMAT_MOD_INVALID) {
2290      switch (mode) {
2291      case RADEON_SURF_MODE_LINEAR_ALIGNED:
2292         assert(config->info.samples <= 1);
2293         assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
2294         AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR;
2295         break;
2296
2297      case RADEON_SURF_MODE_1D:
2298      case RADEON_SURF_MODE_2D:
2299         if (surf->flags & RADEON_SURF_IMPORTED ||
2300             (info->gfx_level >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) {
2301            AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode;
2302            break;
2303         }
2304
2305         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
2306                                             &AddrSurfInfoIn.swizzleMode);
2307         if (r)
2308            return r;
2309         break;
2310
2311      default:
2312         assert(0);
2313      }
2314   } else {
2315      /* We have a valid and required modifier here. */
2316
2317      assert(!compressed);
2318      assert(!ac_modifier_has_dcc(surf->modifier) ||
2319             !(surf->flags & RADEON_SURF_DISABLE_DCC));
2320
2321      AddrSurfInfoIn.swizzleMode = ac_modifier_gfx9_swizzle_mode(surf->modifier);
2322   }
2323
2324   surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType;
2325   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
2326
2327   surf->num_meta_levels = 0;
2328   surf->surf_size = 0;
2329   surf->fmask_size = 0;
2330   surf->meta_size = 0;
2331   surf->meta_slice_size = 0;
2332   surf->u.gfx9.surf_offset = 0;
2333   if (AddrSurfInfoIn.flags.stencil)
2334      surf->u.gfx9.zs.stencil_offset = 0;
2335   surf->cmask_size = 0;
2336
2337   const bool only_stencil =
2338      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);
2339
2340   /* Calculate texture layout information. */
2341   if (!only_stencil) {
2342      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
2343      if (r)
2344         return r;
2345   }
2346
2347   /* Calculate texture layout information for stencil. */
2348   if (surf->flags & RADEON_SURF_SBUFFER) {
2349      AddrSurfInfoIn.flags.stencil = 1;
2350      AddrSurfInfoIn.bpp = 8;
2351      AddrSurfInfoIn.format = ADDR_FMT_8;
2352
2353      if (!AddrSurfInfoIn.flags.depth) {
2354         r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false,
2355                                             &AddrSurfInfoIn.swizzleMode);
2356         if (r)
2357            return r;
2358      } else
2359         AddrSurfInfoIn.flags.depth = 0;
2360
2361      r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn);
2362      if (r)
2363         return r;
2364   }
2365
2366   surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR;
2367
2368   /* Query whether the surface is displayable. */
2369   /* This is only useful for surfaces that are allocated without SCANOUT. */
2370   BOOL_32 displayable = false;
2371   if (!config->is_3d && !config->is_cube) {
2372      r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode,
2373                                         surf->bpe * 8, &displayable);
2374      if (r)
2375         return r;
2376
2377      /* Display needs unaligned DCC. */
2378      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
2379          surf->num_meta_levels &&
2380          (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
2381                                    surf->u.gfx9.color.dcc.pipe_aligned) ||
2382           /* Don't set is_displayable if displayable DCC is missing. */
2383           (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid)))
2384         displayable = false;
2385   }
2386   surf->is_displayable = displayable;
2387
2388   /* Validate that we allocated a displayable surface if requested. */
2389   assert(!AddrSurfInfoIn.flags.display || surf->is_displayable);
2390
2391   /* Validate that DCC is set up correctly. */
2392   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) {
2393      assert(is_dcc_supported_by_L2(info, surf));
2394      if (AddrSurfInfoIn.flags.color)
2395         assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode));
2396      if (AddrSurfInfoIn.flags.display && surf->modifier == DRM_FORMAT_MOD_INVALID) {
2397         assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
2398                                        surf->u.gfx9.color.dcc.pipe_aligned));
2399      }
2400   }
2401
2402   if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 &&
2403       AddrSurfInfoIn.flags.color && !surf->is_linear &&
2404       (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */
2405       !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE |
2406                        RADEON_SURF_FORCE_MICRO_TILE_MODE)) &&
2407       surf->modifier == DRM_FORMAT_MOD_INVALID &&
2408       is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned,
2409                               surf->u.gfx9.color.dcc.pipe_aligned)) {
2410      /* Validate that DCC is enabled if DCN can do it. */
2411      if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) &&
2412          AddrSurfInfoIn.flags.display && surf->bpe == 4) {
2413         assert(surf->num_meta_levels);
2414      }
2415
2416      /* Validate that non-scanout DCC is always enabled. */
2417      if (!AddrSurfInfoIn.flags.display)
2418         assert(surf->num_meta_levels);
2419   }
2420
2421   if (!surf->meta_size) {
2422      /* Unset this if HTILE is not present. */
2423      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
2424   }
2425
2426   if (surf->modifier != DRM_FORMAT_MOD_INVALID) {
2427      assert((surf->num_meta_levels != 0) == ac_modifier_has_dcc(surf->modifier));
2428   }
2429
2430   switch (surf->u.gfx9.swizzle_mode) {
2431   /* S = standard. */
2432   case ADDR_SW_256B_S:
2433   case ADDR_SW_4KB_S:
2434   case ADDR_SW_64KB_S:
2435   case ADDR_SW_64KB_S_T:
2436   case ADDR_SW_4KB_S_X:
2437   case ADDR_SW_64KB_S_X:
2438   case ADDR_SW_256KB_S_X:
2439      surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD;
2440      break;
2441
2442   /* D = display. */
2443   case ADDR_SW_LINEAR:
2444   case ADDR_SW_256B_D:
2445   case ADDR_SW_4KB_D:
2446   case ADDR_SW_64KB_D:
2447   case ADDR_SW_64KB_D_T:
2448   case ADDR_SW_4KB_D_X:
2449   case ADDR_SW_64KB_D_X:
2450   case ADDR_SW_256KB_D_X:
2451      surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY;
2452      break;
2453
2454   /* R = rotated (gfx9), render target (gfx10). */
2455   case ADDR_SW_256B_R:
2456   case ADDR_SW_4KB_R:
2457   case ADDR_SW_64KB_R:
2458   case ADDR_SW_64KB_R_T:
2459   case ADDR_SW_4KB_R_X:
2460   case ADDR_SW_64KB_R_X:
2461   case ADDR_SW_256KB_R_X:
2462      /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
2463       * used at the same time. We currently do not use rotated
2464       * in gfx9.
2465       */
2466      assert(info->gfx_level >= GFX10 || !"rotate micro tile mode is unsupported");
2467      surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER;
2468      break;
2469
2470   /* Z = depth. */
2471   case ADDR_SW_4KB_Z:
2472   case ADDR_SW_64KB_Z:
2473   case ADDR_SW_64KB_Z_T:
2474   case ADDR_SW_4KB_Z_X:
2475   case ADDR_SW_64KB_Z_X:
2476   case ADDR_SW_256KB_Z_X:
2477      surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH;
2478      break;
2479
2480   default:
2481      assert(0);
2482   }
2483
2484   return 0;
2485}
2486
2487int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info,
2488                       const struct ac_surf_config *config, enum radeon_surf_mode mode,
2489                       struct radeon_surf *surf)
2490{
2491   int r;
2492
2493   r = surf_config_sanity(config, surf->flags);
2494   if (r)
2495      return r;
2496
2497   if (info->family_id >= FAMILY_AI)
2498      r = gfx9_compute_surface(addrlib, info, config, mode, surf);
2499   else
2500      r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf);
2501
2502   if (r)
2503      return r;
2504
2505   /* Determine the memory layout of multiple allocations in one buffer. */
2506   surf->total_size = surf->surf_size;
2507   surf->alignment_log2 = surf->surf_alignment_log2;
2508
2509   /* Ensure the offsets are always 0 if not available. */
2510   surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0;
2511
2512   if (surf->fmask_size) {
2513      assert(config->info.samples >= 2);
2514      surf->fmask_offset = align64(surf->total_size, 1 << surf->fmask_alignment_log2);
2515      surf->total_size = surf->fmask_offset + surf->fmask_size;
2516      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2);
2517   }
2518
2519   /* Single-sample CMASK is in a separate buffer. */
2520   if (surf->cmask_size && config->info.samples >= 2) {
2521      surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
2522      surf->total_size = surf->cmask_offset + surf->cmask_size;
2523      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
2524   }
2525
2526   if (surf->is_displayable)
2527      surf->flags |= RADEON_SURF_SCANOUT;
2528
2529   if (surf->meta_size &&
2530       /* dcc_size is computed on GFX9+ only if it's displayable. */
2531       (info->gfx_level >= GFX9 || !get_display_flag(config, surf))) {
2532      /* It's better when displayable DCC is immediately after
2533       * the image due to hw-specific reasons.
2534       */
2535      if (info->gfx_level >= GFX9 &&
2536          !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
2537          surf->u.gfx9.color.dcc.display_equation_valid) {
2538         /* Add space for the displayable DCC buffer. */
2539         surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.color.display_dcc_alignment_log2);
2540         surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size;
2541      }
2542
2543      surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2);
2544      surf->total_size = surf->meta_offset + surf->meta_size;
2545      surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2);
2546   }
2547
2548   return 0;
2549}
2550
2551/* This is meant to be used for disabling DCC. */
2552void ac_surface_zero_dcc_fields(struct radeon_surf *surf)
2553{
2554   if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
2555      return;
2556
2557   surf->meta_offset = 0;
2558   surf->display_dcc_offset = 0;
2559   if (!surf->fmask_offset && !surf->cmask_offset) {
2560      surf->total_size = surf->surf_size;
2561      surf->alignment_log2 = surf->surf_alignment_log2;
2562   }
2563}
2564
/**
 * Decode a tile-split field value into the tile-split size.
 *
 * Encodings 0..6 map to 64, 128, 256, 512, 1024, 2048 and 4096
 * respectively; any other encoding decodes to 1024.
 */
static unsigned eg_tile_split(unsigned tile_split)
{
   static const unsigned decoded[] = {64, 128, 256, 512, 1024, 2048, 4096};
   const unsigned num_encodings = sizeof(decoded) / sizeof(decoded[0]);

   if (tile_split < num_encodings)
      return decoded[tile_split];

   /* Unknown encodings share the result of encoding 4. */
   return 1024;
}
2593
/**
 * Encode a tile-split size back into its field value; the inverse of the
 * decoding that maps 0..6 to 64..4096.
 *
 * Sizes 64, 128, 256, 512, 1024, 2048 and 4096 encode to 0..6; any other
 * size encodes to 4 (the encoding of 1024).
 */
static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   /* Encoding N stands for 64 << N. */
   for (unsigned encoding = 0; encoding <= 6; encoding++) {
      if ((64u << encoding) == eg_tile_split)
         return encoding;
   }

   return 4;
}
2614
2615#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45
2616#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK  0x3
2617
2618/* This should be called before ac_compute_surface. */
2619void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
2620                                uint64_t tiling_flags, enum radeon_surf_mode *mode)
2621{
2622   bool scanout;
2623
2624   if (info->gfx_level >= GFX9) {
2625      surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
2626      surf->u.gfx9.color.dcc.independent_64B_blocks =
2627         AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B);
2628      surf->u.gfx9.color.dcc.independent_128B_blocks =
2629         AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B);
2630      surf->u.gfx9.color.dcc.max_compressed_block_size =
2631         AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE);
2632      surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX);
2633      scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT);
2634      *mode =
2635         surf->u.gfx9.swizzle_mode > 0 ? RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED;
2636   } else {
2637      surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
2638      surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
2639      surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
2640      surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
2641      surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
2642      surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
2643      scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */
2644
2645      if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */
2646         *mode = RADEON_SURF_MODE_2D;
2647      else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
2648         *mode = RADEON_SURF_MODE_1D;
2649      else
2650         *mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
2651   }
2652
2653   if (scanout)
2654      surf->flags |= RADEON_SURF_SCANOUT;
2655   else
2656      surf->flags &= ~RADEON_SURF_SCANOUT;
2657}
2658
2659void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf,
2660                                uint64_t *tiling_flags)
2661{
2662   *tiling_flags = 0;
2663
2664   if (info->gfx_level >= GFX9) {
2665      uint64_t dcc_offset = 0;
2666
2667      if (surf->meta_offset) {
2668         dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset;
2669         assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24));
2670      }
2671
2672      *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode);
2673      *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8);
2674      *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max);
2675      *tiling_flags |=
2676         AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks);
2677      *tiling_flags |=
2678         AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks);
2679      *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE,
2680                                         surf->u.gfx9.color.dcc.max_compressed_block_size);
2681      *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0);
2682   } else {
2683      if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D)
2684         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
2685      else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D)
2686         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
2687      else
2688         *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */
2689
2690      *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config);
2691      *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw));
2692      *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh));
2693      if (surf->u.legacy.tile_split)
2694         *tiling_flags |=
2695            AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split));
2696      *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea));
2697      *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1);
2698
2699      if (surf->flags & RADEON_SURF_SCANOUT)
2700         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
2701      else
2702         *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
2703   }
2704}
2705
2706static uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info)
2707{
2708   return (ATI_VENDOR_ID << 16) | info->pci_id;
2709}
2710
2711/* This should be called after ac_compute_surface. */
2712bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
2713                                 unsigned num_storage_samples, unsigned num_mipmap_levels,
2714                                 unsigned size_metadata, const uint32_t metadata[64])
2715{
2716   const uint32_t *desc = &metadata[2];
2717   uint64_t offset;
2718
2719   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
2720      return true;
2721
2722   if (info->gfx_level >= GFX9)
2723      offset = surf->u.gfx9.surf_offset;
2724   else
2725      offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256;
2726
2727   if (offset ||                 /* Non-zero planes ignore metadata. */
2728       size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */
2729       metadata[0] == 0 ||       /* invalid version number */
2730       metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ {
2731      /* Disable DCC because it might not be enabled. */
2732      ac_surface_zero_dcc_fields(surf);
2733
2734      /* Don't report an error if the texture comes from an incompatible driver,
2735       * but this might not work.
2736       */
2737      return true;
2738   }
2739
2740   /* Validate that sample counts and the number of mipmap levels match. */
2741   unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]);
2742   unsigned type = G_008F1C_TYPE(desc[3]);
2743
2744   if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
2745      unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples));
2746
2747      if (desc_last_level != log_samples) {
2748         fprintf(stderr,
2749                 "amdgpu: invalid MSAA texture import, "
2750                 "metadata has log2(samples) = %u, the caller set %u\n",
2751                 desc_last_level, log_samples);
2752         return false;
2753      }
2754   } else {
2755      if (desc_last_level != num_mipmap_levels - 1) {
2756         fprintf(stderr,
2757                 "amdgpu: invalid mipmapped texture import, "
2758                 "metadata has last_level = %u, the caller set %u\n",
2759                 desc_last_level, num_mipmap_levels - 1);
2760         return false;
2761      }
2762   }
2763
2764   if (info->gfx_level >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) {
2765      /* Read DCC information. */
2766      switch (info->gfx_level) {
2767      case GFX8:
2768         surf->meta_offset = (uint64_t)desc[7] << 8;
2769         break;
2770
2771      case GFX9:
2772         surf->meta_offset =
2773            ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40);
2774         surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]);
2775         surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]);
2776
2777         /* If DCC is unaligned, this can only be a displayable image. */
2778         if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned)
2779            assert(surf->is_displayable);
2780         break;
2781
2782      case GFX10:
2783      case GFX10_3:
2784      case GFX11:
2785         surf->meta_offset =
2786            ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16);
2787         surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]);
2788         break;
2789
2790      default:
2791         assert(0);
2792         return false;
2793      }
2794   } else {
2795      /* Disable DCC. dcc_offset is always set by texture_from_handle
2796       * and must be cleared here.
2797       */
2798      ac_surface_zero_dcc_fields(surf);
2799   }
2800
2801   return true;
2802}
2803
2804void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf,
2805                                 unsigned num_mipmap_levels, uint32_t desc[8],
2806                                 unsigned *size_metadata, uint32_t metadata[64])
2807{
2808   /* Clear the base address and set the relative DCC offset. */
2809   desc[0] = 0;
2810   desc[1] &= C_008F14_BASE_ADDRESS_HI;
2811
2812   switch (info->gfx_level) {
2813   case GFX6:
2814   case GFX7:
2815      break;
2816   case GFX8:
2817      desc[7] = surf->meta_offset >> 8;
2818      break;
2819   case GFX9:
2820      desc[7] = surf->meta_offset >> 8;
2821      desc[5] &= C_008F24_META_DATA_ADDRESS;
2822      desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40);
2823      break;
2824   case GFX10:
2825   case GFX10_3:
2826   case GFX11:
2827      desc[6] &= C_00A018_META_DATA_ADDRESS_LO;
2828      desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8);
2829      desc[7] = surf->meta_offset >> 16;
2830      break;
2831   default:
2832      assert(0);
2833   }
2834
2835   /* Metadata image format format version 1:
2836    * [0] = 1 (metadata format identifier)
2837    * [1] = (VENDOR_ID << 16) | PCI_ID
2838    * [2:9] = image descriptor for the whole resource
2839    *         [2] is always 0, because the base address is cleared
2840    *         [9] is the DCC offset bits [39:8] from the beginning of
2841    *             the buffer
2842    * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level
2843    */
2844
2845   metadata[0] = 1; /* metadata image format version 1 */
2846
2847   /* Tiling modes are ambiguous without a PCI ID. */
2848   metadata[1] = ac_get_umd_metadata_word1(info);
2849
2850   /* Dwords [2:9] contain the image descriptor. */
2851   memcpy(&metadata[2], desc, 8 * 4);
2852   *size_metadata = 10 * 4;
2853
2854   /* Dwords [10:..] contain the mipmap level offsets. */
2855   if (info->gfx_level <= GFX8) {
2856      for (unsigned i = 0; i < num_mipmap_levels; i++)
2857         metadata[10 + i] = surf->u.legacy.level[i].offset_256B;
2858
2859      *size_metadata += num_mipmap_levels * 4;
2860   }
2861}
2862
2863static uint32_t ac_surface_get_gfx9_pitch_align(struct radeon_surf *surf)
2864{
2865   if (surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR)
2866      return 256 / surf->bpe;
2867
2868   if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D)
2869      return 1; /* TODO */
2870
2871   unsigned bpe_shift = util_logbase2(surf->bpe) / 2;
2872   switch(surf->u.gfx9.swizzle_mode & ~3) {
2873   case ADDR_SW_LINEAR: /* 256B block. */
2874      return 16 >> bpe_shift;
2875   case ADDR_SW_4KB_Z:
2876   case ADDR_SW_4KB_Z_X:
2877      return 64 >> bpe_shift;
2878   case ADDR_SW_64KB_Z:
2879   case ADDR_SW_64KB_Z_T:
2880   case ADDR_SW_64KB_Z_X:
2881      return 256 >> bpe_shift;
2882   case ADDR_SW_256KB_Z_X:
2883      return 512 >> bpe_shift;
2884   default:
2885      return 1; /* TODO */
2886   }
2887}
2888
2889bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf,
2890                                       unsigned num_mipmap_levels, uint64_t offset, unsigned pitch)
2891{
2892   /*
2893    * GFX10 and newer don't support custom strides. Furthermore, for
2894    * multiple miplevels or compression data we'd really need to rerun
2895    * addrlib to update all the fields in the surface. That, however, is a
2896    * software limitation and could be relaxed later.
2897    */
2898   bool require_equal_pitch = surf->surf_size != surf->total_size ||
2899                              num_mipmap_levels != 1 ||
2900                              info->gfx_level >= GFX10;
2901
2902   if (info->gfx_level >= GFX9) {
2903      if (pitch) {
2904         if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch)
2905            return false;
2906
2907         if ((ac_surface_get_gfx9_pitch_align(surf) - 1) & pitch)
2908            return false;
2909
2910         if (pitch != surf->u.gfx9.surf_pitch) {
2911            unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size;
2912
2913            surf->u.gfx9.surf_pitch = pitch;
2914            surf->u.gfx9.epitch = pitch - 1;
2915            surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe;
2916            surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices;
2917         }
2918      }
2919      surf->u.gfx9.surf_offset = offset;
2920      if (surf->u.gfx9.zs.stencil_offset)
2921         surf->u.gfx9.zs.stencil_offset += offset;
2922   } else {
2923      if (pitch) {
2924         if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch)
2925            return false;
2926
2927         surf->u.legacy.level[0].nblk_x = pitch;
2928         surf->u.legacy.level[0].slice_size_dw =
2929            ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4;
2930      }
2931
2932      if (offset) {
2933         for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i)
2934            surf->u.legacy.level[i].offset_256B += offset / 256;
2935      }
2936   }
2937
2938   if (offset & ((1 << surf->alignment_log2) - 1) ||
2939       offset >= UINT64_MAX - surf->total_size)
2940      return false;
2941
2942   if (surf->meta_offset)
2943      surf->meta_offset += offset;
2944   if (surf->fmask_offset)
2945      surf->fmask_offset += offset;
2946   if (surf->cmask_offset)
2947      surf->cmask_offset += offset;
2948   if (surf->display_dcc_offset)
2949      surf->display_dcc_offset += offset;
2950   return true;
2951}
2952
2953unsigned ac_surface_get_nplanes(const struct radeon_surf *surf)
2954{
2955   if (surf->modifier == DRM_FORMAT_MOD_INVALID)
2956      return 1;
2957   else if (surf->display_dcc_offset)
2958      return 3;
2959   else if (surf->meta_offset)
2960      return 2;
2961   else
2962      return 1;
2963}
2964
2965uint64_t ac_surface_get_plane_offset(enum amd_gfx_level gfx_level,
2966                                    const struct radeon_surf *surf,
2967                                    unsigned plane, unsigned layer)
2968{
2969   switch (plane) {
2970   case 0:
2971      if (gfx_level >= GFX9) {
2972         return surf->u.gfx9.surf_offset +
2973                layer * surf->u.gfx9.surf_slice_size;
2974      } else {
2975         return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 +
2976                layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4;
2977      }
2978   case 1:
2979      assert(!layer);
2980      return surf->display_dcc_offset ?
2981             surf->display_dcc_offset : surf->meta_offset;
2982   case 2:
2983      assert(!layer);
2984      return surf->meta_offset;
2985   default:
2986      unreachable("Invalid plane index");
2987   }
2988}
2989
2990uint64_t ac_surface_get_plane_stride(enum amd_gfx_level gfx_level,
2991                                    const struct radeon_surf *surf,
2992                                    unsigned plane, unsigned level)
2993{
2994   switch (plane) {
2995   case 0:
2996      if (gfx_level >= GFX9) {
2997         return (surf->is_linear ? surf->u.gfx9.pitch[level] : surf->u.gfx9.surf_pitch) * surf->bpe;
2998      } else {
2999         return surf->u.legacy.level[level].nblk_x * surf->bpe;
3000      }
3001   case 1:
3002      return 1 + (surf->display_dcc_offset ?
3003             surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max);
3004   case 2:
3005      return surf->u.gfx9.color.dcc_pitch_max + 1;
3006   default:
3007      unreachable("Invalid plane index");
3008   }
3009}
3010
3011uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf,
3012                                   unsigned plane)
3013{
3014   switch (plane) {
3015   case 0:
3016      return surf->surf_size;
3017   case 1:
3018      return surf->display_dcc_offset ?
3019             surf->u.gfx9.color.display_dcc_size : surf->meta_size;
3020   case 2:
3021      return surf->meta_size;
3022   default:
3023      unreachable("Invalid plane index");
3024   }
3025}
3026
3027void ac_surface_print_info(FILE *out, const struct radeon_info *info,
3028                           const struct radeon_surf *surf)
3029{
3030   if (info->gfx_level >= GFX9) {
3031      fprintf(out,
3032              "    Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", "
3033              "alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, "
3034              "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n",
3035              surf->surf_size, surf->u.gfx9.surf_slice_size,
3036              1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode,
3037              surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch,
3038              surf->blk_w, surf->blk_h, surf->bpe, surf->flags);
3039
3040      if (surf->fmask_offset)
3041         fprintf(out,
3042                 "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
3043                 "alignment=%u, swmode=%u, epitch=%u\n",
3044                 surf->fmask_offset, surf->fmask_size,
3045                 1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode,
3046                 surf->u.gfx9.color.fmask_epitch);
3047
3048      if (surf->cmask_offset)
3049         fprintf(out,
3050                 "    CMask: offset=%" PRIu64 ", size=%u, "
3051                 "alignment=%u\n",
3052                 surf->cmask_offset, surf->cmask_size,
3053                 1 << surf->cmask_alignment_log2);
3054
3055      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
3056         fprintf(out,
3057                 "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
3058                 surf->meta_offset, surf->meta_size,
3059                 1 << surf->meta_alignment_log2);
3060
3061      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
3062         fprintf(out,
3063                 "    DCC: offset=%" PRIu64 ", size=%u, "
3064                 "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n",
3065                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2,
3066                 surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels);
3067
3068      if (surf->has_stencil)
3069         fprintf(out,
3070                 "    Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n",
3071                 surf->u.gfx9.zs.stencil_offset,
3072                 surf->u.gfx9.zs.stencil_swizzle_mode,
3073                 surf->u.gfx9.zs.stencil_epitch);
3074   } else {
3075      fprintf(out,
3076              "    Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, "
3077              "bpe=%u, flags=0x%"PRIx64"\n",
3078              surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w,
3079              surf->blk_h, surf->bpe, surf->flags);
3080
3081      fprintf(out,
3082              "    Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, "
3083              "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n",
3084              surf->surf_size, 1 << surf->surf_alignment_log2,
3085              surf->u.legacy.bankw, surf->u.legacy.bankh,
3086              surf->u.legacy.num_banks, surf->u.legacy.mtilea,
3087              surf->u.legacy.tile_split, surf->u.legacy.pipe_config,
3088              (surf->flags & RADEON_SURF_SCANOUT) != 0);
3089
3090      if (surf->fmask_offset)
3091         fprintf(out,
3092                 "    FMask: offset=%" PRIu64 ", size=%" PRIu64 ", "
3093                 "alignment=%u, pitch_in_pixels=%u, bankh=%u, "
3094                 "slice_tile_max=%u, tile_mode_index=%u\n",
3095                 surf->fmask_offset, surf->fmask_size,
3096                 1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels,
3097                 surf->u.legacy.color.fmask.bankh,
3098                 surf->u.legacy.color.fmask.slice_tile_max,
3099                 surf->u.legacy.color.fmask.tiling_index);
3100
3101      if (surf->cmask_offset)
3102         fprintf(out,
3103                 "    CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "
3104                 "slice_tile_max=%u\n",
3105                 surf->cmask_offset, surf->cmask_size,
3106                 1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);
3107
3108      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
3109         fprintf(out, "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
3110                 surf->meta_offset, surf->meta_size,
3111                 1 << surf->meta_alignment_log2);
3112
3113      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
3114         fprintf(out, "    DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",
3115                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);
3116
3117      if (surf->has_stencil)
3118         fprintf(out, "    StencilLayout: tilesplit=%u\n",
3119                 surf->u.legacy.stencil_tile_split);
3120   }
3121}
3122
3123static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
3124                                                   struct gfx9_meta_equation *equation,
3125                                                   int blkSizeBias, unsigned blkStart,
3126                                                   nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,
3127                                                   nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
3128                                                   nir_ssa_def *pipe_xor,
3129                                                   nir_ssa_def **bit_position)
3130{
3131   nir_ssa_def *zero = nir_imm_int(b, 0);
3132   nir_ssa_def *one = nir_imm_int(b, 1);
3133
3134   assert(info->gfx_level >= GFX10);
3135
3136   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
3137   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
3138   unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias;
3139
3140   nir_ssa_def *coord[] = {x, y, z, 0};
3141   nir_ssa_def *address = zero;
3142
3143   for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {
3144      nir_ssa_def *v = zero;
3145
3146      for (unsigned c = 0; c < 4; c++) {
3147         unsigned index = i * 4 + c - (blkStart * 4);
3148         if (equation->u.gfx10_bits[index]) {
3149            unsigned mask = equation->u.gfx10_bits[index];
3150            nir_ssa_def *bits = coord[c];
3151
3152            while (mask)
3153               v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));
3154         }
3155      }
3156
3157      address = nir_ior(b, address, nir_ishl(b, v, nir_imm_int(b, i)));
3158   }
3159
3160   unsigned blkMask = (1 << blkSizeLog2) - 1;
3161   unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
3162   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
3163   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
3164   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
3165   nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
3166   nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
3167   nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl(b, nir_iand_imm(b, pipe_xor, pipeMask),
3168                                                   nir_imm_int(b, m_pipeInterleaveLog2)), blkMask);
3169
3170   if (bit_position)
3171      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
3172                                  nir_imm_int(b, 2));
3173
3174   return nir_iadd(b, nir_iadd(b, nir_imul(b, meta_slice_size, z),
3175                               nir_imul(b, blkIndex, nir_ishl(b, one, nir_imm_int(b, blkSizeLog2)))),
3176                   nir_ixor(b, nir_ushr(b, address, one), pipeXor));
3177}
3178
3179static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
3180                                                  struct gfx9_meta_equation *equation,
3181                                                  nir_ssa_def *meta_pitch, nir_ssa_def *meta_height,
3182                                                  nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
3183                                                  nir_ssa_def *sample, nir_ssa_def *pipe_xor,
3184                                                  nir_ssa_def **bit_position)
3185{
3186   nir_ssa_def *zero = nir_imm_int(b, 0);
3187   nir_ssa_def *one = nir_imm_int(b, 1);
3188
3189   assert(info->gfx_level >= GFX9);
3190
3191   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
3192   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
3193   unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth);
3194
3195   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
3196   unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
3197   nir_ssa_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
3198   nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
3199                                            pitchInBlock);
3200
3201   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
3202   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
3203   nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);
3204
3205   nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
3206                                                  nir_imul(b, yb, pitchInBlock)), xb);
3207   nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};
3208
3209   nir_ssa_def *address = zero;
3210   unsigned num_bits = equation->u.gfx9.num_bits;
3211   assert(num_bits <= 32);
3212
3213   /* Compute the address up until the last bit that doesn't use the block index. */
3214   for (unsigned i = 0; i < num_bits - 1; i++) {
3215      nir_ssa_def *xor = zero;
3216
3217      for (unsigned c = 0; c < 5; c++) {
3218         if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
3219            continue;
3220
3221         assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
3222         nir_ssa_def *ison =
3223            nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
3224                                     equation->u.gfx9.bit[i].coord[c].ord), one);
3225
3226         xor = nir_ixor(b, xor, ison);
3227      }
3228      address = nir_ior(b, address, nir_ishl(b, xor, nir_imm_int(b, i)));
3229   }
3230
3231   /* Fill the remaining bits with the block index. */
3232   unsigned last = num_bits - 1;
3233   address = nir_ior(b, address,
3234                     nir_ishl(b, nir_ushr_imm(b, blockIndex,
3235                                              equation->u.gfx9.bit[last].coord[0].ord),
3236                     nir_imm_int(b, last)));
3237
3238   if (bit_position)
3239      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
3240                                  nir_imm_int(b, 2));
3241
3242   nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
3243   return nir_ixor(b, nir_ushr(b, address, one),
3244                   nir_ishl(b, pipeXor, nir_imm_int(b, m_pipeInterleaveLog2)));
3245}
3246
3247nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
3248                                        unsigned bpe, struct gfx9_meta_equation *equation,
3249                                        nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
3250                                        nir_ssa_def *dcc_slice_size,
3251                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
3252                                        nir_ssa_def *sample, nir_ssa_def *pipe_xor)
3253{
3254   if (info->gfx_level >= GFX10) {
3255      unsigned bpp_log2 = util_logbase2(bpe);
3256
3257      return gfx10_nir_meta_addr_from_coord(b, info, equation, bpp_log2 - 8, 1,
3258                                            dcc_pitch, dcc_slice_size,
3259                                            x, y, z, pipe_xor, NULL);
3260   } else {
3261      return gfx9_nir_meta_addr_from_coord(b, info, equation, dcc_pitch,
3262                                           dcc_height, x, y, z,
3263                                           sample, pipe_xor, NULL);
3264   }
3265}
3266
3267nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
3268                                        struct gfx9_meta_equation *equation,
3269                                        nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
3270                                        nir_ssa_def *cmask_slice_size,
3271                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
3272                                        nir_ssa_def *pipe_xor,
3273                                        nir_ssa_def **bit_position)
3274{
3275   nir_ssa_def *zero = nir_imm_int(b, 0);
3276
3277   if (info->gfx_level >= GFX10) {
3278      return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1,
3279                                            cmask_pitch, cmask_slice_size,
3280                                            x, y, z, pipe_xor, bit_position);
3281   } else {
3282      return gfx9_nir_meta_addr_from_coord(b, info, equation, cmask_pitch,
3283                                           cmask_height, x, y, z, zero,
3284                                           pipe_xor, bit_position);
3285   }
3286}
3287
3288nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
3289                                          struct gfx9_meta_equation *equation,
3290                                          nir_ssa_def *htile_pitch,
3291                                          nir_ssa_def *htile_slice_size,
3292                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
3293                                          nir_ssa_def *pipe_xor)
3294{
3295   return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2,
3296                                            htile_pitch, htile_slice_size,
3297                                            x, y, z, pipe_xor, NULL);
3298}
3299