/*
 * Copyright © 2011 Red Hat All Rights Reserved.
 * Copyright © 2017 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */

#define AC_SURFACE_INCLUDE_NIR
#include "ac_surface.h"

#include "ac_drm_fourcc.h"
#include "ac_gpu_info.h"
#include "addrlib/inc/addrinterface.h"
#include "addrlib/src/amdgpu_asic_addr.h"
#include "amd_family.h"
#include "sid.h"
#include "util/hash_table.h"
#include "util/macros.h"
#include "util/simple_mtx.h"
#include "util/u_atomic.h"
#include "util/format/u_format.h"
#include "util/u_math.h"
#include "util/u_memory.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>

#ifdef _WIN32
#define AMDGPU_TILING_ARRAY_MODE_SHIFT 0
#define AMDGPU_TILING_ARRAY_MODE_MASK 0xf
#define AMDGPU_TILING_PIPE_CONFIG_SHIFT 4
#define AMDGPU_TILING_PIPE_CONFIG_MASK 0x1f
#define AMDGPU_TILING_TILE_SPLIT_SHIFT 9
#define AMDGPU_TILING_TILE_SPLIT_MASK 0x7
#define AMDGPU_TILING_MICRO_TILE_MODE_SHIFT 12
#define AMDGPU_TILING_MICRO_TILE_MODE_MASK 0x7
#define AMDGPU_TILING_BANK_WIDTH_SHIFT 15
#define AMDGPU_TILING_BANK_WIDTH_MASK 0x3
#define AMDGPU_TILING_BANK_HEIGHT_SHIFT 17
#define AMDGPU_TILING_BANK_HEIGHT_MASK 0x3
#define AMDGPU_TILING_MACRO_TILE_ASPECT_SHIFT 19
#define AMDGPU_TILING_MACRO_TILE_ASPECT_MASK 0x3
#define AMDGPU_TILING_NUM_BANKS_SHIFT 21
#define AMDGPU_TILING_NUM_BANKS_MASK 0x3
#define AMDGPU_TILING_SWIZZLE_MODE_SHIFT 0
#define AMDGPU_TILING_SWIZZLE_MODE_MASK 0x1f
#define AMDGPU_TILING_DCC_OFFSET_256B_SHIFT 5
#define AMDGPU_TILING_DCC_OFFSET_256B_MASK 0xFFFFFF
#define AMDGPU_TILING_DCC_PITCH_MAX_SHIFT 29
#define AMDGPU_TILING_DCC_PITCH_MAX_MASK 0x3FFF
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_SHIFT 43
#define AMDGPU_TILING_DCC_INDEPENDENT_64B_MASK 0x1
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_SHIFT 44
#define AMDGPU_TILING_DCC_INDEPENDENT_128B_MASK 0x1
#define AMDGPU_TILING_SCANOUT_SHIFT 63
#define AMDGPU_TILING_SCANOUT_MASK 0x1
#define AMDGPU_TILING_SET(field, value) \
   (((__u64)(value) & AMDGPU_TILING_##field##_MASK) << AMDGPU_TILING_##field##_SHIFT)
#define AMDGPU_TILING_GET(value, field) \
   (((__u64)(value) >> AMDGPU_TILING_##field##_SHIFT) & AMDGPU_TILING_##field##_MASK)
#else
#include "drm-uapi/amdgpu_drm.h"
#endif
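/* Illustrative note (not part of the original file): these helpers, defined
 * above for _WIN32 and otherwise provided by amdgpu_drm.h, pack and unpack
 * the kernel's 64-bit tiling_flags word, e.g.
 *    tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, swizzle_mode);
 *    unsigned swizzle = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE);
 */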
#ifndef CIASICIDGFXENGINE_SOUTHERNISLAND
#define CIASICIDGFXENGINE_SOUTHERNISLAND 0x0000000A
#endif

#ifndef CIASICIDGFXENGINE_ARCTICISLAND
#define CIASICIDGFXENGINE_ARCTICISLAND 0x0000000D
#endif

struct ac_addrlib {
   ADDR_HANDLE handle;
   simple_mtx_t lock;
};

bool ac_modifier_has_dcc(uint64_t modifier)
{
   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC, modifier);
}

bool ac_modifier_has_dcc_retile(uint64_t modifier)
{
   return IS_AMD_FMT_MOD(modifier) && AMD_FMT_MOD_GET(DCC_RETILE, modifier);
}

bool ac_modifier_supports_dcc_image_stores(uint64_t modifier)
{
   if (!ac_modifier_has_dcc(modifier))
      return false;

   return (!AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
           AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
           AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_128B) ||
          (AMD_FMT_MOD_GET(TILE_VERSION, modifier) >= AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS && /* gfx10.3 */
           AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier) &&
           AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier) &&
           AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier) == AMD_FMT_MOD_DCC_BLOCK_64B);
}
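/* Illustrative usage (assumed caller code, not part of this file): a driver
 * that has picked a modifier can gate shader image stores on it, e.g.
 *    bool stores_ok = ac_modifier_supports_dcc_image_stores(modifier);
 */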
bool ac_surface_supports_dcc_image_stores(enum amd_gfx_level gfx_level,
                                          const struct radeon_surf *surf)
{
   /* DCC image stores is only available for GFX10+. */
   if (gfx_level < GFX10)
      return false;

   /* DCC image stores support the following settings:
    * - INDEPENDENT_64B_BLOCKS = 0
    * - INDEPENDENT_128B_BLOCKS = 1
    * - MAX_COMPRESSED_BLOCK_SIZE = 128B
    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
    *
    * gfx10.3 also supports the following setting:
    * - INDEPENDENT_64B_BLOCKS = 1
    * - INDEPENDENT_128B_BLOCKS = 1
    * - MAX_COMPRESSED_BLOCK_SIZE = 64B
    * - MAX_UNCOMPRESSED_BLOCK_SIZE = 256B (always used)
    *
    * The compressor only looks at MAX_COMPRESSED_BLOCK_SIZE to determine
    * the INDEPENDENT_xx_BLOCKS settings. 128B implies INDEP_128B, while 64B
    * implies INDEP_64B && INDEP_128B.
    *
    * The same limitations apply to SDMA compressed stores because
    * SDMA uses the same DCC codec.
    */
   return (!surf->u.gfx9.color.dcc.independent_64B_blocks &&
           surf->u.gfx9.color.dcc.independent_128B_blocks &&
           surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_128B) ||
          (gfx_level >= GFX10_3 && /* gfx10.3 */
           surf->u.gfx9.color.dcc.independent_64B_blocks &&
           surf->u.gfx9.color.dcc.independent_128B_blocks &&
           surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B);
}

static
AddrSwizzleMode ac_modifier_gfx9_swizzle_mode(uint64_t modifier)
{
   if (modifier == DRM_FORMAT_MOD_LINEAR)
      return ADDR_SW_LINEAR;

   return AMD_FMT_MOD_GET(TILE, modifier);
}

static void
ac_modifier_fill_dcc_params(uint64_t modifier, struct radeon_surf *surf,
                            ADDR2_COMPUTE_SURFACE_INFO_INPUT *surf_info)
{
   assert(ac_modifier_has_dcc(modifier));

   if (AMD_FMT_MOD_GET(DCC_RETILE, modifier)) {
      surf_info->flags.metaPipeUnaligned = 0;
   } else {
      surf_info->flags.metaPipeUnaligned = !AMD_FMT_MOD_GET(DCC_PIPE_ALIGN, modifier);
   }

   /* The metaPipeUnaligned is not strictly necessary, but ensure we don't set metaRbUnaligned on
    * non-displayable DCC surfaces just because num_render_backends = 1 */
   surf_info->flags.metaRbUnaligned = AMD_FMT_MOD_GET(TILE_VERSION, modifier) == AMD_FMT_MOD_TILE_VER_GFX9 &&
                                      AMD_FMT_MOD_GET(RB, modifier) == 0 &&
                                      surf_info->flags.metaPipeUnaligned;

   surf->u.gfx9.color.dcc.independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier);
   surf->u.gfx9.color.dcc.independent_128B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_128B, modifier);
   surf->u.gfx9.color.dcc.max_compressed_block_size = AMD_FMT_MOD_GET(DCC_MAX_COMPRESSED_BLOCK, modifier);
}

bool ac_is_modifier_supported(const struct radeon_info *info,
                              const struct ac_modifier_options *options,
                              enum pipe_format format,
                              uint64_t modifier)
{

   if (util_format_is_compressed(format) ||
       util_format_is_depth_or_stencil(format) ||
       util_format_get_blocksizebits(format) > 64)
      return false;

   if (info->gfx_level < GFX9)
      return false;

   if (modifier == DRM_FORMAT_MOD_LINEAR)
      return true;

   /* GFX8 may need a different modifier for each plane */
   if (info->gfx_level < GFX9 && util_format_get_num_planes(format) > 1)
      return false;

   uint32_t allowed_swizzles = 0xFFFFFFFF;
   switch (info->gfx_level) {
   case GFX9:
      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x06000000 : 0x06660660;
      break;
   case GFX10:
   case GFX10_3:
      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x08000000 : 0x0E660660;
      break;
   case GFX11:
      allowed_swizzles = ac_modifier_has_dcc(modifier) ? 0x88000000 : 0xCC440440;
      break;
   default:
      return false;
   }

   if (!((1u << ac_modifier_gfx9_swizzle_mode(modifier)) & allowed_swizzles))
      return false;

   if (ac_modifier_has_dcc(modifier)) {
      /* TODO: support multi-planar formats with DCC */
      if (util_format_get_num_planes(format) > 1)
         return false;

      if (!info->has_graphics)
         return false;

      if (!options->dcc)
         return false;

      if (ac_modifier_has_dcc_retile(modifier) && !options->dcc_retile)
         return false;
   }

   return true;
}

bool ac_get_supported_modifiers(const struct radeon_info *info,
                                const struct ac_modifier_options *options,
                                enum pipe_format format,
                                unsigned *mod_count,
                                uint64_t *mods)
{
   unsigned current_mod = 0;

#define ADD_MOD(name) \
   if (ac_is_modifier_supported(info, options, format, (name))) { \
      if (mods && current_mod < *mod_count) \
         mods[current_mod] = (name); \
      ++current_mod; \
   }

   /* The modifiers have to be added in descending order of estimated
    * performance. The drivers will prefer modifiers that come earlier
    * in the list. */
   switch (info->gfx_level) {
   case GFX9: {
      unsigned pipe_xor_bits = MIN2(G_0098F8_NUM_PIPES(info->gb_addr_config) +
                                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config), 8);
      unsigned bank_xor_bits = MIN2(G_0098F8_NUM_BANKS(info->gb_addr_config), 8 - pipe_xor_bits);
      unsigned pipes = G_0098F8_NUM_PIPES(info->gb_addr_config);
      unsigned rb = G_0098F8_NUM_RB_PER_SE(info->gb_addr_config) +
                    G_0098F8_NUM_SHADER_ENGINES_GFX9(info->gb_addr_config);

      uint64_t common_dcc = AMD_FMT_MOD_SET(DCC, 1) |
                            AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                            AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B) |
                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, info->has_dcc_constant_encode) |
                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
                            AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits);

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              common_dcc |
              AMD_FMT_MOD_SET(PIPE, pipes) |
              AMD_FMT_MOD_SET(RB, rb))

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              common_dcc |
              AMD_FMT_MOD_SET(PIPE, pipes) |
              AMD_FMT_MOD_SET(RB, rb))

      if (util_format_get_blocksizebits(format) == 32) {
         if (info->max_render_backends == 1) {
            ADD_MOD(AMD_FMT_MOD |
                    AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
                    AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
                    common_dcc);
         }

         ADD_MOD(AMD_FMT_MOD |
                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 common_dcc |
                 AMD_FMT_MOD_SET(PIPE, pipes) |
                 AMD_FMT_MOD_SET(RB, rb))
      }

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(BANK_XOR_BITS, bank_xor_bits));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
      break;
   }
   case GFX10:
   case GFX10_3: {
      bool rbplus = info->gfx_level >= GFX10_3;
      unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
      unsigned pkrs = rbplus ? G_0098F8_NUM_PKRS(info->gb_addr_config) : 0;

      unsigned version = rbplus ? AMD_FMT_MOD_TILE_VER_GFX10_RBPLUS : AMD_FMT_MOD_TILE_VER_GFX10;
      uint64_t common_dcc = AMD_FMT_MOD_SET(TILE_VERSION, version) |
                            AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
                            AMD_FMT_MOD_SET(DCC, 1) |
                            AMD_FMT_MOD_SET(DCC_CONSTANT_ENCODE, 1) |
                            AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
                            AMD_FMT_MOD_SET(PACKERS, pkrs);

      ADD_MOD(AMD_FMT_MOD | common_dcc |
              AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1) |
              AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
              AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))

      if (info->gfx_level >= GFX10_3) {
         ADD_MOD(AMD_FMT_MOD | common_dcc |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B))

         ADD_MOD(AMD_FMT_MOD | common_dcc |
                 AMD_FMT_MOD_SET(DCC_RETILE, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                 AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
                 AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B))
      }

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE_VERSION, version) |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_R_X) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
              AMD_FMT_MOD_SET(PACKERS, pkrs))

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX10) |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S_X) |
              AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits))

      if (util_format_get_blocksizebits(format) != 32) {
         ADD_MOD(AMD_FMT_MOD |
                 AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D) |
                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));
      }

      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_S) |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX9));

      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
      break;
   }
   case GFX11: {
      /* GFX11 has new microblock organization. No S modes for 2D. */
      unsigned pipe_xor_bits = G_0098F8_NUM_PIPES(info->gb_addr_config);
      unsigned pkrs = G_0098F8_NUM_PKRS(info->gb_addr_config);
      unsigned num_pipes = 1 << pipe_xor_bits;

      /* R_X swizzle modes are the best for rendering and DCC requires them. */
      for (unsigned i = 0; i < 2; i++) {
         unsigned swizzle_r_x;

         /* Insert the best one first. */
         if (num_pipes > 16)
            swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX11_256K_R_X : AMD_FMT_MOD_TILE_GFX9_64K_R_X;
         else
            swizzle_r_x = !i ? AMD_FMT_MOD_TILE_GFX9_64K_R_X : AMD_FMT_MOD_TILE_GFX11_256K_R_X;

         /* Disable 256K on APUs because it doesn't work with DAL. */
         if (!info->has_dedicated_vram && swizzle_r_x == AMD_FMT_MOD_TILE_GFX11_256K_R_X)
            continue;

         uint64_t modifier_r_x = AMD_FMT_MOD |
                                 AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
                                 AMD_FMT_MOD_SET(TILE, swizzle_r_x) |
                                 AMD_FMT_MOD_SET(PIPE_XOR_BITS, pipe_xor_bits) |
                                 AMD_FMT_MOD_SET(PACKERS, pkrs);

         /* DCC_CONSTANT_ENCODE is not set because it can't vary with gfx11 (it's implied to be 1). */
         uint64_t modifier_dcc_best = modifier_r_x |
                                      AMD_FMT_MOD_SET(DCC, 1) |
                                      AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 0) |
                                      AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
                                      AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_128B);

         /* DCC settings for 4K and greater resolutions. (required by display hw) */
         uint64_t modifier_dcc_4k = modifier_r_x |
                                    AMD_FMT_MOD_SET(DCC, 1) |
                                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_64B, 1) |
                                    AMD_FMT_MOD_SET(DCC_INDEPENDENT_128B, 1) |
                                    AMD_FMT_MOD_SET(DCC_MAX_COMPRESSED_BLOCK, AMD_FMT_MOD_DCC_BLOCK_64B);

         /* Modifiers have to be sorted from best to worst.
          *
          * Top level order:
          *   1. The best chip-specific modifiers with DCC, potentially non-displayable.
          *   2. Chip-specific displayable modifiers with DCC.
          *   3. Chip-specific displayable modifiers without DCC.
          *   4. Chip-independent modifiers without DCC.
          *   5. Linear.
          */

         /* Add the best non-displayable modifier first. */
         ADD_MOD(modifier_dcc_best | AMD_FMT_MOD_SET(DCC_PIPE_ALIGN, 1));

         /* Displayable modifiers are next. */
         /* Add other displayable DCC settings. (DCC_RETILE implies displayable on all chips) */
         ADD_MOD(modifier_dcc_best | AMD_FMT_MOD_SET(DCC_RETILE, 1))
         ADD_MOD(modifier_dcc_4k | AMD_FMT_MOD_SET(DCC_RETILE, 1))

         /* Add one without DCC that is displayable (it's also optimal for non-displayable cases). */
         ADD_MOD(modifier_r_x)
      }

      /* Add one that is compatible with other gfx11 chips. */
      ADD_MOD(AMD_FMT_MOD |
              AMD_FMT_MOD_SET(TILE_VERSION, AMD_FMT_MOD_TILE_VER_GFX11) |
              AMD_FMT_MOD_SET(TILE, AMD_FMT_MOD_TILE_GFX9_64K_D))

      /* Linear must be last. */
      ADD_MOD(DRM_FORMAT_MOD_LINEAR)
      break;
   }
   default:
      break;
   }

#undef ADD_MOD

   if (!mods) {
      *mod_count = current_mod;
      return true;
   }

   bool complete = current_mod <= *mod_count;
   *mod_count = MIN2(*mod_count, current_mod);
   return complete;
}

static void *ADDR_API allocSysMem(const ADDR_ALLOCSYSMEM_INPUT *pInput)
{
   return malloc(pInput->sizeInBytes);
}

static ADDR_E_RETURNCODE ADDR_API freeSysMem(const ADDR_FREESYSMEM_INPUT *pInput)
{
   free(pInput->pVirtAddr);
   return ADDR_OK;
}

struct ac_addrlib *ac_addrlib_create(const struct radeon_info *info,
                                     uint64_t *max_alignment)
{
   ADDR_CREATE_INPUT addrCreateInput = {0};
   ADDR_CREATE_OUTPUT addrCreateOutput = {0};
   ADDR_REGISTER_VALUE regValue = {0};
   ADDR_CREATE_FLAGS createFlags = {{0}};
   ADDR_GET_MAX_ALIGNMENTS_OUTPUT addrGetMaxAlignmentsOutput = {0};
   ADDR_E_RETURNCODE addrRet;

   addrCreateInput.size = sizeof(ADDR_CREATE_INPUT);
   addrCreateOutput.size = sizeof(ADDR_CREATE_OUTPUT);

   regValue.gbAddrConfig = info->gb_addr_config;
   createFlags.value = 0;

   addrCreateInput.chipFamily = info->family_id;
   addrCreateInput.chipRevision = info->chip_external_rev;

   if (addrCreateInput.chipFamily == FAMILY_UNKNOWN)
      return NULL;

   if (addrCreateInput.chipFamily >= FAMILY_AI) {
      addrCreateInput.chipEngine = CIASICIDGFXENGINE_ARCTICISLAND;
   } else {
      regValue.noOfBanks = info->mc_arb_ramcfg & 0x3;
      regValue.noOfRanks = (info->mc_arb_ramcfg & 0x4) >> 2;

      regValue.backendDisables = info->enabled_rb_mask;
      regValue.pTileConfig = info->si_tile_mode_array;
      regValue.noOfEntries = ARRAY_SIZE(info->si_tile_mode_array);
      if (addrCreateInput.chipFamily == FAMILY_SI) {
         regValue.pMacroTileConfig = NULL;
         regValue.noOfMacroEntries = 0;
      } else {
         regValue.pMacroTileConfig = info->cik_macrotile_mode_array;
         regValue.noOfMacroEntries = ARRAY_SIZE(info->cik_macrotile_mode_array);
      }

      createFlags.useTileIndex = 1;
      createFlags.useHtileSliceAlign = 1;

      addrCreateInput.chipEngine = CIASICIDGFXENGINE_SOUTHERNISLAND;
   }

   addrCreateInput.callbacks.allocSysMem = allocSysMem;
   addrCreateInput.callbacks.freeSysMem = freeSysMem;
   addrCreateInput.callbacks.debugPrint = 0;
   addrCreateInput.createFlags = createFlags;
   addrCreateInput.regValue = regValue;

   addrRet = AddrCreate(&addrCreateInput, &addrCreateOutput);
   if (addrRet != ADDR_OK)
      return NULL;

   if (max_alignment) {
      addrRet = AddrGetMaxAlignments(addrCreateOutput.hLib, &addrGetMaxAlignmentsOutput);
      if (addrRet == ADDR_OK) {
         *max_alignment = addrGetMaxAlignmentsOutput.baseAlign;
      }
   }

   struct ac_addrlib *addrlib = calloc(1, sizeof(struct ac_addrlib));
   if (!addrlib) {
      AddrDestroy(addrCreateOutput.hLib);
      return NULL;
   }

   addrlib->handle = addrCreateOutput.hLib;
   simple_mtx_init(&addrlib->lock, mtx_plain);
   return addrlib;
}

void ac_addrlib_destroy(struct ac_addrlib *addrlib)
{
   simple_mtx_destroy(&addrlib->lock);
   AddrDestroy(addrlib->handle);
   free(addrlib);
}

void *ac_addrlib_get_handle(struct ac_addrlib *addrlib)
{
   return addrlib->handle;
}
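/* Illustrative usage (assumed caller code, not part of this file):
 *
 *    uint64_t max_align = 0;
 *    struct ac_addrlib *addrlib = ac_addrlib_create(&info, &max_align);
 *    if (addrlib) {
 *       ... compute surface layouts ...
 *       ac_addrlib_destroy(addrlib);
 *    }
 */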

static int surf_config_sanity(const struct ac_surf_config *config, unsigned flags)
{
   /* FMASK is allocated together with the color surface and can't be
    * allocated separately.
    */
   assert(!(flags & RADEON_SURF_FMASK));
   if (flags & RADEON_SURF_FMASK)
      return -EINVAL;

   /* all dimensions must be at least 1 */
   if (!config->info.width || !config->info.height || !config->info.depth ||
       !config->info.array_size || !config->info.levels)
      return -EINVAL;

   switch (config->info.samples) {
   case 0:
   case 1:
   case 2:
   case 4:
   case 8:
      break;
   case 16:
      if (flags & RADEON_SURF_Z_OR_SBUFFER)
         return -EINVAL;
      break;
   default:
      return -EINVAL;
   }

   if (!(flags & RADEON_SURF_Z_OR_SBUFFER)) {
      switch (config->info.storage_samples) {
      case 0:
      case 1:
      case 2:
      case 4:
      case 8:
         break;
      default:
         return -EINVAL;
      }
   }

   if (config->is_3d && config->info.array_size > 1)
      return -EINVAL;
   if (config->is_cube && config->info.depth > 1)
      return -EINVAL;

   return 0;
}

static int gfx6_compute_level(ADDR_HANDLE addrlib, const struct ac_surf_config *config,
                              struct radeon_surf *surf, bool is_stencil, unsigned level,
                              bool compressed, ADDR_COMPUTE_SURFACE_INFO_INPUT *AddrSurfInfoIn,
                              ADDR_COMPUTE_SURFACE_INFO_OUTPUT *AddrSurfInfoOut,
                              ADDR_COMPUTE_DCCINFO_INPUT *AddrDccIn,
                              ADDR_COMPUTE_DCCINFO_OUTPUT *AddrDccOut,
                              ADDR_COMPUTE_HTILE_INFO_INPUT *AddrHtileIn,
                              ADDR_COMPUTE_HTILE_INFO_OUTPUT *AddrHtileOut)
{
   struct legacy_surf_level *surf_level;
   struct legacy_surf_dcc_level *dcc_level;
   ADDR_E_RETURNCODE ret;

   AddrSurfInfoIn->mipLevel = level;
   AddrSurfInfoIn->width = u_minify(config->info.width, level);
   AddrSurfInfoIn->height = u_minify(config->info.height, level);

   /* Make GFX6 linear surfaces compatible with GFX9 for hybrid graphics,
    * because GFX9 needs linear alignment of 256 bytes.
    */
   if (config->info.levels == 1 && AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED &&
       AddrSurfInfoIn->bpp && util_is_power_of_two_or_zero(AddrSurfInfoIn->bpp)) {
      unsigned alignment = 256 / (AddrSurfInfoIn->bpp / 8);

      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, alignment);
   }

   /* addrlib assumes the bytes/pixel is a divisor of 64, which is not
    * true for r32g32b32 formats. */
   if (AddrSurfInfoIn->bpp == 96) {
      assert(config->info.levels == 1);
      assert(AddrSurfInfoIn->tileMode == ADDR_TM_LINEAR_ALIGNED);

      /* The least common multiple of 64 bytes and 12 bytes/pixel is
       * 192 bytes, or 16 pixels. */
      AddrSurfInfoIn->width = align(AddrSurfInfoIn->width, 16);
   }

   if (config->is_3d)
      AddrSurfInfoIn->numSlices = u_minify(config->info.depth, level);
   else if (config->is_cube)
      AddrSurfInfoIn->numSlices = 6;
   else
      AddrSurfInfoIn->numSlices = config->info.array_size;

   if (level > 0) {
      /* Set the base level pitch. This is needed for calculation
       * of non-zero levels. */
      if (is_stencil)
         AddrSurfInfoIn->basePitch = surf->u.legacy.zs.stencil_level[0].nblk_x;
      else
         AddrSurfInfoIn->basePitch = surf->u.legacy.level[0].nblk_x;

      /* Convert blocks to pixels for compressed formats. */
      if (compressed)
         AddrSurfInfoIn->basePitch *= surf->blk_w;
   }

   ret = AddrComputeSurfaceInfo(addrlib, AddrSurfInfoIn, AddrSurfInfoOut);
   if (ret != ADDR_OK) {
      return ret;
   }

   surf_level = is_stencil ? &surf->u.legacy.zs.stencil_level[level] : &surf->u.legacy.level[level];
   dcc_level = &surf->u.legacy.color.dcc_level[level];
   surf_level->offset_256B = align64(surf->surf_size, AddrSurfInfoOut->baseAlign) / 256;
   surf_level->slice_size_dw = AddrSurfInfoOut->sliceSize / 4;
   surf_level->nblk_x = AddrSurfInfoOut->pitch;
   surf_level->nblk_y = AddrSurfInfoOut->height;

   switch (AddrSurfInfoOut->tileMode) {
   case ADDR_TM_LINEAR_ALIGNED:
      surf_level->mode = RADEON_SURF_MODE_LINEAR_ALIGNED;
      break;
   case ADDR_TM_1D_TILED_THIN1:
   case ADDR_TM_PRT_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_1D;
      break;
   case ADDR_TM_2D_TILED_THIN1:
   case ADDR_TM_PRT_2D_TILED_THIN1:
      surf_level->mode = RADEON_SURF_MODE_2D;
      break;
   default:
      assert(0);
   }

   if (is_stencil)
      surf->u.legacy.zs.stencil_tiling_index[level] = AddrSurfInfoOut->tileIndex;
   else
      surf->u.legacy.tiling_index[level] = AddrSurfInfoOut->tileIndex;

   if (AddrSurfInfoIn->flags.prt) {
      if (level == 0) {
         surf->prt_tile_width = AddrSurfInfoOut->pitchAlign;
         surf->prt_tile_height = AddrSurfInfoOut->heightAlign;
         surf->prt_tile_depth = AddrSurfInfoOut->depthAlign;
      }
      if (surf_level->nblk_x >= surf->prt_tile_width &&
          surf_level->nblk_y >= surf->prt_tile_height) {
         /* +1 because the current level is not in the miptail */
         surf->first_mip_tail_level = level + 1;
      }
   }

   surf->surf_size = (uint64_t)surf_level->offset_256B * 256 + AddrSurfInfoOut->surfSize;

   /* Clear DCC fields at the beginning. */
   if (!AddrSurfInfoIn->flags.depth && !AddrSurfInfoIn->flags.stencil)
      dcc_level->dcc_offset = 0;

   /* The previous level's flag tells us if we can use DCC for this level. */
   if (AddrSurfInfoIn->flags.dccCompatible && (level == 0 || AddrDccOut->subLvlCompressible)) {
      bool prev_level_clearable = level == 0 || AddrDccOut->dccRamSizeAligned;

      AddrDccIn->colorSurfSize = AddrSurfInfoOut->surfSize;
      AddrDccIn->tileMode = AddrSurfInfoOut->tileMode;
      AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo;
      AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex;
      AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex;

      ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut);

      if (ret == ADDR_OK) {
         dcc_level->dcc_offset = surf->meta_size;
         surf->num_meta_levels = level + 1;
         surf->meta_size = dcc_level->dcc_offset + AddrDccOut->dccRamSize;
         surf->meta_alignment_log2 = MAX2(surf->meta_alignment_log2, util_logbase2(AddrDccOut->dccRamBaseAlign));

         /* If the DCC size of a subresource (1 mip level or 1 slice)
          * is not aligned, the DCC memory layout is not contiguous for
          * that subresource, which means we can't use fast clear.
          *
          * We only do fast clears for whole mipmap levels. If we did
          * per-slice fast clears, the same restriction would apply.
          * (i.e. only compute the slice size and see if it's aligned)
          *
          * The last level can be non-contiguous and still be clearable
          * if it's interleaved with the next level that doesn't exist.
          */
         if (AddrDccOut->dccRamSizeAligned ||
             (prev_level_clearable && level == config->info.levels - 1))
            dcc_level->dcc_fast_clear_size = AddrDccOut->dccFastClearSize;
         else
            dcc_level->dcc_fast_clear_size = 0;
778 */ 779 surf->meta_slice_size = AddrDccOut->dccRamSize / config->info.array_size; 780 781 /* For arrays, we have to compute the DCC info again 782 * with one slice size to get a correct fast clear 783 * size. 784 */ 785 if (config->info.array_size > 1) { 786 AddrDccIn->colorSurfSize = AddrSurfInfoOut->sliceSize; 787 AddrDccIn->tileMode = AddrSurfInfoOut->tileMode; 788 AddrDccIn->tileInfo = *AddrSurfInfoOut->pTileInfo; 789 AddrDccIn->tileIndex = AddrSurfInfoOut->tileIndex; 790 AddrDccIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 791 792 ret = AddrComputeDccInfo(addrlib, AddrDccIn, AddrDccOut); 793 if (ret == ADDR_OK) { 794 /* If the DCC memory isn't properly 795 * aligned, the data are interleaved 796 * accross slices. 797 */ 798 if (AddrDccOut->dccRamSizeAligned) 799 dcc_level->dcc_slice_fast_clear_size = AddrDccOut->dccFastClearSize; 800 else 801 dcc_level->dcc_slice_fast_clear_size = 0; 802 } 803 804 if (surf->flags & RADEON_SURF_CONTIGUOUS_DCC_LAYERS && 805 surf->meta_slice_size != dcc_level->dcc_slice_fast_clear_size) { 806 surf->meta_size = 0; 807 surf->num_meta_levels = 0; 808 AddrDccOut->subLvlCompressible = false; 809 } 810 } else { 811 dcc_level->dcc_slice_fast_clear_size = dcc_level->dcc_fast_clear_size; 812 } 813 } 814 } 815 816 /* HTILE. */ 817 if (!is_stencil && AddrSurfInfoIn->flags.depth && surf_level->mode == RADEON_SURF_MODE_2D && 818 level == 0 && !(surf->flags & RADEON_SURF_NO_HTILE)) { 819 AddrHtileIn->flags.tcCompatible = AddrSurfInfoOut->tcCompatible; 820 AddrHtileIn->pitch = AddrSurfInfoOut->pitch; 821 AddrHtileIn->height = AddrSurfInfoOut->height; 822 AddrHtileIn->numSlices = AddrSurfInfoOut->depth; 823 AddrHtileIn->blockWidth = ADDR_HTILE_BLOCKSIZE_8; 824 AddrHtileIn->blockHeight = ADDR_HTILE_BLOCKSIZE_8; 825 AddrHtileIn->pTileInfo = AddrSurfInfoOut->pTileInfo; 826 AddrHtileIn->tileIndex = AddrSurfInfoOut->tileIndex; 827 AddrHtileIn->macroModeIndex = AddrSurfInfoOut->macroModeIndex; 828 829 ret = AddrComputeHtileInfo(addrlib, AddrHtileIn, AddrHtileOut); 830 831 if (ret == ADDR_OK) { 832 surf->meta_size = AddrHtileOut->htileBytes; 833 surf->meta_slice_size = AddrHtileOut->sliceSize; 834 surf->meta_alignment_log2 = util_logbase2(AddrHtileOut->baseAlign); 835 surf->meta_pitch = AddrHtileOut->pitch; 836 surf->num_meta_levels = level + 1; 837 } 838 } 839 840 return 0; 841} 842 843static void gfx6_set_micro_tile_mode(struct radeon_surf *surf, const struct radeon_info *info) 844{ 845 uint32_t tile_mode = info->si_tile_mode_array[surf->u.legacy.tiling_index[0]]; 846 847 if (info->gfx_level >= GFX7) 848 surf->micro_tile_mode = G_009910_MICRO_TILE_MODE_NEW(tile_mode); 849 else 850 surf->micro_tile_mode = G_009910_MICRO_TILE_MODE(tile_mode); 851} 852 853static unsigned cik_get_macro_tile_index(struct radeon_surf *surf) 854{ 855 unsigned index, tileb; 856 857 tileb = 8 * 8 * surf->bpe; 858 tileb = MIN2(surf->u.legacy.tile_split, tileb); 859 860 for (index = 0; tileb > 64; index++) 861 tileb >>= 1; 862 863 assert(index < 16); 864 return index; 865} 866 867static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf) 868{ 869 unsigned num_channels = config->info.num_channels; 870 unsigned bpe = surf->bpe; 871 872 /* With modifiers the kernel is in charge of whether it is displayable. 873 * We need to ensure at least 32 pixels pitch alignment, but this is 874 * always the case when the blocksize >= 4K. 

static bool get_display_flag(const struct ac_surf_config *config, const struct radeon_surf *surf)
{
   unsigned num_channels = config->info.num_channels;
   unsigned bpe = surf->bpe;

   /* With modifiers the kernel is in charge of whether it is displayable.
    * We need to ensure at least 32 pixels pitch alignment, but this is
    * always the case when the blocksize >= 4K.
    */
   if (surf->modifier != DRM_FORMAT_MOD_INVALID)
      return false;

   if (!config->is_1d && !config->is_3d && !config->is_cube &&
       !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
       surf->flags & RADEON_SURF_SCANOUT && config->info.samples <= 1 && surf->blk_w <= 2 &&
       surf->blk_h == 1) {
      /* subsampled */
      if (surf->blk_w == 2 && surf->blk_h == 1)
         return true;

      if (/* RGBA8 or RGBA16F */
          (bpe >= 4 && bpe <= 8 && num_channels == 4) ||
          /* R5G6B5 or R5G5B5A1 */
          (bpe == 2 && num_channels >= 3) ||
          /* C8 palette */
          (bpe == 1 && num_channels == 1))
         return true;
   }
   return false;
}

/**
 * This must be called after the first level is computed.
 *
 * Copy surface-global settings like pipe/bank config from level 0 surface
 * computation, and compute tile swizzle.
 */
static int gfx6_surface_settings(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                 const struct ac_surf_config *config,
                                 ADDR_COMPUTE_SURFACE_INFO_OUTPUT *csio, struct radeon_surf *surf)
{
   surf->surf_alignment_log2 = util_logbase2(csio->baseAlign);
   surf->u.legacy.pipe_config = csio->pTileInfo->pipeConfig - 1;
   gfx6_set_micro_tile_mode(surf, info);

   /* For 2D modes only. */
   if (csio->tileMode >= ADDR_TM_2D_TILED_THIN1) {
      surf->u.legacy.bankw = csio->pTileInfo->bankWidth;
      surf->u.legacy.bankh = csio->pTileInfo->bankHeight;
      surf->u.legacy.mtilea = csio->pTileInfo->macroAspectRatio;
      surf->u.legacy.tile_split = csio->pTileInfo->tileSplitBytes;
      surf->u.legacy.num_banks = csio->pTileInfo->banks;
      surf->u.legacy.macro_tile_index = csio->macroModeIndex;
   } else {
      surf->u.legacy.macro_tile_index = 0;
   }

   /* Compute tile swizzle. */
   /* TODO: fix tile swizzle with mipmapping for GFX6 */
   if ((info->gfx_level >= GFX7 || config->info.levels == 1) && config->info.surf_index &&
       surf->u.legacy.level[0].mode == RADEON_SURF_MODE_2D &&
       !(surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_SHAREABLE)) &&
       !get_display_flag(config, surf)) {
      ADDR_COMPUTE_BASE_SWIZZLE_INPUT AddrBaseSwizzleIn = {0};
      ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT AddrBaseSwizzleOut = {0};

      AddrBaseSwizzleIn.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
      AddrBaseSwizzleOut.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

      AddrBaseSwizzleIn.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1;
      AddrBaseSwizzleIn.tileIndex = csio->tileIndex;
      AddrBaseSwizzleIn.macroModeIndex = csio->macroModeIndex;
      AddrBaseSwizzleIn.pTileInfo = csio->pTileInfo;
      AddrBaseSwizzleIn.tileMode = csio->tileMode;

      int r = AddrComputeBaseSwizzle(addrlib, &AddrBaseSwizzleIn, &AddrBaseSwizzleOut);
      if (r != ADDR_OK)
         return r;

      assert(AddrBaseSwizzleOut.tileSwizzle <=
             u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
      surf->tile_swizzle = AddrBaseSwizzleOut.tileSwizzle;
   }
   return 0;
}

static void ac_compute_cmask(const struct radeon_info *info, const struct ac_surf_config *config,
                             struct radeon_surf *surf)
{
   unsigned pipe_interleave_bytes = info->pipe_interleave_bytes;
   unsigned num_pipes = info->num_tile_pipes;
   unsigned cl_width, cl_height;

   if (surf->flags & RADEON_SURF_Z_OR_SBUFFER || surf->is_linear ||
       (config->info.samples >= 2 && !surf->fmask_size))
      return;

   assert(info->gfx_level <= GFX8);

   switch (num_pipes) {
   case 2:
      cl_width = 32;
      cl_height = 16;
      break;
   case 4:
      cl_width = 32;
      cl_height = 32;
      break;
   case 8:
      cl_width = 64;
      cl_height = 32;
      break;
   case 16: /* Hawaii */
      cl_width = 64;
      cl_height = 64;
      break;
   default:
      assert(0);
      return;
   }

   unsigned base_align = num_pipes * pipe_interleave_bytes;

   unsigned width = align(surf->u.legacy.level[0].nblk_x, cl_width * 8);
   unsigned height = align(surf->u.legacy.level[0].nblk_y, cl_height * 8);
   unsigned slice_elements = (width * height) / (8 * 8);

   /* Each element of CMASK is a nibble. */
   unsigned slice_bytes = slice_elements / 2;

   surf->u.legacy.color.cmask_slice_tile_max = (width * height) / (128 * 128);
   if (surf->u.legacy.color.cmask_slice_tile_max)
      surf->u.legacy.color.cmask_slice_tile_max -= 1;

   unsigned num_layers;
   if (config->is_3d)
      num_layers = config->info.depth;
   else if (config->is_cube)
      num_layers = 6;
   else
      num_layers = config->info.array_size;

   surf->cmask_alignment_log2 = util_logbase2(MAX2(256, base_align));
   surf->cmask_slice_size = align(slice_bytes, base_align);
   surf->cmask_size = surf->cmask_slice_size * num_layers;
}
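/* Illustrative arithmetic (not from the original file): with num_pipes = 4
 * (cl_width = cl_height = 32) and pipe_interleave_bytes = 256, a level 0 of
 * 2048x1280 blocks is already aligned to multiples of 256, so
 * slice_elements = 2048 * 1280 / 64 = 40960, slice_bytes = 20480, and the
 * slice size is then aligned to base_align = 4 * 256 = 1024 bytes.
 */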

/**
 * Fill in the tiling information in \p surf based on the given surface config.
 *
 * The following fields of \p surf must be initialized by the caller:
 * blk_w, blk_h, bpe, flags.
 */
static int gfx6_compute_surface(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                const struct ac_surf_config *config, enum radeon_surf_mode mode,
                                struct radeon_surf *surf)
{
   unsigned level;
   bool compressed;
   ADDR_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0};
   ADDR_COMPUTE_SURFACE_INFO_OUTPUT AddrSurfInfoOut = {0};
   ADDR_COMPUTE_DCCINFO_INPUT AddrDccIn = {0};
   ADDR_COMPUTE_DCCINFO_OUTPUT AddrDccOut = {0};
   ADDR_COMPUTE_HTILE_INFO_INPUT AddrHtileIn = {0};
   ADDR_COMPUTE_HTILE_INFO_OUTPUT AddrHtileOut = {0};
   ADDR_TILEINFO AddrTileInfoIn = {0};
   ADDR_TILEINFO AddrTileInfoOut = {0};
   int r;

   AddrSurfInfoIn.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_INPUT);
   AddrSurfInfoOut.size = sizeof(ADDR_COMPUTE_SURFACE_INFO_OUTPUT);
   AddrDccIn.size = sizeof(ADDR_COMPUTE_DCCINFO_INPUT);
   AddrDccOut.size = sizeof(ADDR_COMPUTE_DCCINFO_OUTPUT);
   AddrHtileIn.size = sizeof(ADDR_COMPUTE_HTILE_INFO_INPUT);
   AddrHtileOut.size = sizeof(ADDR_COMPUTE_HTILE_INFO_OUTPUT);
   AddrSurfInfoOut.pTileInfo = &AddrTileInfoOut;

   compressed = surf->blk_w == 4 && surf->blk_h == 4;

   /* MSAA requires 2D tiling. */
   if (config->info.samples > 1)
      mode = RADEON_SURF_MODE_2D;

   /* DB doesn't support linear layouts. */
   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER) && mode < RADEON_SURF_MODE_1D)
      mode = RADEON_SURF_MODE_1D;

   /* Set the requested tiling mode. */
   switch (mode) {
   case RADEON_SURF_MODE_LINEAR_ALIGNED:
      AddrSurfInfoIn.tileMode = ADDR_TM_LINEAR_ALIGNED;
      break;
   case RADEON_SURF_MODE_1D:
      if (surf->flags & RADEON_SURF_PRT)
         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_TILED_THIN1;
      else
         AddrSurfInfoIn.tileMode = ADDR_TM_1D_TILED_THIN1;
      break;
   case RADEON_SURF_MODE_2D:
      if (surf->flags & RADEON_SURF_PRT)
         AddrSurfInfoIn.tileMode = ADDR_TM_PRT_2D_TILED_THIN1;
      else
         AddrSurfInfoIn.tileMode = ADDR_TM_2D_TILED_THIN1;
      break;
   default:
      assert(0);
   }

   /* The format must be set correctly for the allocation of compressed
    * textures to work. In other cases, setting the bpp is sufficient.
    */
   if (compressed) {
      switch (surf->bpe) {
      case 8:
         AddrSurfInfoIn.format = ADDR_FMT_BC1;
         break;
      case 16:
         AddrSurfInfoIn.format = ADDR_FMT_BC3;
         break;
      default:
         assert(0);
      }
   } else {
      AddrDccIn.bpp = AddrSurfInfoIn.bpp = surf->bpe * 8;
   }

   AddrDccIn.numSamples = AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples);
   AddrSurfInfoIn.tileIndex = -1;

   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) {
      AddrDccIn.numSamples = AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples);
   }

   /* Set the micro tile type. */
   if (surf->flags & RADEON_SURF_SCANOUT)
      AddrSurfInfoIn.tileType = ADDR_DISPLAYABLE;
   else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER)
      AddrSurfInfoIn.tileType = ADDR_DEPTH_SAMPLE_ORDER;
   else
      AddrSurfInfoIn.tileType = ADDR_NON_DISPLAYABLE;

   AddrSurfInfoIn.flags.color = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER);
   AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0;
   AddrSurfInfoIn.flags.cube = config->is_cube;
   AddrSurfInfoIn.flags.display = get_display_flag(config, surf);
   AddrSurfInfoIn.flags.pow2Pad = config->info.levels > 1;
   AddrSurfInfoIn.flags.tcCompatible = (surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE) != 0;
   AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0;

   /* Only degrade the tile mode for space if TC-compatible HTILE hasn't been
    * requested, because TC-compatible HTILE requires 2D tiling.
    */
   AddrSurfInfoIn.flags.opt4Space = !AddrSurfInfoIn.flags.tcCompatible &&
                                    !AddrSurfInfoIn.flags.fmask && config->info.samples <= 1 &&
                                    !(surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE);

   /* DCC notes:
    * - If we add MSAA support, keep in mind that CB can't decompress 8bpp
    *   with samples >= 4.
    * - Mipmapped array textures have low performance (discovered by a closed
    *   driver team).
    */
   AddrSurfInfoIn.flags.dccCompatible =
      info->gfx_level >= GFX8 && info->has_graphics && /* disable DCC on compute-only chips */
      !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && !(surf->flags & RADEON_SURF_DISABLE_DCC) &&
      !compressed &&
      ((config->info.array_size == 1 && config->info.depth == 1) || config->info.levels == 1);

   AddrSurfInfoIn.flags.noStencil = (surf->flags & RADEON_SURF_SBUFFER) == 0;
   AddrSurfInfoIn.flags.compressZ = !!(surf->flags & RADEON_SURF_Z_OR_SBUFFER);

   /* On GFX7-GFX8, the DB uses the same pitch and tile mode (except tilesplit)
    * for Z and stencil. This can cause a number of problems which we work
    * around here:
    *
    * - a depth part that is incompatible with mipmapped texturing
    * - at least on Stoney, entirely incompatible Z/S aspects (e.g.
    *   incorrect tiling applied to the stencil part, stencil buffer
    *   memory accesses that go out of bounds) even without mipmapping
    *
    * Some piglit tests that are prone to different types of related
    * failures:
    *  ./bin/ext_framebuffer_multisample-upsample 2 stencil
    *  ./bin/framebuffer-blit-levels {draw,read} stencil
    *  ./bin/ext_framebuffer_multisample-unaligned-blit N {depth,stencil} {msaa,upsample,downsample}
    *  ./bin/fbo-depth-array fs-writes-{depth,stencil} / {depth,stencil}-{clear,layered-clear,draw}
    *  ./bin/depthstencil-render-miplevels 1024 d=s=z24_s8
    */
   int stencil_tile_idx = -1;

   if (AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.noStencil &&
       (config->info.levels > 1 || info->family == CHIP_STONEY)) {
      /* Compute stencilTileIdx that is compatible with the (depth)
       * tileIdx. This degrades the depth surface if necessary to
       * ensure that a matching stencilTileIdx exists. */
      AddrSurfInfoIn.flags.matchStencilTileCfg = 1;

      /* Keep the depth mip-tail compatible with texturing. */
      AddrSurfInfoIn.flags.noStencil = 1;
   }

   /* Set preferred macrotile parameters. This is usually required
    * for shared resources. This is for 2D tiling only. */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) &&
       AddrSurfInfoIn.tileMode >= ADDR_TM_2D_TILED_THIN1 && surf->u.legacy.bankw &&
       surf->u.legacy.bankh && surf->u.legacy.mtilea && surf->u.legacy.tile_split) {
      /* If any of these parameters are incorrect, the calculation
       * will fail. */
      AddrTileInfoIn.banks = surf->u.legacy.num_banks;
      AddrTileInfoIn.bankWidth = surf->u.legacy.bankw;
      AddrTileInfoIn.bankHeight = surf->u.legacy.bankh;
      AddrTileInfoIn.macroAspectRatio = surf->u.legacy.mtilea;
      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.tile_split;
      AddrTileInfoIn.pipeConfig = surf->u.legacy.pipe_config + 1; /* +1 compared to GB_TILE_MODE */
      AddrSurfInfoIn.flags.opt4Space = 0;
      AddrSurfInfoIn.pTileInfo = &AddrTileInfoIn;

      /* If AddrSurfInfoIn.pTileInfo is set, Addrlib doesn't set
       * the tile index, because we are expected to know it if
       * we know the other parameters.
       *
       * This is something that can easily be fixed in Addrlib.
       * For now, just figure it out here.
       * Note that only 2D_TILE_THIN1 is handled here.
       */
      assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER));
      assert(AddrSurfInfoIn.tileMode == ADDR_TM_2D_TILED_THIN1);

      if (info->gfx_level == GFX6) {
         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE) {
            if (surf->bpe == 2)
               AddrSurfInfoIn.tileIndex = 11; /* 16bpp */
            else
               AddrSurfInfoIn.tileIndex = 12; /* 32bpp */
         } else {
            if (surf->bpe == 1)
               AddrSurfInfoIn.tileIndex = 14; /* 8bpp */
            else if (surf->bpe == 2)
               AddrSurfInfoIn.tileIndex = 15; /* 16bpp */
            else if (surf->bpe == 4)
               AddrSurfInfoIn.tileIndex = 16; /* 32bpp */
            else
               AddrSurfInfoIn.tileIndex = 17; /* 64bpp (and 128bpp) */
         }
      } else {
         /* GFX7 - GFX8 */
         if (AddrSurfInfoIn.tileType == ADDR_DISPLAYABLE)
            AddrSurfInfoIn.tileIndex = 10; /* 2D displayable */
         else
            AddrSurfInfoIn.tileIndex = 14; /* 2D non-displayable */

         /* Addrlib doesn't set this if tileIndex is forced like above. */
         AddrSurfInfoOut.macroModeIndex = cik_get_macro_tile_index(surf);
      }
   }

   surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER);
   surf->num_meta_levels = 0;
   surf->surf_size = 0;
   surf->meta_size = 0;
   surf->meta_slice_size = 0;
   surf->meta_alignment_log2 = 0;

   const bool only_stencil =
      (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER);

   /* Calculate texture layout information. */
   if (!only_stencil) {
      for (level = 0; level < config->info.levels; level++) {
         r = gfx6_compute_level(addrlib, config, surf, false, level, compressed, &AddrSurfInfoIn,
                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, &AddrHtileIn,
                                &AddrHtileOut);
         if (r)
            return r;

         if (level > 0)
            continue;

         if (!AddrSurfInfoOut.tcCompatible) {
            AddrSurfInfoIn.flags.tcCompatible = 0;
            surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
         }

         if (AddrSurfInfoIn.flags.matchStencilTileCfg) {
            AddrSurfInfoIn.flags.matchStencilTileCfg = 0;
            AddrSurfInfoIn.tileIndex = AddrSurfInfoOut.tileIndex;
            stencil_tile_idx = AddrSurfInfoOut.stencilTileIdx;

            assert(stencil_tile_idx >= 0);
         }

         r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
         if (r)
            return r;
      }
   }

   /* Calculate texture layout information for stencil. */
   if (surf->flags & RADEON_SURF_SBUFFER) {
      AddrSurfInfoIn.tileIndex = stencil_tile_idx;
      AddrSurfInfoIn.bpp = 8;
      AddrSurfInfoIn.flags.depth = 0;
      AddrSurfInfoIn.flags.stencil = 1;
      AddrSurfInfoIn.flags.tcCompatible = 0;
      /* This will be ignored if AddrSurfInfoIn.pTileInfo is NULL. */
      AddrTileInfoIn.tileSplitBytes = surf->u.legacy.stencil_tile_split;

      for (level = 0; level < config->info.levels; level++) {
         r = gfx6_compute_level(addrlib, config, surf, true, level, compressed, &AddrSurfInfoIn,
                                &AddrSurfInfoOut, &AddrDccIn, &AddrDccOut, NULL, NULL);
         if (r)
            return r;

         /* DB uses the depth pitch for both stencil and depth. */
         if (!only_stencil) {
            if (surf->u.legacy.zs.stencil_level[level].nblk_x != surf->u.legacy.level[level].nblk_x)
               surf->u.legacy.stencil_adjusted = true;
         } else {
            surf->u.legacy.level[level].nblk_x = surf->u.legacy.zs.stencil_level[level].nblk_x;
         }

         if (level == 0) {
            if (only_stencil) {
               r = gfx6_surface_settings(addrlib, info, config, &AddrSurfInfoOut, surf);
               if (r)
                  return r;
            }

            /* For 2D modes only. */
            if (AddrSurfInfoOut.tileMode >= ADDR_TM_2D_TILED_THIN1) {
               surf->u.legacy.stencil_tile_split = AddrSurfInfoOut.pTileInfo->tileSplitBytes;
            }
         }
      }
   }

   /* Compute FMASK. */
   if (config->info.samples >= 2 && AddrSurfInfoIn.flags.color && info->has_graphics &&
       !(surf->flags & RADEON_SURF_NO_FMASK)) {
      ADDR_COMPUTE_FMASK_INFO_INPUT fin = {0};
      ADDR_COMPUTE_FMASK_INFO_OUTPUT fout = {0};
      ADDR_TILEINFO fmask_tile_info = {0};

      fin.size = sizeof(fin);
      fout.size = sizeof(fout);

      fin.tileMode = AddrSurfInfoOut.tileMode;
      fin.pitch = AddrSurfInfoOut.pitch;
      fin.height = config->info.height;
      fin.numSlices = AddrSurfInfoIn.numSlices;
      fin.numSamples = AddrSurfInfoIn.numSamples;
      fin.numFrags = AddrSurfInfoIn.numFrags;
      fin.tileIndex = -1;
      fout.pTileInfo = &fmask_tile_info;

      r = AddrComputeFmaskInfo(addrlib, &fin, &fout);
      if (r)
         return r;

      surf->fmask_size = fout.fmaskBytes;
      surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign);
      surf->fmask_slice_size = fout.sliceSize;
      surf->fmask_tile_swizzle = 0;

      surf->u.legacy.color.fmask.slice_tile_max = (fout.pitch * fout.height) / 64;
      if (surf->u.legacy.color.fmask.slice_tile_max)
         surf->u.legacy.color.fmask.slice_tile_max -= 1;

      surf->u.legacy.color.fmask.tiling_index = fout.tileIndex;
      surf->u.legacy.color.fmask.bankh = fout.pTileInfo->bankHeight;
      surf->u.legacy.color.fmask.pitch_in_pixels = fout.pitch;

      /* Compute tile swizzle for FMASK. */
      if (config->info.fmask_surf_index && !(surf->flags & RADEON_SURF_SHAREABLE)) {
         ADDR_COMPUTE_BASE_SWIZZLE_INPUT xin = {0};
         ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT xout = {0};

         xin.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_INPUT);
         xout.size = sizeof(ADDR_COMPUTE_BASE_SWIZZLE_OUTPUT);

         /* This counter starts from 1 instead of 0. */
         xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index);
         xin.tileIndex = fout.tileIndex;
         xin.macroModeIndex = fout.macroModeIndex;
         xin.pTileInfo = fout.pTileInfo;
         xin.tileMode = fin.tileMode;

         int r = AddrComputeBaseSwizzle(addrlib, &xin, &xout);
         if (r != ADDR_OK)
            return r;

         assert(xout.tileSwizzle <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8));
         surf->fmask_tile_swizzle = xout.tileSwizzle;
      }
   }

   /* Recalculate the whole DCC miptree size including disabled levels.
    * This is what addrlib does, but calling addrlib would be a lot more
    * complicated.
    */
   if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_size && config->info.levels > 1) {
      /* The smallest miplevels that are never compressed by DCC
       * still read the DCC buffer via TC if the base level uses DCC,
       * and for some reason the DCC buffer needs to be larger if
       * the miptree uses non-zero tile_swizzle. Otherwise there are
       * VM faults.
       *
       * "dcc_alignment * 4" was determined by trial and error.
       */
      surf->meta_size = align64(surf->surf_size >> 8, (1 << surf->meta_alignment_log2) * 4);
   }

   /* Make sure HTILE covers the whole miptree, because the shader reads
    * TC-compatible HTILE even for levels where it's disabled by DB.
    */
   if (surf->flags & (RADEON_SURF_Z_OR_SBUFFER | RADEON_SURF_TC_COMPATIBLE_HTILE) &&
       surf->meta_size && config->info.levels > 1) {
      /* MSAA can't occur with levels > 1, so ignore the sample count. */
      const unsigned total_pixels = surf->surf_size / surf->bpe;
      const unsigned htile_block_size = 8 * 8;
      const unsigned htile_element_size = 4;

      surf->meta_size = (total_pixels / htile_block_size) * htile_element_size;
      surf->meta_size = align(surf->meta_size, 1 << surf->meta_alignment_log2);
   } else if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && !surf->meta_size) {
      /* Unset this if HTILE is not present. */
      surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE;
   }

   surf->is_linear = surf->u.legacy.level[0].mode == RADEON_SURF_MODE_LINEAR_ALIGNED;
   surf->is_displayable = surf->is_linear || surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY ||
                          surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER;

   /* The rotated micro tile mode doesn't work if both CMASK and RB+ are
    * used at the same time. This case is not currently expected to occur
    * because we don't use rotated. Enforce this restriction on all chips
    * to facilitate testing.
    */
   if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER) {
      assert(!"rotate micro tile mode is unsupported");
      return ADDR_ERROR;
   }

   ac_compute_cmask(info, config, surf);
   return 0;
}

/* This is only called when expecting a tiled layout. */
static int gfx9_get_preferred_swizzle_mode(ADDR_HANDLE addrlib, const struct radeon_info *info,
                                           struct radeon_surf *surf,
                                           ADDR2_COMPUTE_SURFACE_INFO_INPUT *in, bool is_fmask,
                                           AddrSwizzleMode *swizzle_mode)
{
   ADDR_E_RETURNCODE ret;
   ADDR2_GET_PREFERRED_SURF_SETTING_INPUT sin = {0};
   ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT sout = {0};

   sin.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_INPUT);
   sout.size = sizeof(ADDR2_GET_PREFERRED_SURF_SETTING_OUTPUT);

   sin.flags = in->flags;
   sin.resourceType = in->resourceType;
   sin.format = in->format;
   sin.resourceLoction = ADDR_RSRC_LOC_INVIS;

   /* TODO: We could allow some of these: */
   sin.forbiddenBlock.micro = 1; /* don't allow the 256B swizzle modes */

   if (info->gfx_level >= GFX11) {
      /* Disable 256K on APUs because it doesn't work with DAL. */
      if (!info->has_dedicated_vram) {
         sin.forbiddenBlock.gfx11.thin256KB = 1;
         sin.forbiddenBlock.gfx11.thick256KB = 1;
      }
   } else {
      sin.forbiddenBlock.var = 1; /* don't allow the variable-sized swizzle modes */
   }

   sin.bpp = in->bpp;
   sin.width = in->width;
   sin.height = in->height;
   sin.numSlices = in->numSlices;
   sin.numMipLevels = in->numMipLevels;
   sin.numSamples = in->numSamples;
   sin.numFrags = in->numFrags;

   if (is_fmask) {
      sin.flags.display = 0;
      sin.flags.color = 0;
      sin.flags.fmask = 1;
   }

   /* With PRT images we want to force 64 KiB block size so that the image
    * created is consistent with the format properties returned in Vulkan
    * independent of the image.
    */
   if (sin.flags.prt) {
      sin.forbiddenBlock.macroThin4KB = 1;
      sin.forbiddenBlock.macroThick4KB = 1;
      if (info->gfx_level >= GFX11) {
         sin.forbiddenBlock.gfx11.thin256KB = 1;
         sin.forbiddenBlock.gfx11.thick256KB = 1;
      }
      sin.forbiddenBlock.linear = 1;
   }

   if (surf->flags & RADEON_SURF_FORCE_MICRO_TILE_MODE) {
      sin.forbiddenBlock.linear = 1;

      if (surf->micro_tile_mode == RADEON_MICRO_MODE_DISPLAY)
         sin.preferredSwSet.sw_D = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_STANDARD)
         sin.preferredSwSet.sw_S = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_DEPTH)
         sin.preferredSwSet.sw_Z = 1;
      else if (surf->micro_tile_mode == RADEON_MICRO_MODE_RENDER)
         sin.preferredSwSet.sw_R = 1;
   }

   if (info->gfx_level >= GFX10 && in->resourceType == ADDR_RSRC_TEX_3D && in->numSlices > 1) {
      /* 3D textures should use S swizzle modes for the best performance.
       * The only exception is 3D render targets, which prefer 64KB_D_X.
       *
       * 3D texture sampler performance with a very large 3D texture:
       *   ADDR_SW_64KB_R_X = 19 FPS (DCC on), 26 FPS (DCC off)
       *   ADDR_SW_64KB_Z_X = 25 FPS
       *   ADDR_SW_64KB_D_X = 53 FPS
       *   ADDR_SW_4KB_S = 53 FPS
       *   ADDR_SW_64KB_S = 53 FPS
       *   ADDR_SW_64KB_S_T = 61 FPS
       *   ADDR_SW_4KB_S_X = 63 FPS
       *   ADDR_SW_64KB_S_X = 62 FPS
       */
      sin.preferredSwSet.sw_S = 1;
   }

   ret = Addr2GetPreferredSurfaceSetting(addrlib, &sin, &sout);
   if (ret != ADDR_OK)
      return ret;

   *swizzle_mode = sout.swizzleMode;
   return 0;
}

static bool is_dcc_supported_by_CB(const struct radeon_info *info, unsigned sw_mode)
{
   if (info->gfx_level >= GFX11)
      return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X ||
             sw_mode == ADDR_SW_256KB_Z_X || sw_mode == ADDR_SW_256KB_R_X;

   if (info->gfx_level >= GFX10)
      return sw_mode == ADDR_SW_64KB_Z_X || sw_mode == ADDR_SW_64KB_R_X;

   return sw_mode != ADDR_SW_LINEAR;
}
1545 */ 1546 return surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks && 1547 (valid_64b || valid_128b); 1548 } 1549 1550 /* Valid settings are the same as NAVI14 + (64B && 128B && max_compressed_block_size == 64B) */ 1551 return (surf->u.gfx9.color.dcc.independent_64B_blocks != surf->u.gfx9.color.dcc.independent_128B_blocks && 1552 (valid_64b || valid_128b)) || 1553 (surf->u.gfx9.color.dcc.independent_64B_blocks && 1554 surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B); 1555} 1556 1557static bool gfx10_DCN_requires_independent_64B_blocks(const struct radeon_info *info, 1558 const struct ac_surf_config *config) 1559{ 1560 assert(info->gfx_level >= GFX10); 1561 1562 /* Older kernels have buggy DAL. */ 1563 if (info->drm_minor <= 43) 1564 return true; 1565 1566 /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */ 1567 return config->info.width > 2560 || config->info.height > 2560; 1568} 1569 1570void ac_modifier_max_extent(const struct radeon_info *info, 1571 uint64_t modifier, uint32_t *width, uint32_t *height) 1572{ 1573 /* DCC is supported with any size. The maximum width per display pipe is 5760, but multiple 1574 * display pipes can be used to drive the display. 1575 */ 1576 *width = 16384; 1577 *height = 16384; 1578 1579 if (ac_modifier_has_dcc(modifier)) { 1580 bool independent_64B_blocks = AMD_FMT_MOD_GET(DCC_INDEPENDENT_64B, modifier); 1581 1582 if (info->gfx_level >= GFX10 && !independent_64B_blocks) { 1583 /* For 4K, DCN requires INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B. */ 1584 *width = 2560; 1585 *height = 2560; 1586 } 1587 } 1588} 1589 1590static bool is_dcc_supported_by_DCN(const struct radeon_info *info, 1591 const struct ac_surf_config *config, 1592 const struct radeon_surf *surf, bool rb_aligned, 1593 bool pipe_aligned) 1594{ 1595 if (!info->use_display_dcc_unaligned && !info->use_display_dcc_with_retile_blit) 1596 return false; 1597 1598 /* 16bpp and 64bpp are more complicated, so they are disallowed for now. */ 1599 if (surf->bpe != 4) 1600 return false; 1601 1602 /* Handle unaligned DCC. */ 1603 if (info->use_display_dcc_unaligned && (rb_aligned || pipe_aligned)) 1604 return false; 1605 1606 switch (info->gfx_level) { 1607 case GFX6: 1608 case GFX7: 1609 case GFX8: 1610 /* We can get here due to SI_FORCE_FAMILY. */ 1611 return false; 1612 case GFX9: 1613 /* There are more constraints, but we always set 1614 * INDEPENDENT_64B_BLOCKS = 1 and MAX_COMPRESSED_BLOCK_SIZE = 64B, 1615 * which always works. 1616 */ 1617 assert(surf->u.gfx9.color.dcc.independent_64B_blocks && 1618 surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B); 1619 return true; 1620 case GFX10: 1621 case GFX10_3: 1622 case GFX11: 1623 /* DCN requires INDEPENDENT_128B_BLOCKS = 0 only on Navi1x. 
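 *
 * Worked example (editor's addition, assuming a kernel new enough that the
 * drm_minor fallback in gfx10_DCN_requires_independent_64B_blocks() does not
 * trigger): a 1920x1080 swapchain image may keep the 128B-only setting on
 * gfx10.3+, while a 3840x2160 one must use INDEPENDENT_64B_BLOCKS with a 64B
 * max compressed block size, because the helper returns true for anything
 * wider or taller than 2560 pixels.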
*/ 1624 if (info->gfx_level == GFX10 && surf->u.gfx9.color.dcc.independent_128B_blocks) 1625 return false; 1626 1627 return (!gfx10_DCN_requires_independent_64B_blocks(info, config) || 1628 (surf->u.gfx9.color.dcc.independent_64B_blocks && 1629 surf->u.gfx9.color.dcc.max_compressed_block_size == V_028C78_MAX_BLOCK_SIZE_64B)); 1630 default: 1631 unreachable("unhandled chip"); 1632 return false; 1633 } 1634} 1635 1636static void ac_copy_dcc_equation(const struct radeon_info *info, 1637 ADDR2_COMPUTE_DCCINFO_OUTPUT *dcc, 1638 struct gfx9_meta_equation *equation) 1639{ 1640 equation->meta_block_width = dcc->metaBlkWidth; 1641 equation->meta_block_height = dcc->metaBlkHeight; 1642 equation->meta_block_depth = dcc->metaBlkDepth; 1643 1644 if (info->gfx_level >= GFX10) { 1645 /* gfx9_meta_equation doesn't store the first 4 and the last 8 elements. They must be 0. */ 1646 for (unsigned i = 0; i < 4; i++) 1647 assert(dcc->equation.gfx10_bits[i] == 0); 1648 1649 for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 4; i < 68; i++) 1650 assert(dcc->equation.gfx10_bits[i] == 0); 1651 1652 memcpy(equation->u.gfx10_bits, dcc->equation.gfx10_bits + 4, 1653 sizeof(equation->u.gfx10_bits)); 1654 } else { 1655 assert(dcc->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit)); 1656 1657 equation->u.gfx9.num_bits = dcc->equation.gfx9.num_bits; 1658 equation->u.gfx9.num_pipe_bits = dcc->equation.gfx9.numPipeBits; 1659 for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) { 1660 for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) { 1661 equation->u.gfx9.bit[b].coord[c].dim = dcc->equation.gfx9.bit[b].coord[c].dim; 1662 equation->u.gfx9.bit[b].coord[c].ord = dcc->equation.gfx9.bit[b].coord[c].ord; 1663 } 1664 } 1665 } 1666} 1667 1668static void ac_copy_cmask_equation(const struct radeon_info *info, 1669 ADDR2_COMPUTE_CMASK_INFO_OUTPUT *cmask, 1670 struct gfx9_meta_equation *equation) 1671{ 1672 equation->meta_block_width = cmask->metaBlkWidth; 1673 equation->meta_block_height = cmask->metaBlkHeight; 1674 equation->meta_block_depth = 1; 1675 1676 if (info->gfx_level == GFX9) { 1677 assert(cmask->equation.gfx9.num_bits <= ARRAY_SIZE(equation->u.gfx9.bit)); 1678 1679 equation->u.gfx9.num_bits = cmask->equation.gfx9.num_bits; 1680 equation->u.gfx9.num_pipe_bits = cmask->equation.gfx9.numPipeBits; 1681 for (unsigned b = 0; b < ARRAY_SIZE(equation->u.gfx9.bit); b++) { 1682 for (unsigned c = 0; c < ARRAY_SIZE(equation->u.gfx9.bit[b].coord); c++) { 1683 equation->u.gfx9.bit[b].coord[c].dim = cmask->equation.gfx9.bit[b].coord[c].dim; 1684 equation->u.gfx9.bit[b].coord[c].ord = cmask->equation.gfx9.bit[b].coord[c].ord; 1685 } 1686 } 1687 } 1688} 1689 1690static void ac_copy_htile_equation(const struct radeon_info *info, 1691 ADDR2_COMPUTE_HTILE_INFO_OUTPUT *htile, 1692 struct gfx9_meta_equation *equation) 1693{ 1694 equation->meta_block_width = htile->metaBlkWidth; 1695 equation->meta_block_height = htile->metaBlkHeight; 1696 1697 /* gfx9_meta_equation doesn't store the first 8 and the last 4 elements. They must be 0. 
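 *
 * Layout sketch (editor's note): addrlib hands back 72 HTILE equation
 * entries; the memcpy below keeps ARRAY_SIZE(equation->u.gfx10_bits) of them
 * starting at index 8, and the two assert loops verify that the dropped
 * leading and trailing entries really are zero.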
*/ 1698 for (unsigned i = 0; i < 8; i++) 1699 assert(htile->equation.gfx10_bits[i] == 0); 1700 1701 for (unsigned i = ARRAY_SIZE(equation->u.gfx10_bits) + 8; i < 72; i++) 1702 assert(htile->equation.gfx10_bits[i] == 0); 1703 1704 memcpy(equation->u.gfx10_bits, htile->equation.gfx10_bits + 8, 1705 sizeof(equation->u.gfx10_bits)); 1706} 1707 1708static int gfx9_compute_miptree(struct ac_addrlib *addrlib, const struct radeon_info *info, 1709 const struct ac_surf_config *config, struct radeon_surf *surf, 1710 bool compressed, ADDR2_COMPUTE_SURFACE_INFO_INPUT *in) 1711{ 1712 ADDR2_MIP_INFO mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 1713 ADDR2_COMPUTE_SURFACE_INFO_OUTPUT out = {0}; 1714 ADDR_E_RETURNCODE ret; 1715 1716 out.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_OUTPUT); 1717 out.pMipInfo = mip_info; 1718 1719 ret = Addr2ComputeSurfaceInfo(addrlib->handle, in, &out); 1720 if (ret != ADDR_OK) 1721 return ret; 1722 1723 if (in->flags.prt) { 1724 surf->prt_tile_width = out.blockWidth; 1725 surf->prt_tile_height = out.blockHeight; 1726 surf->prt_tile_depth = out.blockSlices; 1727 1728 surf->first_mip_tail_level = out.firstMipIdInTail; 1729 1730 for (unsigned i = 0; i < in->numMipLevels; i++) { 1731 surf->u.gfx9.prt_level_offset[i] = mip_info[i].macroBlockOffset + mip_info[i].mipTailOffset; 1732 1733 if (info->gfx_level >= GFX10) 1734 surf->u.gfx9.prt_level_pitch[i] = mip_info[i].pitch; 1735 else 1736 surf->u.gfx9.prt_level_pitch[i] = out.mipChainPitch; 1737 } 1738 } 1739 1740 if (in->flags.stencil) { 1741 surf->u.gfx9.zs.stencil_swizzle_mode = in->swizzleMode; 1742 surf->u.gfx9.zs.stencil_epitch = 1743 out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1; 1744 surf->surf_alignment_log2 = MAX2(surf->surf_alignment_log2, util_logbase2(out.baseAlign)); 1745 surf->u.gfx9.zs.stencil_offset = align(surf->surf_size, out.baseAlign); 1746 surf->surf_size = surf->u.gfx9.zs.stencil_offset + out.surfSize; 1747 return 0; 1748 } 1749 1750 surf->u.gfx9.swizzle_mode = in->swizzleMode; 1751 surf->u.gfx9.epitch = out.epitchIsHeight ? out.mipChainHeight - 1 : out.mipChainPitch - 1; 1752 1753 /* CMASK fast clear uses these even if FMASK isn't allocated. 1754 * FMASK only supports the Z swizzle modes, whose numbers are multiples of 4. 1755 */ 1756 if (!in->flags.depth) { 1757 surf->u.gfx9.color.fmask_swizzle_mode = surf->u.gfx9.swizzle_mode & ~0x3; 1758 surf->u.gfx9.color.fmask_epitch = surf->u.gfx9.epitch; 1759 } 1760 1761 surf->u.gfx9.surf_slice_size = out.sliceSize; 1762 surf->u.gfx9.surf_pitch = out.pitch; 1763 surf->u.gfx9.surf_height = out.height; 1764 surf->surf_size = out.surfSize; 1765 surf->surf_alignment_log2 = util_logbase2(out.baseAlign); 1766 1767 if (!compressed && surf->blk_w > 1 && out.pitch == out.pixelPitch && 1768 surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) { 1769 /* Adjust surf_pitch to be in elements units not in pixels */ 1770 surf->u.gfx9.surf_pitch = align(surf->u.gfx9.surf_pitch / surf->blk_w, 256 / surf->bpe); 1771 surf->u.gfx9.epitch = 1772 MAX2(surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch * surf->blk_w - 1); 1773 /* The surface is really a surf->bpe bytes per pixel surface even if we 1774 * use it as a surf->bpe bytes per element one. 1775 * Adjust surf_slice_size and surf_size to reflect the change 1776 * made to surf_pitch. 
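 *
 * Worked example (editor's addition, hypothetical values): for a linear
 * 2x1-subsampled packed format with blk_w = 2 and bpe = 4 and a 100-texel
 * pitch from addrlib, surf_pitch becomes align(100 / 2, 256 / 4) = 64
 * elements, epitch is raised to at least 64 * 2 - 1 = 127, and the slice
 * size below to at least 64 * height * 4 * 2 bytes.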
1777 */ 1778 surf->u.gfx9.surf_slice_size = 1779 MAX2(surf->u.gfx9.surf_slice_size, 1780 surf->u.gfx9.surf_pitch * out.height * surf->bpe * surf->blk_w); 1781 surf->surf_size = surf->u.gfx9.surf_slice_size * in->numSlices; 1782 } 1783 1784 if (in->swizzleMode == ADDR_SW_LINEAR) { 1785 int alignment = 256 / surf->bpe; 1786 for (unsigned i = 0; i < in->numMipLevels; i++) { 1787 surf->u.gfx9.offset[i] = mip_info[i].offset; 1788 /* Adjust pitch like we did for surf_pitch */ 1789 surf->u.gfx9.pitch[i] = align(mip_info[i].pitch / surf->blk_w, alignment); 1790 } 1791 } 1792 1793 surf->u.gfx9.base_mip_width = mip_info[0].pitch; 1794 surf->u.gfx9.base_mip_height = mip_info[0].height; 1795 1796 if (in->flags.depth) { 1797 assert(in->swizzleMode != ADDR_SW_LINEAR); 1798 1799 if (surf->flags & RADEON_SURF_NO_HTILE) 1800 return 0; 1801 1802 /* HTILE */ 1803 ADDR2_COMPUTE_HTILE_INFO_INPUT hin = {0}; 1804 ADDR2_COMPUTE_HTILE_INFO_OUTPUT hout = {0}; 1805 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 1806 1807 hin.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_INPUT); 1808 hout.size = sizeof(ADDR2_COMPUTE_HTILE_INFO_OUTPUT); 1809 hout.pMipInfo = meta_mip_info; 1810 1811 assert(in->flags.metaPipeUnaligned == 0); 1812 assert(in->flags.metaRbUnaligned == 0); 1813 1814 hin.hTileFlags.pipeAligned = 1; 1815 hin.hTileFlags.rbAligned = 1; 1816 hin.depthFlags = in->flags; 1817 hin.swizzleMode = in->swizzleMode; 1818 hin.unalignedWidth = in->width; 1819 hin.unalignedHeight = in->height; 1820 hin.numSlices = in->numSlices; 1821 hin.numMipLevels = in->numMipLevels; 1822 hin.firstMipIdInTail = out.firstMipIdInTail; 1823 1824 ret = Addr2ComputeHtileInfo(addrlib->handle, &hin, &hout); 1825 if (ret != ADDR_OK) 1826 return ret; 1827 1828 surf->meta_size = hout.htileBytes; 1829 surf->meta_slice_size = hout.sliceSize; 1830 surf->meta_alignment_log2 = util_logbase2(hout.baseAlign); 1831 surf->meta_pitch = hout.pitch; 1832 surf->num_meta_levels = in->numMipLevels; 1833 1834 for (unsigned i = 0; i < in->numMipLevels; i++) { 1835 surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset; 1836 surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize; 1837 1838 if (meta_mip_info[i].inMiptail) { 1839 /* GFX10 can only compress the first level 1840 * in the mip tail. 1841 */ 1842 surf->num_meta_levels = i + 1; 1843 break; 1844 } 1845 } 1846 1847 if (!surf->num_meta_levels) 1848 surf->meta_size = 0; 1849 1850 if (info->gfx_level >= GFX10) 1851 ac_copy_htile_equation(info, &hout, &surf->u.gfx9.zs.htile_equation); 1852 return 0; 1853 } 1854 1855 { 1856 /* Compute tile swizzle for the color surface. 1857 * All *_X and *_T modes can use the swizzle. 
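 *
 * Editor's note: the check below relies on the swizzle mode enum ordering,
 * in which every *_T and *_X mode sorts at or after ADDR_SW_64KB_Z_T. The
 * returned pipeBankXor is kept in surf->tile_swizzle and drivers typically
 * fold it into the low bits of the 256B-granular base address, roughly
 * (sketch, not code from this file):
 *
 *   va |= (uint64_t)surf->tile_swizzle << 8;
 *
 * which is why the assert requires it to fit in
 * 8 * sizeof(surf->tile_swizzle) bits.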
1858 */ 1859 if (config->info.surf_index && in->swizzleMode >= ADDR_SW_64KB_Z_T && !out.mipChainInTail && 1860 !(surf->flags & RADEON_SURF_SHAREABLE) && !in->flags.display) { 1861 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 1862 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 1863 1864 xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 1865 xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 1866 1867 xin.surfIndex = p_atomic_inc_return(config->info.surf_index) - 1; 1868 xin.flags = in->flags; 1869 xin.swizzleMode = in->swizzleMode; 1870 xin.resourceType = in->resourceType; 1871 xin.format = in->format; 1872 xin.numSamples = in->numSamples; 1873 xin.numFrags = in->numFrags; 1874 1875 ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout); 1876 if (ret != ADDR_OK) 1877 return ret; 1878 1879 assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->tile_swizzle) * 8)); 1880 surf->tile_swizzle = xout.pipeBankXor; 1881 } 1882 1883 bool use_dcc = false; 1884 if (surf->modifier != DRM_FORMAT_MOD_INVALID) { 1885 use_dcc = ac_modifier_has_dcc(surf->modifier); 1886 } else { 1887 use_dcc = info->has_graphics && !(surf->flags & RADEON_SURF_DISABLE_DCC) && !compressed && 1888 is_dcc_supported_by_CB(info, in->swizzleMode) && 1889 (!in->flags.display || 1890 is_dcc_supported_by_DCN(info, config, surf, !in->flags.metaRbUnaligned, 1891 !in->flags.metaPipeUnaligned)); 1892 } 1893 1894 /* DCC */ 1895 if (use_dcc) { 1896 ADDR2_COMPUTE_DCCINFO_INPUT din = {0}; 1897 ADDR2_COMPUTE_DCCINFO_OUTPUT dout = {0}; 1898 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 1899 1900 din.size = sizeof(ADDR2_COMPUTE_DCCINFO_INPUT); 1901 dout.size = sizeof(ADDR2_COMPUTE_DCCINFO_OUTPUT); 1902 dout.pMipInfo = meta_mip_info; 1903 1904 din.dccKeyFlags.pipeAligned = !in->flags.metaPipeUnaligned; 1905 din.dccKeyFlags.rbAligned = !in->flags.metaRbUnaligned; 1906 din.resourceType = in->resourceType; 1907 din.swizzleMode = in->swizzleMode; 1908 din.bpp = in->bpp; 1909 din.unalignedWidth = in->width; 1910 din.unalignedHeight = in->height; 1911 din.numSlices = in->numSlices; 1912 din.numFrags = in->numFrags; 1913 din.numMipLevels = in->numMipLevels; 1914 din.dataSurfaceSize = out.surfSize; 1915 din.firstMipIdInTail = out.firstMipIdInTail; 1916 1917 if (info->gfx_level == GFX9) 1918 simple_mtx_lock(&addrlib->lock); 1919 ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout); 1920 if (info->gfx_level == GFX9) 1921 simple_mtx_unlock(&addrlib->lock); 1922 1923 if (ret != ADDR_OK) 1924 return ret; 1925 1926 surf->u.gfx9.color.dcc.rb_aligned = din.dccKeyFlags.rbAligned; 1927 surf->u.gfx9.color.dcc.pipe_aligned = din.dccKeyFlags.pipeAligned; 1928 surf->u.gfx9.color.dcc_block_width = dout.compressBlkWidth; 1929 surf->u.gfx9.color.dcc_block_height = dout.compressBlkHeight; 1930 surf->u.gfx9.color.dcc_block_depth = dout.compressBlkDepth; 1931 surf->u.gfx9.color.dcc_pitch_max = dout.pitch - 1; 1932 surf->u.gfx9.color.dcc_height = dout.height; 1933 surf->meta_size = dout.dccRamSize; 1934 surf->meta_slice_size = dout.dccRamSliceSize; 1935 surf->meta_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); 1936 surf->num_meta_levels = in->numMipLevels; 1937 1938 /* Disable DCC for levels that are in the mip tail. 1939 * 1940 * There are two issues that this is intended to 1941 * address: 1942 * 1943 * 1. Multiple mip levels may share a cache line. This 1944 * can lead to corruption when switching between 1945 * rendering to different mip levels because the 1946 * RBs don't maintain coherency. 1947 * 1948 * 2. 
Texturing with metadata after rendering sometimes 1949 * fails with corruption, probably for a similar 1950 * reason. 1951 * 1952 * Working around these issues for all levels in the 1953 * mip tail may be overly conservative, but it's what 1954 * Vulkan does. 1955 * 1956 * Alternative solutions that also work but are worse: 1957 * - Disable DCC entirely. 1958 * - Flush TC L2 after rendering. 1959 */ 1960 for (unsigned i = 0; i < in->numMipLevels; i++) { 1961 surf->u.gfx9.meta_levels[i].offset = meta_mip_info[i].offset; 1962 surf->u.gfx9.meta_levels[i].size = meta_mip_info[i].sliceSize; 1963 1964 if (meta_mip_info[i].inMiptail) { 1965 /* GFX10 can only compress the first level 1966 * in the mip tail. 1967 * 1968 * TODO: Try to do the same thing for gfx9 1969 * if there are no regressions. 1970 */ 1971 if (info->gfx_level >= GFX10) 1972 surf->num_meta_levels = i + 1; 1973 else 1974 surf->num_meta_levels = i; 1975 break; 1976 } 1977 } 1978 1979 if (!surf->num_meta_levels) 1980 surf->meta_size = 0; 1981 1982 surf->u.gfx9.color.display_dcc_size = surf->meta_size; 1983 surf->u.gfx9.color.display_dcc_alignment_log2 = surf->meta_alignment_log2; 1984 surf->u.gfx9.color.display_dcc_pitch_max = surf->u.gfx9.color.dcc_pitch_max; 1985 surf->u.gfx9.color.display_dcc_height = surf->u.gfx9.color.dcc_height; 1986 1987 if (in->resourceType == ADDR_RSRC_TEX_2D) 1988 ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.dcc_equation); 1989 1990 /* Compute displayable DCC. */ 1991 if (((in->flags.display && info->use_display_dcc_with_retile_blit) || 1992 ac_modifier_has_dcc_retile(surf->modifier)) && surf->num_meta_levels) { 1993 /* Compute displayable DCC info. */ 1994 din.dccKeyFlags.pipeAligned = 0; 1995 din.dccKeyFlags.rbAligned = 0; 1996 1997 assert(din.numSlices == 1); 1998 assert(din.numMipLevels == 1); 1999 assert(din.numFrags == 1); 2000 assert(surf->tile_swizzle == 0); 2001 assert(surf->u.gfx9.color.dcc.pipe_aligned || surf->u.gfx9.color.dcc.rb_aligned); 2002 2003 if (info->gfx_level == GFX9) 2004 simple_mtx_lock(&addrlib->lock); 2005 ret = Addr2ComputeDccInfo(addrlib->handle, &din, &dout); 2006 if (info->gfx_level == GFX9) 2007 simple_mtx_unlock(&addrlib->lock); 2008 2009 if (ret != ADDR_OK) 2010 return ret; 2011 2012 surf->u.gfx9.color.display_dcc_size = dout.dccRamSize; 2013 surf->u.gfx9.color.display_dcc_alignment_log2 = util_logbase2(dout.dccRamBaseAlign); 2014 surf->u.gfx9.color.display_dcc_pitch_max = dout.pitch - 1; 2015 surf->u.gfx9.color.display_dcc_height = dout.height; 2016 assert(surf->u.gfx9.color.display_dcc_size <= surf->meta_size); 2017 2018 ac_copy_dcc_equation(info, &dout, &surf->u.gfx9.color.display_dcc_equation); 2019 surf->u.gfx9.color.dcc.display_equation_valid = true; 2020 } 2021 } 2022 2023 /* FMASK (it doesn't exist on GFX11) */ 2024 if (info->gfx_level <= GFX10_3 && info->has_graphics && 2025 in->numSamples > 1 && !(surf->flags & RADEON_SURF_NO_FMASK)) { 2026 ADDR2_COMPUTE_FMASK_INFO_INPUT fin = {0}; 2027 ADDR2_COMPUTE_FMASK_INFO_OUTPUT fout = {0}; 2028 2029 fin.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_INPUT); 2030 fout.size = sizeof(ADDR2_COMPUTE_FMASK_INFO_OUTPUT); 2031 2032 ret = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, in, true, &fin.swizzleMode); 2033 if (ret != ADDR_OK) 2034 return ret; 2035 2036 fin.unalignedWidth = in->width; 2037 fin.unalignedHeight = in->height; 2038 fin.numSlices = in->numSlices; 2039 fin.numSamples = in->numSamples; 2040 fin.numFrags = in->numFrags; 2041 2042 ret = Addr2ComputeFmaskInfo(addrlib->handle, &fin, &fout); 2043 if 
(ret != ADDR_OK) 2044 return ret; 2045 2046 surf->u.gfx9.color.fmask_swizzle_mode = fin.swizzleMode; 2047 surf->u.gfx9.color.fmask_epitch = fout.pitch - 1; 2048 surf->fmask_size = fout.fmaskBytes; 2049 surf->fmask_alignment_log2 = util_logbase2(fout.baseAlign); 2050 surf->fmask_slice_size = fout.sliceSize; 2051 2052 /* Compute tile swizzle for the FMASK surface. */ 2053 if (config->info.fmask_surf_index && fin.swizzleMode >= ADDR_SW_64KB_Z_T && 2054 !(surf->flags & RADEON_SURF_SHAREABLE)) { 2055 ADDR2_COMPUTE_PIPEBANKXOR_INPUT xin = {0}; 2056 ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT xout = {0}; 2057 2058 xin.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_INPUT); 2059 xout.size = sizeof(ADDR2_COMPUTE_PIPEBANKXOR_OUTPUT); 2060 2061 /* This counter starts from 1 instead of 0. */ 2062 xin.surfIndex = p_atomic_inc_return(config->info.fmask_surf_index); 2063 xin.flags = in->flags; 2064 xin.swizzleMode = fin.swizzleMode; 2065 xin.resourceType = in->resourceType; 2066 xin.format = in->format; 2067 xin.numSamples = in->numSamples; 2068 xin.numFrags = in->numFrags; 2069 2070 ret = Addr2ComputePipeBankXor(addrlib->handle, &xin, &xout); 2071 if (ret != ADDR_OK) 2072 return ret; 2073 2074 assert(xout.pipeBankXor <= u_bit_consecutive(0, sizeof(surf->fmask_tile_swizzle) * 8)); 2075 surf->fmask_tile_swizzle = xout.pipeBankXor; 2076 } 2077 } 2078 2079 /* CMASK -- on GFX10 only for FMASK (and it doesn't exist on GFX11) */ 2080 if (info->gfx_level <= GFX10_3 && info->has_graphics && 2081 in->swizzleMode != ADDR_SW_LINEAR && in->resourceType == ADDR_RSRC_TEX_2D && 2082 ((info->gfx_level <= GFX9 && in->numSamples == 1 && in->flags.metaPipeUnaligned == 0 && 2083 in->flags.metaRbUnaligned == 0) || 2084 (surf->fmask_size && in->numSamples >= 2))) { 2085 ADDR2_COMPUTE_CMASK_INFO_INPUT cin = {0}; 2086 ADDR2_COMPUTE_CMASK_INFO_OUTPUT cout = {0}; 2087 ADDR2_META_MIP_INFO meta_mip_info[RADEON_SURF_MAX_LEVELS] = {0}; 2088 2089 cin.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_INPUT); 2090 cout.size = sizeof(ADDR2_COMPUTE_CMASK_INFO_OUTPUT); 2091 cout.pMipInfo = meta_mip_info; 2092 2093 assert(in->flags.metaPipeUnaligned == 0); 2094 assert(in->flags.metaRbUnaligned == 0); 2095 2096 cin.cMaskFlags.pipeAligned = 1; 2097 cin.cMaskFlags.rbAligned = 1; 2098 cin.resourceType = in->resourceType; 2099 cin.unalignedWidth = in->width; 2100 cin.unalignedHeight = in->height; 2101 cin.numSlices = in->numSlices; 2102 cin.numMipLevels = in->numMipLevels; 2103 cin.firstMipIdInTail = out.firstMipIdInTail; 2104 2105 if (in->numSamples > 1) 2106 cin.swizzleMode = surf->u.gfx9.color.fmask_swizzle_mode; 2107 else 2108 cin.swizzleMode = in->swizzleMode; 2109 2110 if (info->gfx_level == GFX9) 2111 simple_mtx_lock(&addrlib->lock); 2112 ret = Addr2ComputeCmaskInfo(addrlib->handle, &cin, &cout); 2113 if (info->gfx_level == GFX9) 2114 simple_mtx_unlock(&addrlib->lock); 2115 2116 if (ret != ADDR_OK) 2117 return ret; 2118 2119 surf->cmask_size = cout.cmaskBytes; 2120 surf->cmask_alignment_log2 = util_logbase2(cout.baseAlign); 2121 surf->cmask_slice_size = cout.sliceSize; 2122 surf->cmask_pitch = cout.pitch; 2123 surf->cmask_height = cout.height; 2124 surf->u.gfx9.color.cmask_level0.offset = meta_mip_info[0].offset; 2125 surf->u.gfx9.color.cmask_level0.size = meta_mip_info[0].sliceSize; 2126 2127 ac_copy_cmask_equation(info, &cout, &surf->u.gfx9.color.cmask_equation); 2128 } 2129 } 2130 2131 return 0; 2132} 2133 2134static int gfx9_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info, 2135 const struct ac_surf_config *config, enum radeon_surf_mode 
mode, 2136 struct radeon_surf *surf) 2137{ 2138 bool compressed; 2139 ADDR2_COMPUTE_SURFACE_INFO_INPUT AddrSurfInfoIn = {0}; 2140 int r; 2141 2142 AddrSurfInfoIn.size = sizeof(ADDR2_COMPUTE_SURFACE_INFO_INPUT); 2143 2144 compressed = surf->blk_w == 4 && surf->blk_h == 4; 2145 2146 /* The format must be set correctly for the allocation of compressed 2147 * textures to work. In other cases, setting the bpp is sufficient. */ 2148 if (compressed) { 2149 switch (surf->bpe) { 2150 case 8: 2151 AddrSurfInfoIn.format = ADDR_FMT_BC1; 2152 break; 2153 case 16: 2154 AddrSurfInfoIn.format = ADDR_FMT_BC3; 2155 break; 2156 default: 2157 assert(0); 2158 } 2159 } else { 2160 switch (surf->bpe) { 2161 case 1: 2162 assert(!(surf->flags & RADEON_SURF_ZBUFFER)); 2163 AddrSurfInfoIn.format = ADDR_FMT_8; 2164 break; 2165 case 2: 2166 assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER)); 2167 AddrSurfInfoIn.format = ADDR_FMT_16; 2168 break; 2169 case 4: 2170 assert(surf->flags & RADEON_SURF_ZBUFFER || !(surf->flags & RADEON_SURF_SBUFFER)); 2171 AddrSurfInfoIn.format = ADDR_FMT_32; 2172 break; 2173 case 8: 2174 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 2175 AddrSurfInfoIn.format = ADDR_FMT_32_32; 2176 break; 2177 case 12: 2178 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 2179 AddrSurfInfoIn.format = ADDR_FMT_32_32_32; 2180 break; 2181 case 16: 2182 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 2183 AddrSurfInfoIn.format = ADDR_FMT_32_32_32_32; 2184 break; 2185 default: 2186 assert(0); 2187 } 2188 AddrSurfInfoIn.bpp = surf->bpe * 8; 2189 } 2190 2191 bool is_color_surface = !(surf->flags & RADEON_SURF_Z_OR_SBUFFER); 2192 AddrSurfInfoIn.flags.color = is_color_surface && !(surf->flags & RADEON_SURF_NO_RENDER_TARGET); 2193 AddrSurfInfoIn.flags.depth = (surf->flags & RADEON_SURF_ZBUFFER) != 0; 2194 AddrSurfInfoIn.flags.display = get_display_flag(config, surf); 2195 /* flags.texture currently refers to TC-compatible HTILE */ 2196 AddrSurfInfoIn.flags.texture = is_color_surface || surf->flags & RADEON_SURF_TC_COMPATIBLE_HTILE; 2197 AddrSurfInfoIn.flags.opt4space = 1; 2198 AddrSurfInfoIn.flags.prt = (surf->flags & RADEON_SURF_PRT) != 0; 2199 2200 AddrSurfInfoIn.numMipLevels = config->info.levels; 2201 AddrSurfInfoIn.numSamples = MAX2(1, config->info.samples); 2202 AddrSurfInfoIn.numFrags = AddrSurfInfoIn.numSamples; 2203 2204 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)) 2205 AddrSurfInfoIn.numFrags = MAX2(1, config->info.storage_samples); 2206 2207 /* GFX9 doesn't support 1D depth textures, so allocate all 1D textures 2208 * as 2D to avoid having shader variants for 1D vs 2D, so all shaders 2209 * must sample 1D textures as 2D. */ 2210 if (config->is_3d) 2211 AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_3D; 2212 else if (info->gfx_level != GFX9 && config->is_1d) 2213 AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_1D; 2214 else 2215 AddrSurfInfoIn.resourceType = ADDR_RSRC_TEX_2D; 2216 2217 AddrSurfInfoIn.width = config->info.width; 2218 AddrSurfInfoIn.height = config->info.height; 2219 2220 if (config->is_3d) 2221 AddrSurfInfoIn.numSlices = config->info.depth; 2222 else if (config->is_cube) 2223 AddrSurfInfoIn.numSlices = 6; 2224 else 2225 AddrSurfInfoIn.numSlices = config->info.array_size; 2226 2227 /* This is propagated to DCC. It must be 0 for HTILE and CMASK. 
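 *
 * Editor's note: "unaligned" is addrlib's inverted view of the same flag,
 * i.e. metaPipeUnaligned = 1 asks for PIPE_ALIGNED = 0 metadata (and the
 * same for RB alignment). HTILE and CMASK are always computed pipe- and
 * RB-aligned, which is why gfx9_compute_miptree() asserts that both flags
 * are still zero before calling Addr2ComputeHtileInfo() and
 * Addr2ComputeCmaskInfo().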
*/ 2228 AddrSurfInfoIn.flags.metaPipeUnaligned = 0; 2229 AddrSurfInfoIn.flags.metaRbUnaligned = 0; 2230 2231 if (ac_modifier_has_dcc(surf->modifier)) { 2232 ac_modifier_fill_dcc_params(surf->modifier, surf, &AddrSurfInfoIn); 2233 } else if (!AddrSurfInfoIn.flags.depth && !AddrSurfInfoIn.flags.stencil) { 2234 /* Optimal values for the L2 cache. */ 2235 /* Don't change the DCC settings for imported buffers - they might differ. */ 2236 if (!(surf->flags & RADEON_SURF_IMPORTED)) { 2237 if (info->gfx_level == GFX9) { 2238 surf->u.gfx9.color.dcc.independent_64B_blocks = 1; 2239 surf->u.gfx9.color.dcc.independent_128B_blocks = 0; 2240 surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 2241 } else if (info->gfx_level >= GFX10) { 2242 surf->u.gfx9.color.dcc.independent_64B_blocks = 0; 2243 surf->u.gfx9.color.dcc.independent_128B_blocks = 1; 2244 surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_128B; 2245 } 2246 } 2247 2248 if (AddrSurfInfoIn.flags.display) { 2249 /* The display hardware can only read DCC with RB_ALIGNED=0 and 2250 * PIPE_ALIGNED=0. PIPE_ALIGNED really means L2CACHE_ALIGNED. 2251 * 2252 * The CB block requires RB_ALIGNED=1 except 1 RB chips. 2253 * PIPE_ALIGNED is optional, but PIPE_ALIGNED=0 requires L2 flushes 2254 * after rendering, so PIPE_ALIGNED=1 is recommended. 2255 */ 2256 if (info->use_display_dcc_unaligned) { 2257 AddrSurfInfoIn.flags.metaPipeUnaligned = 1; 2258 AddrSurfInfoIn.flags.metaRbUnaligned = 1; 2259 } 2260 2261 /* Adjust DCC settings to meet DCN requirements. */ 2262 /* Don't change the DCC settings for imported buffers - they might differ. */ 2263 if (!(surf->flags & RADEON_SURF_IMPORTED) && 2264 (info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit)) { 2265 /* Only Navi12/14 support independent 64B blocks in L2, 2266 * but without DCC image stores. 2267 */ 2268 if (info->family == CHIP_NAVI12 || info->family == CHIP_NAVI14) { 2269 surf->u.gfx9.color.dcc.independent_64B_blocks = 1; 2270 surf->u.gfx9.color.dcc.independent_128B_blocks = 0; 2271 surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 2272 } 2273 2274 if ((info->gfx_level >= GFX10_3 && info->family <= CHIP_REMBRANDT) || 2275 /* Newer chips will skip this when possible to get better performance. 2276 * This is also possible for other gfx10.3 chips, but is disabled for 2277 * interoperability between different Mesa versions. 
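 *
 * Editor's paraphrase of this condition: gfx10.3 parts up to Rembrandt
 * always select INDEPENDENT_64B + INDEPENDENT_128B with a 64B max compressed
 * block for displayable DCC, while newer parts only do so when
 * gfx10_DCN_requires_independent_64B_blocks() says the 4K rule applies.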
2278 */ 2279 (info->family > CHIP_REMBRANDT && 2280 gfx10_DCN_requires_independent_64B_blocks(info, config))) { 2281 surf->u.gfx9.color.dcc.independent_64B_blocks = 1; 2282 surf->u.gfx9.color.dcc.independent_128B_blocks = 1; 2283 surf->u.gfx9.color.dcc.max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B; 2284 } 2285 } 2286 } 2287 } 2288 2289 if (surf->modifier == DRM_FORMAT_MOD_INVALID) { 2290 switch (mode) { 2291 case RADEON_SURF_MODE_LINEAR_ALIGNED: 2292 assert(config->info.samples <= 1); 2293 assert(!(surf->flags & RADEON_SURF_Z_OR_SBUFFER)); 2294 AddrSurfInfoIn.swizzleMode = ADDR_SW_LINEAR; 2295 break; 2296 2297 case RADEON_SURF_MODE_1D: 2298 case RADEON_SURF_MODE_2D: 2299 if (surf->flags & RADEON_SURF_IMPORTED || 2300 (info->gfx_level >= GFX10 && surf->flags & RADEON_SURF_FORCE_SWIZZLE_MODE)) { 2301 AddrSurfInfoIn.swizzleMode = surf->u.gfx9.swizzle_mode; 2302 break; 2303 } 2304 2305 r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false, 2306 &AddrSurfInfoIn.swizzleMode); 2307 if (r) 2308 return r; 2309 break; 2310 2311 default: 2312 assert(0); 2313 } 2314 } else { 2315 /* We have a valid and required modifier here. */ 2316 2317 assert(!compressed); 2318 assert(!ac_modifier_has_dcc(surf->modifier) || 2319 !(surf->flags & RADEON_SURF_DISABLE_DCC)); 2320 2321 AddrSurfInfoIn.swizzleMode = ac_modifier_gfx9_swizzle_mode(surf->modifier); 2322 } 2323 2324 surf->u.gfx9.resource_type = AddrSurfInfoIn.resourceType; 2325 surf->has_stencil = !!(surf->flags & RADEON_SURF_SBUFFER); 2326 2327 surf->num_meta_levels = 0; 2328 surf->surf_size = 0; 2329 surf->fmask_size = 0; 2330 surf->meta_size = 0; 2331 surf->meta_slice_size = 0; 2332 surf->u.gfx9.surf_offset = 0; 2333 if (AddrSurfInfoIn.flags.stencil) 2334 surf->u.gfx9.zs.stencil_offset = 0; 2335 surf->cmask_size = 0; 2336 2337 const bool only_stencil = 2338 (surf->flags & RADEON_SURF_SBUFFER) && !(surf->flags & RADEON_SURF_ZBUFFER); 2339 2340 /* Calculate texture layout information. */ 2341 if (!only_stencil) { 2342 r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn); 2343 if (r) 2344 return r; 2345 } 2346 2347 /* Calculate texture layout information for stencil. */ 2348 if (surf->flags & RADEON_SURF_SBUFFER) { 2349 AddrSurfInfoIn.flags.stencil = 1; 2350 AddrSurfInfoIn.bpp = 8; 2351 AddrSurfInfoIn.format = ADDR_FMT_8; 2352 2353 if (!AddrSurfInfoIn.flags.depth) { 2354 r = gfx9_get_preferred_swizzle_mode(addrlib->handle, info, surf, &AddrSurfInfoIn, false, 2355 &AddrSurfInfoIn.swizzleMode); 2356 if (r) 2357 return r; 2358 } else 2359 AddrSurfInfoIn.flags.depth = 0; 2360 2361 r = gfx9_compute_miptree(addrlib, info, config, surf, compressed, &AddrSurfInfoIn); 2362 if (r) 2363 return r; 2364 } 2365 2366 surf->is_linear = surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR; 2367 2368 /* Query whether the surface is displayable. */ 2369 /* This is only useful for surfaces that are allocated without SCANOUT. */ 2370 BOOL_32 displayable = false; 2371 if (!config->is_3d && !config->is_cube) { 2372 r = Addr2IsValidDisplaySwizzleMode(addrlib->handle, surf->u.gfx9.swizzle_mode, 2373 surf->bpe * 8, &displayable); 2374 if (r) 2375 return r; 2376 2377 /* Display needs unaligned DCC. */ 2378 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 2379 surf->num_meta_levels && 2380 (!is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned, 2381 surf->u.gfx9.color.dcc.pipe_aligned) || 2382 /* Don't set is_displayable if displayable DCC is missing. 
*/ 2383 (info->use_display_dcc_with_retile_blit && !surf->u.gfx9.color.dcc.display_equation_valid))) 2384 displayable = false; 2385 } 2386 surf->is_displayable = displayable; 2387 2388 /* Validate that we allocated a displayable surface if requested. */ 2389 assert(!AddrSurfInfoIn.flags.display || surf->is_displayable); 2390 2391 /* Validate that DCC is set up correctly. */ 2392 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->num_meta_levels) { 2393 assert(is_dcc_supported_by_L2(info, surf)); 2394 if (AddrSurfInfoIn.flags.color) 2395 assert(is_dcc_supported_by_CB(info, surf->u.gfx9.swizzle_mode)); 2396 if (AddrSurfInfoIn.flags.display && surf->modifier == DRM_FORMAT_MOD_INVALID) { 2397 assert(is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned, 2398 surf->u.gfx9.color.dcc.pipe_aligned)); 2399 } 2400 } 2401 2402 if (info->has_graphics && !compressed && !config->is_3d && config->info.levels == 1 && 2403 AddrSurfInfoIn.flags.color && !surf->is_linear && 2404 (1 << surf->surf_alignment_log2) >= 64 * 1024 && /* 64KB tiling */ 2405 !(surf->flags & (RADEON_SURF_DISABLE_DCC | RADEON_SURF_FORCE_SWIZZLE_MODE | 2406 RADEON_SURF_FORCE_MICRO_TILE_MODE)) && 2407 surf->modifier == DRM_FORMAT_MOD_INVALID && 2408 is_dcc_supported_by_DCN(info, config, surf, surf->u.gfx9.color.dcc.rb_aligned, 2409 surf->u.gfx9.color.dcc.pipe_aligned)) { 2410 /* Validate that DCC is enabled if DCN can do it. */ 2411 if ((info->use_display_dcc_unaligned || info->use_display_dcc_with_retile_blit) && 2412 AddrSurfInfoIn.flags.display && surf->bpe == 4) { 2413 assert(surf->num_meta_levels); 2414 } 2415 2416 /* Validate that non-scanout DCC is always enabled. */ 2417 if (!AddrSurfInfoIn.flags.display) 2418 assert(surf->num_meta_levels); 2419 } 2420 2421 if (!surf->meta_size) { 2422 /* Unset this if HTILE is not present. */ 2423 surf->flags &= ~RADEON_SURF_TC_COMPATIBLE_HTILE; 2424 } 2425 2426 if (surf->modifier != DRM_FORMAT_MOD_INVALID) { 2427 assert((surf->num_meta_levels != 0) == ac_modifier_has_dcc(surf->modifier)); 2428 } 2429 2430 switch (surf->u.gfx9.swizzle_mode) { 2431 /* S = standard. */ 2432 case ADDR_SW_256B_S: 2433 case ADDR_SW_4KB_S: 2434 case ADDR_SW_64KB_S: 2435 case ADDR_SW_64KB_S_T: 2436 case ADDR_SW_4KB_S_X: 2437 case ADDR_SW_64KB_S_X: 2438 case ADDR_SW_256KB_S_X: 2439 surf->micro_tile_mode = RADEON_MICRO_MODE_STANDARD; 2440 break; 2441 2442 /* D = display. */ 2443 case ADDR_SW_LINEAR: 2444 case ADDR_SW_256B_D: 2445 case ADDR_SW_4KB_D: 2446 case ADDR_SW_64KB_D: 2447 case ADDR_SW_64KB_D_T: 2448 case ADDR_SW_4KB_D_X: 2449 case ADDR_SW_64KB_D_X: 2450 case ADDR_SW_256KB_D_X: 2451 surf->micro_tile_mode = RADEON_MICRO_MODE_DISPLAY; 2452 break; 2453 2454 /* R = rotated (gfx9), render target (gfx10). */ 2455 case ADDR_SW_256B_R: 2456 case ADDR_SW_4KB_R: 2457 case ADDR_SW_64KB_R: 2458 case ADDR_SW_64KB_R_T: 2459 case ADDR_SW_4KB_R_X: 2460 case ADDR_SW_64KB_R_X: 2461 case ADDR_SW_256KB_R_X: 2462 /* The rotated micro tile mode doesn't work if both CMASK and RB+ are 2463 * used at the same time. We currently do not use rotated 2464 * in gfx9. 2465 */ 2466 assert(info->gfx_level >= GFX10 || !"rotate micro tile mode is unsupported"); 2467 surf->micro_tile_mode = RADEON_MICRO_MODE_RENDER; 2468 break; 2469 2470 /* Z = depth. 
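 * For reference, the full mapping recovered by this switch (editor's
 * summary):
 *   *_S modes            - RADEON_MICRO_MODE_STANDARD
 *   linear and *_D modes - RADEON_MICRO_MODE_DISPLAY
 *   *_R modes            - RADEON_MICRO_MODE_RENDER (gfx10+ only)
 *   *_Z modes            - RADEON_MICRO_MODE_DEPTH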
*/ 2471 case ADDR_SW_4KB_Z: 2472 case ADDR_SW_64KB_Z: 2473 case ADDR_SW_64KB_Z_T: 2474 case ADDR_SW_4KB_Z_X: 2475 case ADDR_SW_64KB_Z_X: 2476 case ADDR_SW_256KB_Z_X: 2477 surf->micro_tile_mode = RADEON_MICRO_MODE_DEPTH; 2478 break; 2479 2480 default: 2481 assert(0); 2482 } 2483 2484 return 0; 2485} 2486 2487int ac_compute_surface(struct ac_addrlib *addrlib, const struct radeon_info *info, 2488 const struct ac_surf_config *config, enum radeon_surf_mode mode, 2489 struct radeon_surf *surf) 2490{ 2491 int r; 2492 2493 r = surf_config_sanity(config, surf->flags); 2494 if (r) 2495 return r; 2496 2497 if (info->family_id >= FAMILY_AI) 2498 r = gfx9_compute_surface(addrlib, info, config, mode, surf); 2499 else 2500 r = gfx6_compute_surface(addrlib->handle, info, config, mode, surf); 2501 2502 if (r) 2503 return r; 2504 2505 /* Determine the memory layout of multiple allocations in one buffer. */ 2506 surf->total_size = surf->surf_size; 2507 surf->alignment_log2 = surf->surf_alignment_log2; 2508 2509 /* Ensure the offsets are always 0 if not available. */ 2510 surf->meta_offset = surf->display_dcc_offset = surf->fmask_offset = surf->cmask_offset = 0; 2511 2512 if (surf->fmask_size) { 2513 assert(config->info.samples >= 2); 2514 surf->fmask_offset = align64(surf->total_size, 1 << surf->fmask_alignment_log2); 2515 surf->total_size = surf->fmask_offset + surf->fmask_size; 2516 surf->alignment_log2 = MAX2(surf->alignment_log2, surf->fmask_alignment_log2); 2517 } 2518 2519 /* Single-sample CMASK is in a separate buffer. */ 2520 if (surf->cmask_size && config->info.samples >= 2) { 2521 surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2); 2522 surf->total_size = surf->cmask_offset + surf->cmask_size; 2523 surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2); 2524 } 2525 2526 if (surf->is_displayable) 2527 surf->flags |= RADEON_SURF_SCANOUT; 2528 2529 if (surf->meta_size && 2530 /* dcc_size is computed on GFX9+ only if it's displayable. */ 2531 (info->gfx_level >= GFX9 || !get_display_flag(config, surf))) { 2532 /* It's better when displayable DCC is immediately after 2533 * the image due to hw-specific reasons. 2534 */ 2535 if (info->gfx_level >= GFX9 && 2536 !(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && 2537 surf->u.gfx9.color.dcc.display_equation_valid) { 2538 /* Add space for the displayable DCC buffer. */ 2539 surf->display_dcc_offset = align64(surf->total_size, 1 << surf->u.gfx9.color.display_dcc_alignment_log2); 2540 surf->total_size = surf->display_dcc_offset + surf->u.gfx9.color.display_dcc_size; 2541 } 2542 2543 surf->meta_offset = align64(surf->total_size, 1 << surf->meta_alignment_log2); 2544 surf->total_size = surf->meta_offset + surf->meta_size; 2545 surf->alignment_log2 = MAX2(surf->alignment_log2, surf->meta_alignment_log2); 2546 } 2547 2548 return 0; 2549} 2550 2551/* This is meant to be used for disabling DCC. 
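 *
 * Minimal usage sketch (editor's addition; "tex" is a hypothetical driver
 * texture that embeds a radeon_surf):
 *
 *   ac_surface_zero_dcc_fields(&tex->surface);
 *   // then re-export the BO metadata, e.g. via ac_surface_get_bo_metadata(),
 *   // so other users of the buffer also see DCC as disabled.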
*/ 2552void ac_surface_zero_dcc_fields(struct radeon_surf *surf) 2553{ 2554 if (surf->flags & RADEON_SURF_Z_OR_SBUFFER) 2555 return; 2556 2557 surf->meta_offset = 0; 2558 surf->display_dcc_offset = 0; 2559 if (!surf->fmask_offset && !surf->cmask_offset) { 2560 surf->total_size = surf->surf_size; 2561 surf->alignment_log2 = surf->surf_alignment_log2; 2562 } 2563} 2564 2565static unsigned eg_tile_split(unsigned tile_split) 2566{ 2567 switch (tile_split) { 2568 case 0: 2569 tile_split = 64; 2570 break; 2571 case 1: 2572 tile_split = 128; 2573 break; 2574 case 2: 2575 tile_split = 256; 2576 break; 2577 case 3: 2578 tile_split = 512; 2579 break; 2580 default: 2581 case 4: 2582 tile_split = 1024; 2583 break; 2584 case 5: 2585 tile_split = 2048; 2586 break; 2587 case 6: 2588 tile_split = 4096; 2589 break; 2590 } 2591 return tile_split; 2592} 2593 2594static unsigned eg_tile_split_rev(unsigned eg_tile_split) 2595{ 2596 switch (eg_tile_split) { 2597 case 64: 2598 return 0; 2599 case 128: 2600 return 1; 2601 case 256: 2602 return 2; 2603 case 512: 2604 return 3; 2605 default: 2606 case 1024: 2607 return 4; 2608 case 2048: 2609 return 5; 2610 case 4096: 2611 return 6; 2612 } 2613} 2614 2615#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_SHIFT 45 2616#define AMDGPU_TILING_DCC_MAX_COMPRESSED_BLOCK_SIZE_MASK 0x3 2617 2618/* This should be called before ac_compute_surface. */ 2619void ac_surface_set_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf, 2620 uint64_t tiling_flags, enum radeon_surf_mode *mode) 2621{ 2622 bool scanout; 2623 2624 if (info->gfx_level >= GFX9) { 2625 surf->u.gfx9.swizzle_mode = AMDGPU_TILING_GET(tiling_flags, SWIZZLE_MODE); 2626 surf->u.gfx9.color.dcc.independent_64B_blocks = 2627 AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_64B); 2628 surf->u.gfx9.color.dcc.independent_128B_blocks = 2629 AMDGPU_TILING_GET(tiling_flags, DCC_INDEPENDENT_128B); 2630 surf->u.gfx9.color.dcc.max_compressed_block_size = 2631 AMDGPU_TILING_GET(tiling_flags, DCC_MAX_COMPRESSED_BLOCK_SIZE); 2632 surf->u.gfx9.color.display_dcc_pitch_max = AMDGPU_TILING_GET(tiling_flags, DCC_PITCH_MAX); 2633 scanout = AMDGPU_TILING_GET(tiling_flags, SCANOUT); 2634 *mode = 2635 surf->u.gfx9.swizzle_mode > 0 ? 
RADEON_SURF_MODE_2D : RADEON_SURF_MODE_LINEAR_ALIGNED; 2636 } else { 2637 surf->u.legacy.pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG); 2638 surf->u.legacy.bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH); 2639 surf->u.legacy.bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT); 2640 surf->u.legacy.tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT)); 2641 surf->u.legacy.mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT); 2642 surf->u.legacy.num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS); 2643 scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */ 2644 2645 if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4) /* 2D_TILED_THIN1 */ 2646 *mode = RADEON_SURF_MODE_2D; 2647 else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */ 2648 *mode = RADEON_SURF_MODE_1D; 2649 else 2650 *mode = RADEON_SURF_MODE_LINEAR_ALIGNED; 2651 } 2652 2653 if (scanout) 2654 surf->flags |= RADEON_SURF_SCANOUT; 2655 else 2656 surf->flags &= ~RADEON_SURF_SCANOUT; 2657} 2658 2659void ac_surface_get_bo_metadata(const struct radeon_info *info, struct radeon_surf *surf, 2660 uint64_t *tiling_flags) 2661{ 2662 *tiling_flags = 0; 2663 2664 if (info->gfx_level >= GFX9) { 2665 uint64_t dcc_offset = 0; 2666 2667 if (surf->meta_offset) { 2668 dcc_offset = surf->display_dcc_offset ? surf->display_dcc_offset : surf->meta_offset; 2669 assert((dcc_offset >> 8) != 0 && (dcc_offset >> 8) < (1 << 24)); 2670 } 2671 2672 *tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, surf->u.gfx9.swizzle_mode); 2673 *tiling_flags |= AMDGPU_TILING_SET(DCC_OFFSET_256B, dcc_offset >> 8); 2674 *tiling_flags |= AMDGPU_TILING_SET(DCC_PITCH_MAX, surf->u.gfx9.color.display_dcc_pitch_max); 2675 *tiling_flags |= 2676 AMDGPU_TILING_SET(DCC_INDEPENDENT_64B, surf->u.gfx9.color.dcc.independent_64B_blocks); 2677 *tiling_flags |= 2678 AMDGPU_TILING_SET(DCC_INDEPENDENT_128B, surf->u.gfx9.color.dcc.independent_128B_blocks); 2679 *tiling_flags |= AMDGPU_TILING_SET(DCC_MAX_COMPRESSED_BLOCK_SIZE, 2680 surf->u.gfx9.color.dcc.max_compressed_block_size); 2681 *tiling_flags |= AMDGPU_TILING_SET(SCANOUT, (surf->flags & RADEON_SURF_SCANOUT) != 0); 2682 } else { 2683 if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D) 2684 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */ 2685 else if (surf->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D) 2686 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */ 2687 else 2688 *tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */ 2689 2690 *tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, surf->u.legacy.pipe_config); 2691 *tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(surf->u.legacy.bankw)); 2692 *tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(surf->u.legacy.bankh)); 2693 if (surf->u.legacy.tile_split) 2694 *tiling_flags |= 2695 AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(surf->u.legacy.tile_split)); 2696 *tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(surf->u.legacy.mtilea)); 2697 *tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(surf->u.legacy.num_banks) - 1); 2698 2699 if (surf->flags & RADEON_SURF_SCANOUT) 2700 *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */ 2701 else 2702 *tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */ 2703 } 2704} 2705 2706static uint32_t ac_get_umd_metadata_word1(const struct radeon_info *info) 2707{ 2708 return 
(ATI_VENDOR_ID << 16) | info->pci_id; 2709} 2710 2711/* This should be called after ac_compute_surface. */ 2712bool ac_surface_set_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf, 2713 unsigned num_storage_samples, unsigned num_mipmap_levels, 2714 unsigned size_metadata, const uint32_t metadata[64]) 2715{ 2716 const uint32_t *desc = &metadata[2]; 2717 uint64_t offset; 2718 2719 if (surf->modifier != DRM_FORMAT_MOD_INVALID) 2720 return true; 2721 2722 if (info->gfx_level >= GFX9) 2723 offset = surf->u.gfx9.surf_offset; 2724 else 2725 offset = (uint64_t)surf->u.legacy.level[0].offset_256B * 256; 2726 2727 if (offset || /* Non-zero planes ignore metadata. */ 2728 size_metadata < 10 * 4 || /* at least 2(header) + 8(desc) dwords */ 2729 metadata[0] == 0 || /* invalid version number */ 2730 metadata[1] != ac_get_umd_metadata_word1(info)) /* invalid PCI ID */ { 2731 /* Disable DCC because it might not be enabled. */ 2732 ac_surface_zero_dcc_fields(surf); 2733 2734 /* Don't report an error if the texture comes from an incompatible driver, 2735 * but this might not work. 2736 */ 2737 return true; 2738 } 2739 2740 /* Validate that sample counts and the number of mipmap levels match. */ 2741 unsigned desc_last_level = G_008F1C_LAST_LEVEL(desc[3]); 2742 unsigned type = G_008F1C_TYPE(desc[3]); 2743 2744 if (type == V_008F1C_SQ_RSRC_IMG_2D_MSAA || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) { 2745 unsigned log_samples = util_logbase2(MAX2(1, num_storage_samples)); 2746 2747 if (desc_last_level != log_samples) { 2748 fprintf(stderr, 2749 "amdgpu: invalid MSAA texture import, " 2750 "metadata has log2(samples) = %u, the caller set %u\n", 2751 desc_last_level, log_samples); 2752 return false; 2753 } 2754 } else { 2755 if (desc_last_level != num_mipmap_levels - 1) { 2756 fprintf(stderr, 2757 "amdgpu: invalid mipmapped texture import, " 2758 "metadata has last_level = %u, the caller set %u\n", 2759 desc_last_level, num_mipmap_levels - 1); 2760 return false; 2761 } 2762 } 2763 2764 if (info->gfx_level >= GFX8 && G_008F28_COMPRESSION_EN(desc[6])) { 2765 /* Read DCC information. */ 2766 switch (info->gfx_level) { 2767 case GFX8: 2768 surf->meta_offset = (uint64_t)desc[7] << 8; 2769 break; 2770 2771 case GFX9: 2772 surf->meta_offset = 2773 ((uint64_t)desc[7] << 8) | ((uint64_t)G_008F24_META_DATA_ADDRESS(desc[5]) << 40); 2774 surf->u.gfx9.color.dcc.pipe_aligned = G_008F24_META_PIPE_ALIGNED(desc[5]); 2775 surf->u.gfx9.color.dcc.rb_aligned = G_008F24_META_RB_ALIGNED(desc[5]); 2776 2777 /* If DCC is unaligned, this can only be a displayable image. */ 2778 if (!surf->u.gfx9.color.dcc.pipe_aligned && !surf->u.gfx9.color.dcc.rb_aligned) 2779 assert(surf->is_displayable); 2780 break; 2781 2782 case GFX10: 2783 case GFX10_3: 2784 case GFX11: 2785 surf->meta_offset = 2786 ((uint64_t)G_00A018_META_DATA_ADDRESS_LO(desc[6]) << 8) | ((uint64_t)desc[7] << 16); 2787 surf->u.gfx9.color.dcc.pipe_aligned = G_00A018_META_PIPE_ALIGNED(desc[6]); 2788 break; 2789 2790 default: 2791 assert(0); 2792 return false; 2793 } 2794 } else { 2795 /* Disable DCC. dcc_offset is always set by texture_from_handle 2796 * and must be cleared here. 2797 */ 2798 ac_surface_zero_dcc_fields(surf); 2799 } 2800 2801 return true; 2802} 2803 2804void ac_surface_get_umd_metadata(const struct radeon_info *info, struct radeon_surf *surf, 2805 unsigned num_mipmap_levels, uint32_t desc[8], 2806 unsigned *size_metadata, uint32_t metadata[64]) 2807{ 2808 /* Clear the base address and set the relative DCC offset. 
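 *
 * Editor's note: the importing process applies its own virtual address, so
 * only offsets relative to the start of the buffer are meaningful here. For
 * example, on GFX8 the code below stores DCC offset bits [39:8] in desc[7];
 * ac_surface_set_umd_metadata() is the matching read side and decodes the
 * same layout.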
*/ 2809 desc[0] = 0; 2810 desc[1] &= C_008F14_BASE_ADDRESS_HI; 2811 2812 switch (info->gfx_level) { 2813 case GFX6: 2814 case GFX7: 2815 break; 2816 case GFX8: 2817 desc[7] = surf->meta_offset >> 8; 2818 break; 2819 case GFX9: 2820 desc[7] = surf->meta_offset >> 8; 2821 desc[5] &= C_008F24_META_DATA_ADDRESS; 2822 desc[5] |= S_008F24_META_DATA_ADDRESS(surf->meta_offset >> 40); 2823 break; 2824 case GFX10: 2825 case GFX10_3: 2826 case GFX11: 2827 desc[6] &= C_00A018_META_DATA_ADDRESS_LO; 2828 desc[6] |= S_00A018_META_DATA_ADDRESS_LO(surf->meta_offset >> 8); 2829 desc[7] = surf->meta_offset >> 16; 2830 break; 2831 default: 2832 assert(0); 2833 } 2834 2835 /* Metadata image format format version 1: 2836 * [0] = 1 (metadata format identifier) 2837 * [1] = (VENDOR_ID << 16) | PCI_ID 2838 * [2:9] = image descriptor for the whole resource 2839 * [2] is always 0, because the base address is cleared 2840 * [9] is the DCC offset bits [39:8] from the beginning of 2841 * the buffer 2842 * [10:10+LAST_LEVEL] = mipmap level offset bits [39:8] for each level 2843 */ 2844 2845 metadata[0] = 1; /* metadata image format version 1 */ 2846 2847 /* Tiling modes are ambiguous without a PCI ID. */ 2848 metadata[1] = ac_get_umd_metadata_word1(info); 2849 2850 /* Dwords [2:9] contain the image descriptor. */ 2851 memcpy(&metadata[2], desc, 8 * 4); 2852 *size_metadata = 10 * 4; 2853 2854 /* Dwords [10:..] contain the mipmap level offsets. */ 2855 if (info->gfx_level <= GFX8) { 2856 for (unsigned i = 0; i < num_mipmap_levels; i++) 2857 metadata[10 + i] = surf->u.legacy.level[i].offset_256B; 2858 2859 *size_metadata += num_mipmap_levels * 4; 2860 } 2861} 2862 2863static uint32_t ac_surface_get_gfx9_pitch_align(struct radeon_surf *surf) 2864{ 2865 if (surf->u.gfx9.swizzle_mode == ADDR_SW_LINEAR) 2866 return 256 / surf->bpe; 2867 2868 if (surf->u.gfx9.resource_type == RADEON_RESOURCE_3D) 2869 return 1; /* TODO */ 2870 2871 unsigned bpe_shift = util_logbase2(surf->bpe) / 2; 2872 switch(surf->u.gfx9.swizzle_mode & ~3) { 2873 case ADDR_SW_LINEAR: /* 256B block. */ 2874 return 16 >> bpe_shift; 2875 case ADDR_SW_4KB_Z: 2876 case ADDR_SW_4KB_Z_X: 2877 return 64 >> bpe_shift; 2878 case ADDR_SW_64KB_Z: 2879 case ADDR_SW_64KB_Z_T: 2880 case ADDR_SW_64KB_Z_X: 2881 return 256 >> bpe_shift; 2882 case ADDR_SW_256KB_Z_X: 2883 return 512 >> bpe_shift; 2884 default: 2885 return 1; /* TODO */ 2886 } 2887} 2888 2889bool ac_surface_override_offset_stride(const struct radeon_info *info, struct radeon_surf *surf, 2890 unsigned num_mipmap_levels, uint64_t offset, unsigned pitch) 2891{ 2892 /* 2893 * GFX10 and newer don't support custom strides. Furthermore, for 2894 * multiple miplevels or compression data we'd really need to rerun 2895 * addrlib to update all the fields in the surface. That, however, is a 2896 * software limitation and could be relaxed later. 
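 *
 * Editor's summary of the checks below: a pitch that differs from what
 * addrlib computed is only accepted when the surface is the sole allocation
 * in the buffer (surf_size == total_size), has a single mip level and the
 * chip is pre-GFX10; on GFX9 the pitch must also satisfy
 * ac_surface_get_gfx9_pitch_align(). A typical call from an importer looks
 * roughly like this (sketch; stride_in_bytes is a hypothetical caller value):
 *
 *   ok = ac_surface_override_offset_stride(info, surf, 1, offset,
 *                                          stride_in_bytes / surf->bpe);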
2897 */ 2898 bool require_equal_pitch = surf->surf_size != surf->total_size || 2899 num_mipmap_levels != 1 || 2900 info->gfx_level >= GFX10; 2901 2902 if (info->gfx_level >= GFX9) { 2903 if (pitch) { 2904 if (surf->u.gfx9.surf_pitch != pitch && require_equal_pitch) 2905 return false; 2906 2907 if ((ac_surface_get_gfx9_pitch_align(surf) - 1) & pitch) 2908 return false; 2909 2910 if (pitch != surf->u.gfx9.surf_pitch) { 2911 unsigned slices = surf->surf_size / surf->u.gfx9.surf_slice_size; 2912 2913 surf->u.gfx9.surf_pitch = pitch; 2914 surf->u.gfx9.epitch = pitch - 1; 2915 surf->u.gfx9.surf_slice_size = (uint64_t)pitch * surf->u.gfx9.surf_height * surf->bpe; 2916 surf->total_size = surf->surf_size = surf->u.gfx9.surf_slice_size * slices; 2917 } 2918 } 2919 surf->u.gfx9.surf_offset = offset; 2920 if (surf->u.gfx9.zs.stencil_offset) 2921 surf->u.gfx9.zs.stencil_offset += offset; 2922 } else { 2923 if (pitch) { 2924 if (surf->u.legacy.level[0].nblk_x != pitch && require_equal_pitch) 2925 return false; 2926 2927 surf->u.legacy.level[0].nblk_x = pitch; 2928 surf->u.legacy.level[0].slice_size_dw = 2929 ((uint64_t)pitch * surf->u.legacy.level[0].nblk_y * surf->bpe) / 4; 2930 } 2931 2932 if (offset) { 2933 for (unsigned i = 0; i < ARRAY_SIZE(surf->u.legacy.level); ++i) 2934 surf->u.legacy.level[i].offset_256B += offset / 256; 2935 } 2936 } 2937 2938 if (offset & ((1 << surf->alignment_log2) - 1) || 2939 offset >= UINT64_MAX - surf->total_size) 2940 return false; 2941 2942 if (surf->meta_offset) 2943 surf->meta_offset += offset; 2944 if (surf->fmask_offset) 2945 surf->fmask_offset += offset; 2946 if (surf->cmask_offset) 2947 surf->cmask_offset += offset; 2948 if (surf->display_dcc_offset) 2949 surf->display_dcc_offset += offset; 2950 return true; 2951} 2952 2953unsigned ac_surface_get_nplanes(const struct radeon_surf *surf) 2954{ 2955 if (surf->modifier == DRM_FORMAT_MOD_INVALID) 2956 return 1; 2957 else if (surf->display_dcc_offset) 2958 return 3; 2959 else if (surf->meta_offset) 2960 return 2; 2961 else 2962 return 1; 2963} 2964 2965uint64_t ac_surface_get_plane_offset(enum amd_gfx_level gfx_level, 2966 const struct radeon_surf *surf, 2967 unsigned plane, unsigned layer) 2968{ 2969 switch (plane) { 2970 case 0: 2971 if (gfx_level >= GFX9) { 2972 return surf->u.gfx9.surf_offset + 2973 layer * surf->u.gfx9.surf_slice_size; 2974 } else { 2975 return (uint64_t)surf->u.legacy.level[0].offset_256B * 256 + 2976 layer * (uint64_t)surf->u.legacy.level[0].slice_size_dw * 4; 2977 } 2978 case 1: 2979 assert(!layer); 2980 return surf->display_dcc_offset ? 2981 surf->display_dcc_offset : surf->meta_offset; 2982 case 2: 2983 assert(!layer); 2984 return surf->meta_offset; 2985 default: 2986 unreachable("Invalid plane index"); 2987 } 2988} 2989 2990uint64_t ac_surface_get_plane_stride(enum amd_gfx_level gfx_level, 2991 const struct radeon_surf *surf, 2992 unsigned plane, unsigned level) 2993{ 2994 switch (plane) { 2995 case 0: 2996 if (gfx_level >= GFX9) { 2997 return (surf->is_linear ? surf->u.gfx9.pitch[level] : surf->u.gfx9.surf_pitch) * surf->bpe; 2998 } else { 2999 return surf->u.legacy.level[level].nblk_x * surf->bpe; 3000 } 3001 case 1: 3002 return 1 + (surf->display_dcc_offset ? 
3003 surf->u.gfx9.color.display_dcc_pitch_max : surf->u.gfx9.color.dcc_pitch_max); 3004 case 2: 3005 return surf->u.gfx9.color.dcc_pitch_max + 1; 3006 default: 3007 unreachable("Invalid plane index"); 3008 } 3009} 3010 3011uint64_t ac_surface_get_plane_size(const struct radeon_surf *surf, 3012 unsigned plane) 3013{ 3014 switch (plane) { 3015 case 0: 3016 return surf->surf_size; 3017 case 1: 3018 return surf->display_dcc_offset ? 3019 surf->u.gfx9.color.display_dcc_size : surf->meta_size; 3020 case 2: 3021 return surf->meta_size; 3022 default: 3023 unreachable("Invalid plane index"); 3024 } 3025} 3026 3027void ac_surface_print_info(FILE *out, const struct radeon_info *info, 3028 const struct radeon_surf *surf) 3029{ 3030 if (info->gfx_level >= GFX9) { 3031 fprintf(out, 3032 " Surf: size=%" PRIu64 ", slice_size=%" PRIu64 ", " 3033 "alignment=%u, swmode=%u, epitch=%u, pitch=%u, blk_w=%u, " 3034 "blk_h=%u, bpe=%u, flags=0x%"PRIx64"\n", 3035 surf->surf_size, surf->u.gfx9.surf_slice_size, 3036 1 << surf->surf_alignment_log2, surf->u.gfx9.swizzle_mode, 3037 surf->u.gfx9.epitch, surf->u.gfx9.surf_pitch, 3038 surf->blk_w, surf->blk_h, surf->bpe, surf->flags); 3039 3040 if (surf->fmask_offset) 3041 fprintf(out, 3042 " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", " 3043 "alignment=%u, swmode=%u, epitch=%u\n", 3044 surf->fmask_offset, surf->fmask_size, 3045 1 << surf->fmask_alignment_log2, surf->u.gfx9.color.fmask_swizzle_mode, 3046 surf->u.gfx9.color.fmask_epitch); 3047 3048 if (surf->cmask_offset) 3049 fprintf(out, 3050 " CMask: offset=%" PRIu64 ", size=%u, " 3051 "alignment=%u\n", 3052 surf->cmask_offset, surf->cmask_size, 3053 1 << surf->cmask_alignment_log2); 3054 3055 if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset) 3056 fprintf(out, 3057 " HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n", 3058 surf->meta_offset, surf->meta_size, 3059 1 << surf->meta_alignment_log2); 3060 3061 if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset) 3062 fprintf(out, 3063 " DCC: offset=%" PRIu64 ", size=%u, " 3064 "alignment=%u, pitch_max=%u, num_dcc_levels=%u\n", 3065 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2, 3066 surf->u.gfx9.color.display_dcc_pitch_max, surf->num_meta_levels); 3067 3068 if (surf->has_stencil) 3069 fprintf(out, 3070 " Stencil: offset=%" PRIu64 ", swmode=%u, epitch=%u\n", 3071 surf->u.gfx9.zs.stencil_offset, 3072 surf->u.gfx9.zs.stencil_swizzle_mode, 3073 surf->u.gfx9.zs.stencil_epitch); 3074 } else { 3075 fprintf(out, 3076 " Surf: size=%" PRIu64 ", alignment=%u, blk_w=%u, blk_h=%u, " 3077 "bpe=%u, flags=0x%"PRIx64"\n", 3078 surf->surf_size, 1 << surf->surf_alignment_log2, surf->blk_w, 3079 surf->blk_h, surf->bpe, surf->flags); 3080 3081 fprintf(out, 3082 " Layout: size=%" PRIu64 ", alignment=%u, bankw=%u, bankh=%u, " 3083 "nbanks=%u, mtilea=%u, tilesplit=%u, pipeconfig=%u, scanout=%u\n", 3084 surf->surf_size, 1 << surf->surf_alignment_log2, 3085 surf->u.legacy.bankw, surf->u.legacy.bankh, 3086 surf->u.legacy.num_banks, surf->u.legacy.mtilea, 3087 surf->u.legacy.tile_split, surf->u.legacy.pipe_config, 3088 (surf->flags & RADEON_SURF_SCANOUT) != 0); 3089 3090 if (surf->fmask_offset) 3091 fprintf(out, 3092 " FMask: offset=%" PRIu64 ", size=%" PRIu64 ", " 3093 "alignment=%u, pitch_in_pixels=%u, bankh=%u, " 3094 "slice_tile_max=%u, tile_mode_index=%u\n", 3095 surf->fmask_offset, surf->fmask_size, 3096 1 << surf->fmask_alignment_log2, surf->u.legacy.color.fmask.pitch_in_pixels, 3097 surf->u.legacy.color.fmask.bankh, 3098 
                 surf->u.legacy.color.fmask.slice_tile_max,
                 surf->u.legacy.color.fmask.tiling_index);

      if (surf->cmask_offset)
         fprintf(out,
                 "    CMask: offset=%" PRIu64 ", size=%u, alignment=%u, "
                 "slice_tile_max=%u\n",
                 surf->cmask_offset, surf->cmask_size,
                 1 << surf->cmask_alignment_log2, surf->u.legacy.color.cmask_slice_tile_max);

      if (surf->flags & RADEON_SURF_Z_OR_SBUFFER && surf->meta_offset)
         fprintf(out, "    HTile: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size,
                 1 << surf->meta_alignment_log2);

      if (!(surf->flags & RADEON_SURF_Z_OR_SBUFFER) && surf->meta_offset)
         fprintf(out, "    DCC: offset=%" PRIu64 ", size=%u, alignment=%u\n",
                 surf->meta_offset, surf->meta_size, 1 << surf->meta_alignment_log2);

      if (surf->has_stencil)
         fprintf(out, "    StencilLayout: tilesplit=%u\n",
                 surf->u.legacy.stencil_tile_split);
   }
}

static nir_ssa_def *gfx10_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                                   struct gfx9_meta_equation *equation,
                                                   int blkSizeBias, unsigned blkStart,
                                                   nir_ssa_def *meta_pitch, nir_ssa_def *meta_slice_size,
                                                   nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                                   nir_ssa_def *pipe_xor,
                                                   nir_ssa_def **bit_position)
{
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_ssa_def *one = nir_imm_int(b, 1);

   assert(info->gfx_level >= GFX10);

   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
   unsigned blkSizeLog2 = meta_block_width_log2 + meta_block_height_log2 + blkSizeBias;

   nir_ssa_def *coord[] = {x, y, z, 0};
   nir_ssa_def *address = zero;

   for (unsigned i = blkStart; i < blkSizeLog2 + 1; i++) {
      nir_ssa_def *v = zero;

      for (unsigned c = 0; c < 4; c++) {
         unsigned index = i * 4 + c - (blkStart * 4);
         if (equation->u.gfx10_bits[index]) {
            unsigned mask = equation->u.gfx10_bits[index];
            nir_ssa_def *bits = coord[c];

            while (mask)
               v = nir_ixor(b, v, nir_iand(b, nir_ushr_imm(b, bits, u_bit_scan(&mask)), one));
         }
      }

      address = nir_ior(b, address, nir_ishl(b, v, nir_imm_int(b, i)));
   }

   unsigned blkMask = (1 << blkSizeLog2) - 1;
   unsigned pipeMask = (1 << G_0098F8_NUM_PIPES(info->gb_addr_config)) - 1;
   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
   nir_ssa_def *pb = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
   nir_ssa_def *blkIndex = nir_iadd(b, nir_imul(b, yb, pb), xb);
   nir_ssa_def *pipeXor = nir_iand_imm(b, nir_ishl(b, nir_iand_imm(b, pipe_xor, pipeMask),
                                                   nir_imm_int(b, m_pipeInterleaveLog2)), blkMask);

   if (bit_position)
      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
                               nir_imm_int(b, 2));

   return nir_iadd(b, nir_iadd(b, nir_imul(b, meta_slice_size, z),
                               nir_imul(b, blkIndex, nir_ishl(b, one, nir_imm_int(b, blkSizeLog2)))),
                   nir_ixor(b, nir_ushr(b, address, one), pipeXor));
}

static nir_ssa_def *gfx9_nir_meta_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                                  struct gfx9_meta_equation *equation,
                                                  nir_ssa_def *meta_pitch, nir_ssa_def *meta_height,
                                                  nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                                  nir_ssa_def *sample, nir_ssa_def *pipe_xor,
                                                  nir_ssa_def **bit_position)
{
   nir_ssa_def *zero = nir_imm_int(b, 0);
   nir_ssa_def *one = nir_imm_int(b, 1);

   assert(info->gfx_level >= GFX9);

   unsigned meta_block_width_log2 = util_logbase2(equation->meta_block_width);
   unsigned meta_block_height_log2 = util_logbase2(equation->meta_block_height);
   unsigned meta_block_depth_log2 = util_logbase2(equation->meta_block_depth);

   unsigned m_pipeInterleaveLog2 = 8 + G_0098F8_PIPE_INTERLEAVE_SIZE_GFX9(info->gb_addr_config);
   unsigned numPipeBits = equation->u.gfx9.num_pipe_bits;
   nir_ssa_def *pitchInBlock = nir_ushr_imm(b, meta_pitch, meta_block_width_log2);
   nir_ssa_def *sliceSizeInBlock = nir_imul(b, nir_ushr_imm(b, meta_height, meta_block_height_log2),
                                            pitchInBlock);

   nir_ssa_def *xb = nir_ushr_imm(b, x, meta_block_width_log2);
   nir_ssa_def *yb = nir_ushr_imm(b, y, meta_block_height_log2);
   nir_ssa_def *zb = nir_ushr_imm(b, z, meta_block_depth_log2);

   nir_ssa_def *blockIndex = nir_iadd(b, nir_iadd(b, nir_imul(b, zb, sliceSizeInBlock),
                                                  nir_imul(b, yb, pitchInBlock)), xb);
   nir_ssa_def *coords[] = {x, y, z, sample, blockIndex};

   nir_ssa_def *address = zero;
   unsigned num_bits = equation->u.gfx9.num_bits;
   assert(num_bits <= 32);

   /* Compute the address up until the last bit that doesn't use the block index. */
   for (unsigned i = 0; i < num_bits - 1; i++) {
      nir_ssa_def *xor = zero;

      for (unsigned c = 0; c < 5; c++) {
         if (equation->u.gfx9.bit[i].coord[c].dim >= 5)
            continue;

         assert(equation->u.gfx9.bit[i].coord[c].ord < 32);
         nir_ssa_def *ison =
            nir_iand(b, nir_ushr_imm(b, coords[equation->u.gfx9.bit[i].coord[c].dim],
                                     equation->u.gfx9.bit[i].coord[c].ord), one);

         xor = nir_ixor(b, xor, ison);
      }
      address = nir_ior(b, address, nir_ishl(b, xor, nir_imm_int(b, i)));
   }

   /* Fill the remaining bits with the block index. */
   unsigned last = num_bits - 1;
   address = nir_ior(b, address,
                     nir_ishl(b, nir_ushr_imm(b, blockIndex,
                                              equation->u.gfx9.bit[last].coord[0].ord),
                              nir_imm_int(b, last)));

   if (bit_position)
      *bit_position = nir_ishl(b, nir_iand(b, address, nir_imm_int(b, 1)),
                               nir_imm_int(b, 2));

   nir_ssa_def *pipeXor = nir_iand_imm(b, pipe_xor, (1 << numPipeBits) - 1);
   return nir_ixor(b, nir_ushr(b, address, one),
                   nir_ishl(b, pipeXor, nir_imm_int(b, m_pipeInterleaveLog2)));
}

nir_ssa_def *ac_nir_dcc_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                        unsigned bpe, struct gfx9_meta_equation *equation,
                                        nir_ssa_def *dcc_pitch, nir_ssa_def *dcc_height,
                                        nir_ssa_def *dcc_slice_size,
                                        nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                        nir_ssa_def *sample, nir_ssa_def *pipe_xor)
{
   if (info->gfx_level >= GFX10) {
      unsigned bpp_log2 = util_logbase2(bpe);

      return gfx10_nir_meta_addr_from_coord(b, info, equation, bpp_log2 - 8, 1,
                                            dcc_pitch, dcc_slice_size,
                                            x, y, z, pipe_xor, NULL);
   } else {
      return gfx9_nir_meta_addr_from_coord(b, info, equation, dcc_pitch,
                                           dcc_height, x, y, z,
                                           sample, pipe_xor, NULL);
   }
}

nir_ssa_def *ac_nir_cmask_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                          struct gfx9_meta_equation *equation,
                                          nir_ssa_def *cmask_pitch, nir_ssa_def *cmask_height,
                                          nir_ssa_def *cmask_slice_size,
                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                          nir_ssa_def *pipe_xor,
                                          nir_ssa_def **bit_position)
{
   nir_ssa_def *zero = nir_imm_int(b, 0);

   if (info->gfx_level >= GFX10) {
      return gfx10_nir_meta_addr_from_coord(b, info, equation, -7, 1,
                                            cmask_pitch, cmask_slice_size,
                                            x, y, z, pipe_xor, bit_position);
   } else {
      return gfx9_nir_meta_addr_from_coord(b, info, equation, cmask_pitch,
                                           cmask_height, x, y, z, zero,
                                           pipe_xor, bit_position);
   }
}

nir_ssa_def *ac_nir_htile_addr_from_coord(nir_builder *b, const struct radeon_info *info,
                                          struct gfx9_meta_equation *equation,
                                          nir_ssa_def *htile_pitch,
                                          nir_ssa_def *htile_slice_size,
                                          nir_ssa_def *x, nir_ssa_def *y, nir_ssa_def *z,
                                          nir_ssa_def *pipe_xor)
{
   return gfx10_nir_meta_addr_from_coord(b, info, equation, -4, 2,
                                         htile_pitch, htile_slice_size,
                                         x, y, z, pipe_xor, NULL);
}
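
/* Usage sketch (illustrative only, not part of this file): a driver that
 * imports or exports a modifier-based image can query the per-plane memory
 * layout with the ac_surface_get_* helpers defined above. The "plane_info"
 * struct and the surrounding loop below are hypothetical; real callers fill
 * whatever structure their WSI or DRM code expects. Plane 0 is the main
 * surface; planes 1 and 2 hold DCC metadata when the modifier carries DCC
 * (and DCC retiling).
 *
 *    struct plane_info {
 *       uint64_t offset;
 *       uint64_t stride;
 *       uint64_t size;
 *    };
 *
 *    unsigned nplanes = ac_surface_get_nplanes(surf);
 *    for (unsigned p = 0; p < nplanes; p++) {
 *       struct plane_info pi = {
 *          .offset = ac_surface_get_plane_offset(info->gfx_level, surf, p, 0),
 *          .stride = ac_surface_get_plane_stride(info->gfx_level, surf, p, 0),
 *          .size   = ac_surface_get_plane_size(surf, p),
 *       };
 *    }
 */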