1/* 2 * Copyright 2016 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24#include <stdint.h> 25 26#define __gen_address_type uint64_t 27#define __gen_user_data void 28 29static uint64_t 30__gen_combine_address(__attribute__((unused)) void *data, 31 __attribute__((unused)) void *loc, uint64_t addr, 32 uint32_t delta) 33{ 34 return addr + delta; 35} 36 37#include "genxml/gen_macros.h" 38#include "genxml/genX_pack.h" 39 40#include "isl_priv.h" 41#include "isl_genX_helpers.h" 42 43#if GFX_VER >= 8 44static const uint8_t isl_encode_tiling[] = { 45 [ISL_TILING_LINEAR] = LINEAR, 46 [ISL_TILING_X] = XMAJOR, 47#if GFX_VERx10 >= 125 48 [ISL_TILING_4] = TILE4, 49 [ISL_TILING_64] = TILE64, 50#else 51 [ISL_TILING_Y0] = YMAJOR, 52 [ISL_TILING_Yf] = YMAJOR, 53 [ISL_TILING_Ys] = YMAJOR, 54#endif 55#if GFX_VER <= 11 56 [ISL_TILING_W] = WMAJOR, 57#endif 58}; 59#endif 60 61#if GFX_VER >= 7 62static const uint32_t isl_encode_multisample_layout[] = { 63 [ISL_MSAA_LAYOUT_NONE] = MSFMT_MSS, 64 [ISL_MSAA_LAYOUT_INTERLEAVED] = MSFMT_DEPTH_STENCIL, 65 [ISL_MSAA_LAYOUT_ARRAY] = MSFMT_MSS, 66}; 67#endif 68 69#if GFX_VER >= 12 70static const uint32_t isl_encode_aux_mode[] = { 71 [ISL_AUX_USAGE_NONE] = AUX_NONE, 72 [ISL_AUX_USAGE_MC] = AUX_NONE, 73 [ISL_AUX_USAGE_MCS] = AUX_CCS_E, 74 [ISL_AUX_USAGE_GFX12_CCS_E] = AUX_CCS_E, 75 [ISL_AUX_USAGE_CCS_E] = AUX_CCS_E, 76 [ISL_AUX_USAGE_HIZ_CCS_WT] = AUX_CCS_E, 77 [ISL_AUX_USAGE_MCS_CCS] = AUX_MCS_LCE, 78 [ISL_AUX_USAGE_STC_CCS] = AUX_CCS_E, 79}; 80#elif GFX_VER >= 9 81static const uint32_t isl_encode_aux_mode[] = { 82 [ISL_AUX_USAGE_NONE] = AUX_NONE, 83 [ISL_AUX_USAGE_HIZ] = AUX_HIZ, 84 [ISL_AUX_USAGE_MCS] = AUX_CCS_D, 85 [ISL_AUX_USAGE_CCS_D] = AUX_CCS_D, 86 [ISL_AUX_USAGE_CCS_E] = AUX_CCS_E, 87}; 88#elif GFX_VER >= 8 89static const uint32_t isl_encode_aux_mode[] = { 90 [ISL_AUX_USAGE_NONE] = AUX_NONE, 91 [ISL_AUX_USAGE_HIZ] = AUX_HIZ, 92 [ISL_AUX_USAGE_MCS] = AUX_MCS, 93 [ISL_AUX_USAGE_CCS_D] = AUX_MCS, 94}; 95#endif 96 97static uint8_t 98get_surftype(enum isl_surf_dim dim, isl_surf_usage_flags_t usage) 99{ 100 switch (dim) { 101 default: 102 unreachable("bad isl_surf_dim"); 103 case ISL_SURF_DIM_1D: 104 assert(!(usage & ISL_SURF_USAGE_CUBE_BIT)); 105 return SURFTYPE_1D; 106 case ISL_SURF_DIM_2D: 107 if ((usage & ISL_SURF_USAGE_CUBE_BIT) && 108 (usage & ISL_SURF_USAGE_TEXTURE_BIT)) { 109 /* We need SURFTYPE_CUBE to make cube sampling work */ 110 return SURFTYPE_CUBE; 111 } else { 112 /* Everything else (render and storage) treat cubes as plain 113 * 2D array textures 114 */ 115 return SURFTYPE_2D; 116 } 117 case ISL_SURF_DIM_3D: 118 assert(!(usage & ISL_SURF_USAGE_CUBE_BIT)); 119 return SURFTYPE_3D; 120 } 121} 122 123#if GFX_VERx10 >= 125 124static uint8_t 125get_media_compression_format(enum isl_format format, 126 enum isl_format lowered_format) 127{ 128 const uint32_t plane_bpb = isl_format_get_layout(lowered_format)->bpb; 129 130 /* From Bspec 43868, Enumeration_MediaCompressionFormat: 131 * 132 * Luma P010 has MSB of 0 while chroma P010 has MSB of 1. 133 * Luma P016 has MSB of 0 while chroma P016 has MSB of 1. 134 * Luma NV12 has MSB of 0 while chroma NV12 has MSB of 1. 135 */ 136 switch (format) { 137 case ISL_FORMAT_PLANAR_420_8: /* NV12 */ 138 assert(plane_bpb == 8 || plane_bpb == 16); 139 assert((isl_format_get_aux_map_encoding(format) & 0xf0) == 0); 140 141 /* drm_fourcc.h defines the chroma plane of NV12 as 16-bpb */ 142 return (plane_bpb == 16) << 4 | isl_format_get_aux_map_encoding(format); 143 case ISL_FORMAT_PLANAR_420_10: 144 case ISL_FORMAT_PLANAR_420_12: 145 case ISL_FORMAT_PLANAR_420_16: 146 assert(plane_bpb == 16 || plane_bpb == 32); 147 assert((isl_format_get_aux_map_encoding(format) & 0xf0) == 0); 148 149 /* drm_fourcc.h defines the chroma plane of P01X as 32-bpb */ 150 return (plane_bpb == 32) << 4 | isl_format_get_aux_map_encoding(format); 151 default: 152 return isl_format_get_aux_map_encoding(format); 153 } 154} 155#endif 156 157void 158isl_genX(surf_fill_state_s)(const struct isl_device *dev, void *state, 159 const struct isl_surf_fill_state_info *restrict info) 160{ 161 struct GENX(RENDER_SURFACE_STATE) s = { 0 }; 162 163 s.SurfaceType = get_surftype(info->surf->dim, info->view->usage); 164 165 if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) 166 assert(isl_format_supports_rendering(dev->info, info->view->format)); 167 else if (info->view->usage & ISL_SURF_USAGE_TEXTURE_BIT) 168 assert(isl_format_supports_sampling(dev->info, info->view->format)); 169 170 /* From the Sky Lake PRM Vol. 2d, RENDER_SURFACE_STATE::SurfaceFormat 171 * 172 * This field cannot be a compressed (BC*, DXT*, FXT*, ETC*, EAC*) 173 * format if the Surface Type is SURFTYPE_1D 174 */ 175 if (info->surf->dim == ISL_SURF_DIM_1D) 176 assert(!isl_format_is_compressed(info->view->format)); 177 178 if (isl_format_is_compressed(info->surf->format)) { 179 /* You're not allowed to make a view of a compressed format with any 180 * format other than the surface format. None of the userspace APIs 181 * allow for this directly and doing so would mess up a number of 182 * surface parameters such as Width, Height, and alignments. Ideally, 183 * we'd like to assert that the two formats match. However, we have an 184 * S3TC workaround that requires us to do reinterpretation. So assert 185 * that they're at least the same bpb and block size. 186 */ 187 ASSERTED const struct isl_format_layout *surf_fmtl = 188 isl_format_get_layout(info->surf->format); 189 ASSERTED const struct isl_format_layout *view_fmtl = 190 isl_format_get_layout(info->surf->format); 191 assert(surf_fmtl->bpb == view_fmtl->bpb); 192 assert(surf_fmtl->bw == view_fmtl->bw); 193 assert(surf_fmtl->bh == view_fmtl->bh); 194 } 195 196 s.SurfaceFormat = info->view->format; 197 198#if GFX_VER >= 12 199 /* The BSpec description of this field says: 200 * 201 * "This bit field, when set, indicates if the resource is created as 202 * Depth/Stencil resource." 203 * 204 * "SW must set this bit for any resource that was created with 205 * Depth/Stencil resource flag. Setting this bit allows HW to properly 206 * interpret the data-layout for various cases. For any resource that's 207 * created without Depth/Stencil resource flag, it must be reset." 208 * 209 * Even though the docs for this bit seem to imply that it's required for 210 * anything which might have been used for depth/stencil, empirical 211 * evidence suggests that it only affects CCS compression usage. There are 212 * a few things which back this up: 213 * 214 * 1. The docs are also pretty clear that this bit was added as part 215 * of enabling Gfx12 depth/stencil lossless compression. 216 * 217 * 2. The only new difference between depth/stencil and color images on 218 * Gfx12 (where the bit was added) is how they treat CCS compression. 219 * All other differences such as alignment requirements and MSAA layout 220 * are already covered by other bits. 221 * 222 * Under these assumptions, it makes sense for ISL to model this bit as 223 * being an extension of AuxiliarySurfaceMode where STC_CCS and HIZ_CCS_WT 224 * are indicated by AuxiliarySurfaceMode == CCS_E and DepthStencilResource 225 * == true. 226 */ 227 s.DepthStencilResource = info->aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT || 228 info->aux_usage == ISL_AUX_USAGE_STC_CCS; 229#endif 230 231#if GFX_VER <= 5 232 s.ColorBufferComponentWriteDisables = info->write_disables; 233 s.ColorBlendEnable = info->blend_enable; 234#else 235 assert(info->write_disables == 0); 236#endif 237 238#if GFX_VERx10 == 75 239 s.IntegerSurfaceFormat = 240 isl_format_has_int_channel((enum isl_format) s.SurfaceFormat); 241#endif 242 243 assert(info->surf->logical_level0_px.width > 0 && 244 info->surf->logical_level0_px.height > 0); 245 246 s.Width = info->surf->logical_level0_px.width - 1; 247 s.Height = info->surf->logical_level0_px.height - 1; 248 249 /* In the gfx6 PRM Volume 1 Part 1: Graphics Core, Section 7.18.3.7.1 250 * (Surface Arrays For all surfaces other than separate stencil buffer): 251 * 252 * "[DevSNB] Errata: Sampler MSAA Qpitch will be 4 greater than the value 253 * calculated in the equation above , for every other odd Surface Height 254 * starting from 1 i.e. 1,5,9,13" 255 * 256 * Since this Qpitch errata only impacts the sampler, we have to adjust the 257 * input for the rendering surface to achieve the same qpitch. For the 258 * affected heights, we increment the height by 1 for the rendering 259 * surface. 260 */ 261 if (GFX_VER == 6 && (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) && 262 info->surf->samples > 1 && 263 (info->surf->logical_level0_px.height % 4) == 1) 264 s.Height++; 265 266 switch (s.SurfaceType) { 267 case SURFTYPE_1D: 268 case SURFTYPE_2D: 269 /* From the Ivy Bridge PRM >> RENDER_SURFACE_STATE::MinimumArrayElement: 270 * 271 * "If Number of Multisamples is not MULTISAMPLECOUNT_1, this field 272 * must be set to zero if this surface is used with sampling engine 273 * messages." 274 * 275 * This restriction appears to exist only on Ivy Bridge. 276 */ 277 if (GFX_VERx10 == 70 && !ISL_DEV_IS_BAYTRAIL(dev) && 278 (info->view->usage & ISL_SURF_USAGE_TEXTURE_BIT) && 279 info->surf->samples > 1) 280 assert(info->view->base_array_layer == 0); 281 282 s.MinimumArrayElement = info->view->base_array_layer; 283 284 /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: 285 * 286 * For SURFTYPE_1D, 2D, and CUBE: The range of this field is reduced 287 * by one for each increase from zero of Minimum Array Element. For 288 * example, if Minimum Array Element is set to 1024 on a 2D surface, 289 * the range of this field is reduced to [0,1023]. 290 * 291 * In other words, 'Depth' is the number of array layers. 292 */ 293 s.Depth = info->view->array_len - 1; 294 295 /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: 296 * 297 * For Render Target and Typed Dataport 1D and 2D Surfaces: 298 * This field must be set to the same value as the Depth field. 299 */ 300 if (info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 301 ISL_SURF_USAGE_STORAGE_BIT)) 302 s.RenderTargetViewExtent = s.Depth; 303 break; 304 case SURFTYPE_CUBE: 305 s.MinimumArrayElement = info->view->base_array_layer; 306 /* Same as SURFTYPE_2D, but divided by 6 */ 307 s.Depth = info->view->array_len / 6 - 1; 308 if (info->view->usage & (ISL_SURF_USAGE_RENDER_TARGET_BIT | 309 ISL_SURF_USAGE_STORAGE_BIT)) 310 s.RenderTargetViewExtent = s.Depth; 311 break; 312 case SURFTYPE_3D: 313 /* From the Broadwell PRM >> RENDER_SURFACE_STATE::Depth: 314 * 315 * If the volume texture is MIP-mapped, this field specifies the 316 * depth of the base MIP level. 317 */ 318 s.Depth = info->surf->logical_level0_px.depth - 1; 319 320 /* From the Broadwell PRM >> RENDER_SURFACE_STATE::RenderTargetViewExtent: 321 * 322 * For Render Target and Typed Dataport 3D Surfaces: This field 323 * indicates the extent of the accessible 'R' coordinates minus 1 on 324 * the LOD currently being rendered to. 325 * 326 * The docs specify that this only matters for render targets and 327 * surfaces used with typed dataport messages. Prior to Ivy Bridge, the 328 * Depth field has more bits than RenderTargetViewExtent so we can have 329 * textures with more levels than we can render to. In order to prevent 330 * assert-failures in the packing function below, we only set the field 331 * when it's actually going to be used by the hardware. 332 * 333 * The MinimumArrayElement field is ignored by all hardware 334 * prior to Sky Lake when texturing, and drivers are responsible 335 * for validating the correctness of this parameter. 336 * KHR_gl_texture_3D_image requires this functionality. 337 */ 338 s.MinimumArrayElement = info->view->base_array_layer; 339 s.RenderTargetViewExtent = info->view->array_len - 1; 340 break; 341 default: 342 unreachable("bad SurfaceType"); 343 } 344 345#if GFX_VER >= 12 346 /* Wa_1806565034: Only set SurfaceArray if arrayed surface is > 1. */ 347 s.SurfaceArray = info->surf->dim != ISL_SURF_DIM_3D && 348 info->view->array_len > 1; 349#elif GFX_VER >= 7 350 s.SurfaceArray = info->surf->dim != ISL_SURF_DIM_3D; 351#endif 352 353 if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 354 /* For render target surfaces, the hardware interprets field 355 * MIPCount/LOD as LOD. The Broadwell PRM says: 356 * 357 * MIPCountLOD defines the LOD that will be rendered into. 358 * SurfaceMinLOD is ignored. 359 */ 360 s.MIPCountLOD = info->view->base_level; 361 s.SurfaceMinLOD = 0; 362 } else { 363 /* For non render target surfaces, the hardware interprets field 364 * MIPCount/LOD as MIPCount. The range of levels accessible by the 365 * sampler engine is [SurfaceMinLOD, SurfaceMinLOD + MIPCountLOD]. 366 */ 367 s.SurfaceMinLOD = info->view->base_level; 368 s.MIPCountLOD = MAX(info->view->levels, 1) - 1; 369 } 370 371#if GFX_VER >= 9 372 /* We don't use miptails yet. The PRM recommends that you set "Mip Tail 373 * Start LOD" to 15 to prevent the hardware from trying to use them. 374 */ 375 s.TiledResourceMode = NONE; 376 s.MipTailStartLOD = 15; 377#endif 378 379#if GFX_VER >= 6 380 const struct isl_extent3d image_align = 381 isl_get_image_alignment(info->surf); 382 s.SurfaceVerticalAlignment = isl_encode_valign(image_align.height); 383#if GFX_VER >= 7 384 s.SurfaceHorizontalAlignment = isl_encode_halign(image_align.width); 385#endif 386#endif 387 388 if (info->surf->dim_layout == ISL_DIM_LAYOUT_GFX9_1D) { 389 /* For gfx9 1-D textures, surface pitch is ignored */ 390 s.SurfacePitch = 0; 391 } else { 392 s.SurfacePitch = info->surf->row_pitch_B - 1; 393 } 394 395#if GFX_VER >= 8 396 s.SurfaceQPitch = isl_get_qpitch(info->surf) >> 2; 397#elif GFX_VER == 7 398 s.SurfaceArraySpacing = info->surf->array_pitch_span == 399 ISL_ARRAY_PITCH_SPAN_COMPACT; 400#endif 401 402#if GFX_VER >= 8 403 assert(GFX_VER < 12 || info->surf->tiling != ISL_TILING_W); 404 405 /* From the SKL+ PRMs, RENDER_SURFACE_STATE:TileMode, 406 * 407 * If Surface Format is ASTC*, this field must be TILEMODE_YMAJOR. 408 */ 409 if (isl_format_get_layout(info->view->format)->txc == ISL_TXC_ASTC) 410 assert(info->surf->tiling == ISL_TILING_Y0); 411 412 s.TileMode = isl_encode_tiling[info->surf->tiling]; 413#else 414 s.TiledSurface = info->surf->tiling != ISL_TILING_LINEAR, 415 s.TileWalk = info->surf->tiling == ISL_TILING_Y0 ? TILEWALK_YMAJOR : 416 TILEWALK_XMAJOR, 417#endif 418 419#if GFX_VER >= 8 420 s.RenderCacheReadWriteMode = WriteOnlyCache; 421#else 422 s.RenderCacheReadWriteMode = 0; 423#endif 424 425#if GFX_VER >= 11 426 /* We've seen dEQP failures when enabling this bit with UINT formats, 427 * which particularly affects blorp_copy() operations. It shouldn't 428 * have any effect on UINT textures anyway, so disable it for them. 429 */ 430 s.EnableUnormPathInColorPipe = 431 !isl_format_has_int_channel(info->view->format); 432#endif 433 434 s.CubeFaceEnablePositiveZ = 1; 435 s.CubeFaceEnableNegativeZ = 1; 436 s.CubeFaceEnablePositiveY = 1; 437 s.CubeFaceEnableNegativeY = 1; 438 s.CubeFaceEnablePositiveX = 1; 439 s.CubeFaceEnableNegativeX = 1; 440 441#if GFX_VER >= 6 442 /* From the Broadwell PRM for "Number of Multisamples": 443 * 444 * "If this field is any value other than MULTISAMPLECOUNT_1, Surface 445 * Min LOD, Mip Count / LOD, and Resource Min LOD must be set to zero." 446 * 447 * This is fine because no 3D API allows multisampling and mipmapping at 448 * the same time. 449 */ 450 if (info->surf->samples > 1) { 451 assert(info->view->min_lod_clamp == 0); 452 assert(info->view->base_level == 0); 453 assert(info->view->levels == 1); 454 } 455 s.NumberofMultisamples = ffs(info->surf->samples) - 1; 456#if GFX_VER >= 7 457 s.MultisampledSurfaceStorageFormat = 458 isl_encode_multisample_layout[info->surf->msaa_layout]; 459#endif 460#endif 461 462#if GFX_VER >= 7 463 s.ResourceMinLOD = info->view->min_lod_clamp; 464#else 465 assert(info->view->min_lod_clamp == 0); 466#endif 467 468#if (GFX_VERx10 >= 75) 469 if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) 470 assert(isl_swizzle_supports_rendering(dev->info, info->view->swizzle)); 471 472 s.ShaderChannelSelectRed = (enum GENX(ShaderChannelSelect)) info->view->swizzle.r; 473 s.ShaderChannelSelectGreen = (enum GENX(ShaderChannelSelect)) info->view->swizzle.g; 474 s.ShaderChannelSelectBlue = (enum GENX(ShaderChannelSelect)) info->view->swizzle.b; 475 s.ShaderChannelSelectAlpha = (enum GENX(ShaderChannelSelect)) info->view->swizzle.a; 476#else 477 assert(isl_swizzle_is_identity(info->view->swizzle)); 478#endif 479 480 s.SurfaceBaseAddress = info->address; 481 482#if GFX_VER >= 6 483 s.MOCS = info->mocs; 484#endif 485 486#if GFX_VERx10 >= 45 487 if (info->x_offset_sa != 0 || info->y_offset_sa != 0) { 488 /* There are fairly strict rules about when the offsets can be used. 489 * These are mostly taken from the Sky Lake PRM documentation for 490 * RENDER_SURFACE_STATE. 491 */ 492 assert(info->surf->tiling != ISL_TILING_LINEAR); 493 assert(info->surf->dim == ISL_SURF_DIM_2D); 494 assert(isl_is_pow2(isl_format_get_layout(info->view->format)->bpb)); 495 assert(info->surf->levels == 1); 496 assert(info->surf->logical_level0_px.array_len == 1); 497 assert(info->aux_usage == ISL_AUX_USAGE_NONE); 498 499 if (GFX_VER >= 8) { 500 /* Broadwell added more rules. */ 501 assert(info->surf->samples == 1); 502 if (isl_format_get_layout(info->view->format)->bpb == 8) 503 assert(info->x_offset_sa % 16 == 0); 504 if (isl_format_get_layout(info->view->format)->bpb == 16) 505 assert(info->x_offset_sa % 8 == 0); 506 } 507 508#if GFX_VER >= 7 509 s.SurfaceArray = false; 510#endif 511 } 512 513 const unsigned x_div = 4; 514 const unsigned y_div = GFX_VER >= 8 ? 4 : 2; 515 assert(info->x_offset_sa % x_div == 0); 516 assert(info->y_offset_sa % y_div == 0); 517 s.XOffset = info->x_offset_sa / x_div; 518 s.YOffset = info->y_offset_sa / y_div; 519#else 520 assert(info->x_offset_sa == 0); 521 assert(info->y_offset_sa == 0); 522#endif 523 524#if GFX_VER >= 7 525 if (info->aux_usage != ISL_AUX_USAGE_NONE) { 526 /* Check valid aux usages per-gen */ 527 if (GFX_VER >= 12) { 528 assert(info->aux_usage == ISL_AUX_USAGE_MCS || 529 info->aux_usage == ISL_AUX_USAGE_CCS_E || 530 info->aux_usage == ISL_AUX_USAGE_GFX12_CCS_E || 531 info->aux_usage == ISL_AUX_USAGE_MC || 532 info->aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT || 533 info->aux_usage == ISL_AUX_USAGE_MCS_CCS || 534 info->aux_usage == ISL_AUX_USAGE_STC_CCS); 535 } else if (GFX_VER >= 9) { 536 assert(info->aux_usage == ISL_AUX_USAGE_HIZ || 537 info->aux_usage == ISL_AUX_USAGE_MCS || 538 info->aux_usage == ISL_AUX_USAGE_CCS_D || 539 info->aux_usage == ISL_AUX_USAGE_CCS_E); 540 } else if (GFX_VER >= 8) { 541 assert(info->aux_usage == ISL_AUX_USAGE_HIZ || 542 info->aux_usage == ISL_AUX_USAGE_MCS || 543 info->aux_usage == ISL_AUX_USAGE_CCS_D); 544 } else if (GFX_VER >= 7) { 545 assert(info->aux_usage == ISL_AUX_USAGE_MCS || 546 info->aux_usage == ISL_AUX_USAGE_CCS_D); 547 } 548 549 /* The docs don't appear to say anything whatsoever about compression 550 * and the data port. Testing seems to indicate that the data port 551 * completely ignores the AuxiliarySurfaceMode field. 552 * 553 * On gfx12 HDC supports compression. 554 */ 555 if (GFX_VER < 12) 556 assert(!(info->view->usage & ISL_SURF_USAGE_STORAGE_BIT)); 557 558 if (isl_surf_usage_is_depth(info->surf->usage)) 559 assert(isl_aux_usage_has_hiz(info->aux_usage)); 560 561 if (isl_surf_usage_is_stencil(info->surf->usage)) 562 assert(info->aux_usage == ISL_AUX_USAGE_STC_CCS); 563 564 if (isl_aux_usage_has_hiz(info->aux_usage)) { 565 /* For Gfx8-10, there are some restrictions around sampling from HiZ. 566 * The Skylake PRM docs for RENDER_SURFACE_STATE::AuxiliarySurfaceMode 567 * say: 568 * 569 * "If this field is set to AUX_HIZ, Number of Multisamples must 570 * be MULTISAMPLECOUNT_1, and Surface Type cannot be SURFTYPE_3D." 571 * 572 * On Gfx12, the docs are a bit less obvious but the restriction is 573 * the same. The limitation isn't called out explicitly but the docs 574 * for the CCS_E value of RENDER_SURFACE_STATE::AuxiliarySurfaceMode 575 * say: 576 * 577 * "If Number of multisamples > 1, programming this value means 578 * MSAA compression is enabled for that surface. Auxiliary surface 579 * is MSC with tile y." 580 * 581 * Since this interpretation ignores whether the surface is 582 * depth/stencil or not and since multisampled depth buffers use 583 * ISL_MSAA_LAYOUT_INTERLEAVED which is incompatible with MCS 584 * compression, this means that we can't even specify MSAA depth CCS 585 * in RENDER_SURFACE_STATE::AuxiliarySurfaceMode. 586 */ 587 assert(info->surf->samples == 1); 588 589 /* The dimension must not be 3D */ 590 assert(info->surf->dim != ISL_SURF_DIM_3D); 591 592 /* The format must be one of the following: */ 593 switch (info->view->format) { 594 case ISL_FORMAT_R32_FLOAT: 595 case ISL_FORMAT_R24_UNORM_X8_TYPELESS: 596 case ISL_FORMAT_R16_UNORM: 597 break; 598 default: 599 assert(!"Incompatible HiZ Sampling format"); 600 break; 601 } 602 } 603 604#if GFX_VERx10 >= 125 605 if (info->aux_usage == ISL_AUX_USAGE_MC) { 606 s.CompressionFormat = 607 get_media_compression_format(info->mc_format, info->surf->format); 608 } else { 609 s.CompressionFormat = 610 isl_get_render_compression_format(info->surf->format); 611 } 612#endif 613#if GFX_VER >= 12 614 s.MemoryCompressionEnable = info->aux_usage == ISL_AUX_USAGE_MC; 615#endif 616#if GFX_VER >= 9 617 /* Some CCS aux usages have format restrictions. The Skylake PRM doc for 618 * RENDER_SURFACE_STATE::AuxiliarySurfaceMode says: 619 * 620 * If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_E setting 621 * is only allowed if Surface Format is supported for Render Target 622 * Compression. This setting enables render target compression. 623 * 624 * If CCS_E is in use, the format must support it. 625 */ 626 if (info->aux_usage == ISL_AUX_USAGE_CCS_E || 627 info->aux_usage == ISL_AUX_USAGE_GFX12_CCS_E) 628 assert(isl_format_supports_ccs_e(dev->info, info->view->format)); 629 630 /* It also says: 631 * 632 * If Number of Multisamples is MULTISAMPLECOUNT_1, AUX_CCS_D setting 633 * is only allowed if Surface Format supported for Fast Clear. In 634 * addition, if the surface is bound to the sampling engine, Surface 635 * Format must be supported for Render Target Compression for 636 * surfaces bound to the sampling engine. For render target surfaces, 637 * this setting disables render target compression. For sampling 638 * engine surfaces, this mode behaves the same as AUX_CCS_E. 639 * 640 * If CCS_D is in use while rendering, the format must support it. If 641 * it's in use while sampling, the format must support CCS_E. 642 */ 643 if (info->aux_usage == ISL_AUX_USAGE_CCS_D) { 644 if (info->view->usage & ISL_SURF_USAGE_RENDER_TARGET_BIT) { 645 assert(isl_format_supports_ccs_d(dev->info, info->view->format)); 646 } else { 647 assert(info->view->usage & ISL_SURF_USAGE_TEXTURE_BIT); 648 assert(isl_format_supports_ccs_e(dev->info, info->view->format)); 649 } 650 } 651#endif 652#if GFX_VER >= 8 653 s.AuxiliarySurfaceMode = isl_encode_aux_mode[info->aux_usage]; 654#else 655 s.MCSEnable = true; 656#endif 657 } 658 659 /* The auxiliary buffer info is filled when it's usable by the HW. 660 * 661 * Starting with Gfx12, the only form of compression that can be used 662 * with RENDER_SURFACE_STATE which requires an aux surface is MCS. 663 * HiZ still requires a surface but the HiZ surface can only be 664 * accessed through 3DSTATE_HIER_DEPTH_BUFFER. 665 * 666 * On all earlier hardware, an aux surface is required for all forms 667 * of compression. 668 */ 669 if ((GFX_VER < 12 && info->aux_usage != ISL_AUX_USAGE_NONE) || 670 (GFX_VER >= 12 && isl_aux_usage_has_mcs(info->aux_usage))) { 671 672 assert(info->aux_surf != NULL); 673 674 struct isl_tile_info tile_info; 675 isl_surf_get_tile_info(info->aux_surf, &tile_info); 676 uint32_t pitch_in_tiles = 677 info->aux_surf->row_pitch_B / tile_info.phys_extent_B.width; 678 679 s.AuxiliarySurfaceBaseAddress = info->aux_address; 680 s.AuxiliarySurfacePitch = pitch_in_tiles - 1; 681 682#if GFX_VER >= 8 683 /* Auxiliary surfaces in ISL have compressed formats but the hardware 684 * doesn't expect our definition of the compression, it expects qpitch 685 * in units of samples on the main surface. 686 */ 687 s.AuxiliarySurfaceQPitch = 688 isl_surf_get_array_pitch_sa_rows(info->aux_surf) >> 2; 689#endif 690 } 691#endif 692 693#if GFX_VER >= 8 && GFX_VER < 11 694 /* From the CHV PRM, Volume 2d, page 321 (RENDER_SURFACE_STATE dword 0 695 * bit 9 "Sampler L2 Bypass Mode Disable" Programming Notes): 696 * 697 * This bit must be set for the following surface types: BC2_UNORM 698 * BC3_UNORM BC5_UNORM BC5_SNORM BC7_UNORM 699 */ 700 if (GFX_VER >= 9 || dev->info->platform == INTEL_PLATFORM_CHV) { 701 switch (info->view->format) { 702 case ISL_FORMAT_BC2_UNORM: 703 case ISL_FORMAT_BC3_UNORM: 704 case ISL_FORMAT_BC5_UNORM: 705 case ISL_FORMAT_BC5_SNORM: 706 case ISL_FORMAT_BC7_UNORM: 707 s.SamplerL2BypassModeDisable = true; 708 break; 709 default: 710 /* From the SKL PRM, Programming Note under Sampler Output Channel 711 * Mapping: 712 * 713 * If a surface has an associated HiZ Auxiliary surface, the 714 * Sampler L2 Bypass Mode Disable field in the RENDER_SURFACE_STATE 715 * must be set. 716 */ 717 if (GFX_VER >= 9 && info->aux_usage == ISL_AUX_USAGE_HIZ) 718 s.SamplerL2BypassModeDisable = true; 719 break; 720 } 721 } 722#endif 723 724 if (isl_aux_usage_has_fast_clears(info->aux_usage)) { 725 if (info->use_clear_address) { 726#if GFX_VER >= 10 727 s.ClearValueAddressEnable = true; 728 s.ClearValueAddress = info->clear_address; 729#else 730 unreachable("Gfx9 and earlier do not support indirect clear colors"); 731#endif 732 } 733 734#if GFX_VER == 11 735 /* 736 * From BXML > GT > Shared Functions > vol5c Shared Functions > 737 * [Structure] RENDER_SURFACE_STATE [BDW+] > ClearColorConversionEnable: 738 * 739 * Project: Gfx11 740 * 741 * "Enables Pixel backend hw to convert clear values into native format 742 * and write back to clear address, so that display and sampler can use 743 * the converted value for resolving fast cleared RTs." 744 * 745 * Summary: 746 * Clear color conversion must be enabled if the clear color is stored 747 * indirectly and fast color clears are enabled. 748 */ 749 if (info->use_clear_address) { 750 s.ClearColorConversionEnable = true; 751 } 752#endif 753 754#if GFX_VER >= 12 755 assert(info->use_clear_address); 756#elif GFX_VER >= 9 757 if (!info->use_clear_address) { 758 s.RedClearColor = info->clear_color.u32[0]; 759 s.GreenClearColor = info->clear_color.u32[1]; 760 s.BlueClearColor = info->clear_color.u32[2]; 761 s.AlphaClearColor = info->clear_color.u32[3]; 762 } 763#elif GFX_VER >= 7 764 /* Prior to Sky Lake, we only have one bit for the clear color which 765 * gives us 0 or 1 in whatever the surface's format happens to be. 766 */ 767 if (isl_format_has_int_channel(info->view->format)) { 768 for (unsigned i = 0; i < 4; i++) { 769 assert(info->clear_color.u32[i] == 0 || 770 info->clear_color.u32[i] == 1); 771 } 772 s.RedClearColor = info->clear_color.u32[0] != 0; 773 s.GreenClearColor = info->clear_color.u32[1] != 0; 774 s.BlueClearColor = info->clear_color.u32[2] != 0; 775 s.AlphaClearColor = info->clear_color.u32[3] != 0; 776 } else { 777 for (unsigned i = 0; i < 4; i++) { 778 assert(info->clear_color.f32[i] == 0.0f || 779 info->clear_color.f32[i] == 1.0f); 780 } 781 s.RedClearColor = info->clear_color.f32[0] != 0.0f; 782 s.GreenClearColor = info->clear_color.f32[1] != 0.0f; 783 s.BlueClearColor = info->clear_color.f32[2] != 0.0f; 784 s.AlphaClearColor = info->clear_color.f32[3] != 0.0f; 785 } 786#endif 787 } 788 789 GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s); 790} 791 792void 793isl_genX(buffer_fill_state_s)(const struct isl_device *dev, void *state, 794 const struct isl_buffer_fill_state_info *restrict info) 795{ 796 uint64_t buffer_size = info->size_B; 797 798 /* Uniform and Storage buffers need to have surface size not less that the 799 * aligned 32-bit size of the buffer. To calculate the array length on 800 * unsized arrays in StorageBuffer the last 2 bits store the padding size 801 * added to the surface, so we can calculate latter the original buffer 802 * size to know the number of elements. 803 * 804 * surface_size = isl_align(buffer_size, 4) + 805 * (isl_align(buffer_size) - buffer_size) 806 * 807 * buffer_size = (surface_size & ~3) - (surface_size & 3) 808 */ 809 if ((info->format == ISL_FORMAT_RAW || 810 info->stride_B < isl_format_get_layout(info->format)->bpb / 8) && 811 !info->is_scratch) { 812 assert(info->stride_B == 1); 813 uint64_t aligned_size = isl_align(buffer_size, 4); 814 buffer_size = aligned_size + (aligned_size - buffer_size); 815 } 816 817 uint32_t num_elements = buffer_size / info->stride_B; 818 819 assert(num_elements > 0); 820 if (info->format == ISL_FORMAT_RAW) { 821 assert(num_elements <= dev->max_buffer_size); 822 } else { 823 /* From the IVB PRM, SURFACE_STATE::Height, 824 * 825 * For typed buffer and structured buffer surfaces, the number 826 * of entries in the buffer ranges from 1 to 2^27. 827 */ 828 assert(num_elements <= (1ull << 27)); 829 } 830 831 struct GENX(RENDER_SURFACE_STATE) s = { 0, }; 832 833 s.SurfaceFormat = info->format; 834 835 s.SurfaceType = SURFTYPE_BUFFER; 836#if GFX_VERx10 >= 125 837 if (info->is_scratch) { 838 /* From the BSpec: 839 * 840 * "For surfaces of type SURFTYPE_SCRATCH, valid range of pitch is: 841 * [63,262143] -> [64B, 256KB]. Also, for SURFTYPE_SCRATCH, the 842 * pitch must be a multiple of 64bytes." 843 */ 844 assert(info->format == ISL_FORMAT_RAW); 845 assert(info->stride_B % 64 == 0); 846 assert(info->stride_B <= 256 * 1024); 847 s.SurfaceType = SURFTYPE_SCRATCH; 848 } 849#else 850 assert(!info->is_scratch); 851#endif 852 853 s.SurfacePitch = info->stride_B - 1; 854 855#if GFX_VER >= 6 856 s.SurfaceVerticalAlignment = isl_encode_valign(4); 857#if GFX_VERx10 >= 125 858 s.SurfaceHorizontalAlignment = isl_encode_halign(128); 859#elif GFX_VER >= 7 860 s.SurfaceHorizontalAlignment = isl_encode_halign(4); 861 s.SurfaceArray = false; 862#endif 863#endif 864 865#if GFX_VER >= 7 866 s.Height = ((num_elements - 1) >> 7) & 0x3fff; 867 s.Width = (num_elements - 1) & 0x7f; 868 s.Depth = ((num_elements - 1) >> 21) & 0x3ff; 869#else 870 s.Height = ((num_elements - 1) >> 7) & 0x1fff; 871 s.Width = (num_elements - 1) & 0x7f; 872 s.Depth = ((num_elements - 1) >> 20) & 0x7f; 873#endif 874 875 if (GFX_VER == 12 && dev->info->revision == 0) { 876 /* TGL-LP A0 has a HW bug (fixed in later HW) which causes buffer 877 * textures with very close base addresses (delta < 64B) to corrupt each 878 * other. We can sort-of work around this by making small buffer 879 * textures 1D textures instead. This doesn't fix the problem for large 880 * buffer textures but the liklihood of large, overlapping, and very 881 * close buffer textures is fairly low and the point is to hack around 882 * the bug so we can run apps and tests. 883 */ 884 if (info->format != ISL_FORMAT_RAW && 885 info->stride_B == isl_format_get_layout(info->format)->bpb / 8 && 886 num_elements <= (1 << 14)) { 887 s.SurfaceType = SURFTYPE_1D; 888 s.Width = num_elements - 1; 889 s.Height = 0; 890 s.Depth = 0; 891 } 892 } 893 894#if GFX_VER >= 6 895 s.NumberofMultisamples = MULTISAMPLECOUNT_1; 896#endif 897 898#if (GFX_VER >= 8) 899 s.TileMode = LINEAR; 900#else 901 s.TiledSurface = false; 902#endif 903 904#if (GFX_VER >= 8) 905 s.RenderCacheReadWriteMode = WriteOnlyCache; 906#else 907 s.RenderCacheReadWriteMode = 0; 908#endif 909 910 s.SurfaceBaseAddress = info->address; 911#if GFX_VER >= 6 912 s.MOCS = info->mocs; 913#endif 914 915#if (GFX_VERx10 >= 75) 916 s.ShaderChannelSelectRed = (enum GENX(ShaderChannelSelect)) info->swizzle.r; 917 s.ShaderChannelSelectGreen = (enum GENX(ShaderChannelSelect)) info->swizzle.g; 918 s.ShaderChannelSelectBlue = (enum GENX(ShaderChannelSelect)) info->swizzle.b; 919 s.ShaderChannelSelectAlpha = (enum GENX(ShaderChannelSelect)) info->swizzle.a; 920#endif 921 922 GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s); 923} 924 925void 926isl_genX(null_fill_state)(const struct isl_device *dev, void *state, 927 const struct isl_null_fill_state_info *restrict info) 928{ 929 struct GENX(RENDER_SURFACE_STATE) s = { 930 .SurfaceType = SURFTYPE_NULL, 931 /* We previously had this format set to B8G8R8A8_UNORM but ran into 932 * hangs on IVB. R32_UINT seems to work for everybody. 933 * 934 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/1872 935 */ 936 .SurfaceFormat = ISL_FORMAT_R32_UINT, 937#if GFX_VER >= 7 938 .SurfaceArray = info->size.depth > 1, 939#endif 940#if GFX_VERx10 >= 125 941 .TileMode = TILE4, 942#elif GFX_VER >= 8 943 .TileMode = YMAJOR, 944#else 945 .TiledSurface = true, 946 .TileWalk = TILEWALK_YMAJOR, 947#endif 948#if GFX_VER >= 6 949 .MOCS = isl_mocs(dev, 0, false), 950#endif 951#if GFX_VER == 7 952 /* According to PRMs: "Volume 4 Part 1: Subsystem and Cores – Shared 953 * Functions" 954 * 955 * RENDER_SURFACE_STATE::Surface Vertical Alignment 956 * 957 * "This field must be set to VALIGN_4 for all tiled Y Render Target 958 * surfaces." 959 * 960 * Affect IVB, HSW. 961 */ 962 .SurfaceVerticalAlignment = VALIGN_4, 963#endif 964 .MIPCountLOD = info->levels, 965 .Width = info->size.width - 1, 966 .Height = info->size.height - 1, 967 .Depth = info->size.depth - 1, 968 .RenderTargetViewExtent = info->size.depth - 1, 969#if GFX_VER <= 5 970 .MinimumArrayElement = info->minimum_array_element, 971 .ColorBufferComponentWriteDisables = 0xf, 972#endif 973 }; 974 GENX(RENDER_SURFACE_STATE_pack)(NULL, state, &s); 975} 976