/*
 * Copyright © 2017 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * @file crocus_resolve.c
 *
 * This file handles resolve tracking for main and auxiliary surfaces.
 *
 * It also handles our cache tracking.  We have sets for the render cache,
 * depth cache, and so on.  If a BO is in a cache's set, then it may have
 * data in that cache.  The helpers take care of emitting flushes for
 * render-to-texture, format reinterpretation issues, and other situations.
 */

#include "util/hash_table.h"
#include "util/set.h"
#include "crocus_context.h"
#include "compiler/nir/nir.h"

#define FILE_DEBUG_FLAG DEBUG_BLORP

static void
crocus_update_stencil_shadow(struct crocus_context *ice,
                             struct crocus_resource *res);

/**
 * Disable auxiliary buffers if a renderbuffer is also bound as a texture
 * or shader image.  This causes a self-dependency, where both rendering
 * and sampling may concurrently read or write the CCS buffer, causing
 * incorrect pixels.
 */
static bool
disable_rb_aux_buffer(struct crocus_context *ice,
                      bool *draw_aux_buffer_disabled,
                      struct crocus_resource *tex_res,
                      unsigned min_level, unsigned num_levels,
                      const char *usage)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   bool found = false;

   /* We only need to worry about fast clears. */
   if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D)
      return false;

   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
      struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
      if (!surf)
         continue;

      struct crocus_resource *rb_res = (void *) surf->base.texture;

      if (rb_res->bo == tex_res->bo &&
          surf->base.u.tex.level >= min_level &&
          surf->base.u.tex.level < min_level + num_levels) {
         found = draw_aux_buffer_disabled[i] = true;
      }
   }

   if (found) {
      perf_debug(&ice->dbg,
                 "Disabling CCS because a renderbuffer is also bound %s.\n",
                 usage);
   }

   return found;
}

static void
resolve_sampler_views(struct crocus_context *ice,
                      struct crocus_batch *batch,
                      struct crocus_shader_state *shs,
                      const struct shader_info *info,
                      bool *draw_aux_buffer_disabled,
                      bool consider_framebuffer)
{
   uint32_t views = info ?
      (shs->bound_sampler_views & info->textures_used[0]) : 0;

   while (views) {
      const int i = u_bit_scan(&views);
      struct crocus_sampler_view *isv = shs->textures[i];

      if (isv->res->base.b.target != PIPE_BUFFER) {
         if (consider_framebuffer) {
            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, isv->res,
                                  isv->view.base_level, isv->view.levels,
                                  "for sampling");
         }

         crocus_resource_prepare_texture(ice, isv->res, isv->view.format,
                                         isv->view.base_level, isv->view.levels,
                                         isv->view.base_array_layer,
                                         isv->view.array_len);
      }

      crocus_cache_flush_for_read(batch, isv->res->bo);

      /* The gen7 sampler reads stencil through a separate shadow copy;
       * make sure that copy is up to date before sampling from it.
       */
      if (batch->screen->devinfo.ver == 7 &&
          (isv->base.format == PIPE_FORMAT_X24S8_UINT ||
           isv->base.format == PIPE_FORMAT_X32_S8X24_UINT ||
           isv->base.format == PIPE_FORMAT_S8_UINT)) {
         struct crocus_resource *zres, *sres;
         crocus_get_depth_stencil_resources(&batch->screen->devinfo,
                                            isv->base.texture, &zres, &sres);
         crocus_update_stencil_shadow(ice, sres);
         crocus_cache_flush_for_read(batch, sres->shadow->bo);
      }
   }
}

static void
resolve_image_views(struct crocus_context *ice,
                    struct crocus_batch *batch,
                    struct crocus_shader_state *shs,
                    bool *draw_aux_buffer_disabled,
                    bool consider_framebuffer)
{
   /* TODO: Consider images used by program */
   uint32_t views = shs->bound_image_views;

   while (views) {
      const int i = u_bit_scan(&views);
      struct pipe_image_view *pview = &shs->image[i].base;
      struct crocus_resource *res = (void *) pview->resource;

      if (res->base.b.target != PIPE_BUFFER) {
         if (consider_framebuffer) {
            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled,
                                  res, pview->u.tex.level, 1,
                                  "as a shader image");
         }

         unsigned num_layers =
            pview->u.tex.last_layer - pview->u.tex.first_layer + 1;

         /* The data port doesn't understand any compression */
         crocus_resource_prepare_access(ice, res,
                                        pview->u.tex.level, 1,
                                        pview->u.tex.first_layer, num_layers,
                                        ISL_AUX_USAGE_NONE, false);
      }

      crocus_cache_flush_for_read(batch, res->bo);
   }
}

static void
crocus_update_align_res(struct crocus_batch *batch,
                        struct crocus_surface *surf,
                        bool copy_to_wa)
{
   struct crocus_screen *screen = (struct crocus_screen *)batch->screen;
   struct pipe_blit_info info = { 0 };

   info.src.resource = copy_to_wa ? surf->base.texture : surf->align_res;
   info.src.level = copy_to_wa ? surf->base.u.tex.level : 0;
   u_box_2d_zslice(0, 0, copy_to_wa ? surf->base.u.tex.first_layer : 0,
                   u_minify(surf->base.texture->width0, surf->base.u.tex.level),
                   u_minify(surf->base.texture->height0, surf->base.u.tex.level),
                   &info.src.box);
   info.src.format = surf->base.texture->format;
   info.dst.resource = copy_to_wa ? surf->align_res : surf->base.texture;
   info.dst.level = copy_to_wa ? 0 : surf->base.u.tex.level;
   info.dst.box = info.src.box;
   info.dst.box.z = copy_to_wa ? 0 : surf->base.u.tex.first_layer;
   info.dst.format = surf->base.texture->format;
   info.mask = util_format_is_depth_or_stencil(surf->base.texture->format) ?
      PIPE_MASK_ZS : PIPE_MASK_RGBA;
   info.filter = 0;
   if (!screen->vtbl.blit_blt(batch, &info)) {
      assert(0);
   }
}

/**
 * \brief Resolve buffers before drawing.
 *
 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
 * enabled depth texture, and flush the render cache for any dirty textures.
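 *
 * (No work is done unless the stage's bindings, or the fragment-stage
 * bindings when the framebuffer is considered, have been flagged dirty.)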
 */
void
crocus_predraw_resolve_inputs(struct crocus_context *ice,
                              struct crocus_batch *batch,
                              bool *draw_aux_buffer_disabled,
                              gl_shader_stage stage,
                              bool consider_framebuffer)
{
   struct crocus_shader_state *shs = &ice->state.shaders[stage];
   const struct shader_info *info = crocus_get_shader_info(ice, stage);

   uint64_t stage_dirty = (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage) |
      (consider_framebuffer ? CROCUS_STAGE_DIRTY_BINDINGS_FS : 0);

   if (ice->state.stage_dirty & stage_dirty) {
      resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled,
                            consider_framebuffer);
      resolve_image_views(ice, batch, shs, draw_aux_buffer_disabled,
                          consider_framebuffer);
   }
}

void
crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
                                   struct crocus_batch *batch,
                                   bool *draw_aux_buffer_disabled)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   struct crocus_screen *screen = (void *) ice->ctx.screen;
   struct intel_device_info *devinfo = &screen->devinfo;
   struct crocus_uncompiled_shader *ish =
      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
   const nir_shader *nir = ish->nir;

   if (ice->state.dirty & CROCUS_DIRTY_DEPTH_BUFFER) {
      struct pipe_surface *zs_surf = cso_fb->zsbuf;

      if (zs_surf) {
         struct crocus_resource *z_res, *s_res;
         crocus_get_depth_stencil_resources(devinfo, zs_surf->texture,
                                            &z_res, &s_res);
         unsigned num_layers =
            zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

         if (z_res) {
            crocus_resource_prepare_render(ice, z_res,
                                           zs_surf->u.tex.level,
                                           zs_surf->u.tex.first_layer,
                                           num_layers, ice->state.hiz_usage);
            crocus_cache_flush_for_depth(batch, z_res->bo);

            if (((struct crocus_surface *)zs_surf)->align_res) {
               crocus_update_align_res(batch,
                                       (struct crocus_surface *)zs_surf, true);
            }
         }

         if (s_res) {
            crocus_cache_flush_for_depth(batch, s_res->bo);
         }
      }
   }

   if (nir->info.outputs_read != 0) {
      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
         if (cso_fb->cbufs[i]) {
            struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
            struct crocus_resource *res = (void *) cso_fb->cbufs[i]->texture;

            crocus_resource_prepare_texture(ice, res, surf->view.format,
                                            surf->view.base_level, 1,
                                            surf->view.base_array_layer,
                                            surf->view.array_len);
         }
      }
   }

   if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_FS) {
      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
         struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
         if (!surf)
            continue;

         struct crocus_resource *res = (void *) surf->base.texture;

         if (surf->align_res)
            crocus_update_align_res(batch, surf, true);

         enum isl_aux_usage aux_usage =
            crocus_resource_render_aux_usage(ice, res, surf->view.base_level,
                                             surf->view.format,
                                             draw_aux_buffer_disabled[i]);

         if (ice->state.draw_aux_usage[i] != aux_usage) {
            ice->state.draw_aux_usage[i] = aux_usage;
            /* XXX: Need to track which bindings to make dirty */
            ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
         }

         crocus_resource_prepare_render(ice, res, surf->view.base_level,
                                        surf->view.base_array_layer,
                                        surf->view.array_len,
                                        aux_usage);

         crocus_cache_flush_for_render(batch, res->bo, surf->view.format,
                                       aux_usage);
      }
   }
}

/**
 * \brief Call this after drawing to mark which buffers need resolving
 *
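 * Written buffers are also added to the batch's depth or render cache sets,
 * so that later reads in the same batch know a flush is needed.
 *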
 * If the depth buffer was written to and if it has an accompanying HiZ
 * buffer, then mark that it needs a depth resolve.
 *
 * If the color buffer is a multisample window system buffer, then
 * mark that it needs a downsample.
 *
 * Also mark any render targets which will be textured as needing a render
 * cache flush.
 */
void
crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
                                        struct crocus_batch *batch)
{
   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
   struct crocus_screen *screen = (void *) ice->ctx.screen;
   struct intel_device_info *devinfo = &screen->devinfo;
   // XXX: front buffer drawing?

   bool may_have_resolved_depth =
      ice->state.dirty & (CROCUS_DIRTY_DEPTH_BUFFER |
                          CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL);

   struct pipe_surface *zs_surf = cso_fb->zsbuf;
   if (zs_surf) {
      struct crocus_resource *z_res, *s_res;
      crocus_get_depth_stencil_resources(devinfo, zs_surf->texture,
                                         &z_res, &s_res);
      unsigned num_layers =
         zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;

      if (z_res) {
         if (may_have_resolved_depth && ice->state.depth_writes_enabled) {
            crocus_resource_finish_render(ice, z_res, zs_surf->u.tex.level,
                                          zs_surf->u.tex.first_layer,
                                          num_layers, ice->state.hiz_usage);
         }

         if (ice->state.depth_writes_enabled)
            crocus_depth_cache_add_bo(batch, z_res->bo);

         if (((struct crocus_surface *)zs_surf)->align_res) {
            crocus_update_align_res(batch,
                                    (struct crocus_surface *)zs_surf, false);
         }
      }

      if (s_res) {
         if (may_have_resolved_depth && ice->state.stencil_writes_enabled) {
            crocus_resource_finish_write(ice, s_res, zs_surf->u.tex.level,
                                         zs_surf->u.tex.first_layer,
                                         num_layers, s_res->aux.usage);
         }

         if (ice->state.stencil_writes_enabled)
            crocus_depth_cache_add_bo(batch, s_res->bo);
      }
   }

   bool may_have_resolved_color =
      ice->state.stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_FS;

   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
      struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
      if (!surf)
         continue;

      if (surf->align_res)
         crocus_update_align_res(batch, surf, false);

      struct crocus_resource *res = (void *) surf->base.texture;
      enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i];

      crocus_render_cache_add_bo(batch, res->bo, surf->view.format,
                                 aux_usage);

      if (may_have_resolved_color) {
         union pipe_surface_desc *desc = &surf->base.u;
         unsigned num_layers =
            desc->tex.last_layer - desc->tex.first_layer + 1;
         crocus_resource_finish_render(ice, res, desc->tex.level,
                                       desc->tex.first_layer, num_layers,
                                       aux_usage);
      }
   }
}

/**
 * Clear the cache-tracking sets.
 */
void
crocus_cache_sets_clear(struct crocus_batch *batch)
{
   hash_table_foreach(batch->cache.render, render_entry)
      _mesa_hash_table_remove(batch->cache.render, render_entry);

   set_foreach(batch->cache.depth, depth_entry)
      _mesa_set_remove(batch->cache.depth, depth_entry);
}

/**
 * Emits an appropriate flush for a BO if it has been rendered to within the
 * same batchbuffer as a read that's about to be emitted.
 *
 * The GPU has separate, incoherent caches for the render cache and the
 * sampler cache, along with other caches.  Usually data in the different
 * caches don't interact (e.g.
 * we don't render to our driver-generated
 * immediate constant data), but for render-to-texture in FBOs we definitely
 * do.  When a batchbuffer is flushed, the kernel will ensure that everything
 * necessary is flushed before another use of that BO, but for reuse from
 * different caches within a batchbuffer, it's all our responsibility.
 */
void
crocus_flush_depth_and_render_caches(struct crocus_batch *batch)
{
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   if (devinfo->ver >= 6) {
      crocus_emit_pipe_control_flush(batch,
                                     "cache tracker: render-to-texture",
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_CS_STALL);

      crocus_emit_pipe_control_flush(batch,
                                     "cache tracker: render-to-texture",
                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
                                     PIPE_CONTROL_CONST_CACHE_INVALIDATE);
   } else {
      crocus_emit_mi_flush(batch);
   }

   crocus_cache_sets_clear(batch);
}

void
crocus_cache_flush_for_read(struct crocus_batch *batch,
                            struct crocus_bo *bo)
{
   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) ||
       _mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
      crocus_flush_depth_and_render_caches(batch);
}

/* Pack an (isl_format, isl_aux_usage) pair into a pointer-sized value so it
 * can be stored directly as the data of a render cache hash table entry.
 */
static void *
format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
{
   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
}

void
crocus_cache_flush_for_render(struct crocus_batch *batch,
                              struct crocus_bo *bo,
                              enum isl_format format,
                              enum isl_aux_usage aux_usage)
{
   if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
      crocus_flush_depth_and_render_caches(batch);

   /* Check to see if this bo has been used by a previous rendering operation
    * but with a different format or aux usage.  If it has, flush the render
    * cache so we ensure that it's only in there with one format or aux usage
    * at a time.
    *
    * Even though it's not obvious, this can easily happen in practice.
    * Suppose a client is blending on a surface with sRGB encode enabled on
    * gen9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
    * then disables sRGB decode and continues blending we will flip on
    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
    * that we have fragments in-flight which are rendering with UNORM+CCS_E
    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
    * same time and the pixel scoreboard and color blender are trying to sort
    * it all out.  This ends badly (i.e. GPU hangs).
    *
    * To date, we have never observed GPU hangs or even corruption to be
    * associated with switching the format, only the aux usage.  However,
    * there are comments in various docs which indicate that the render cache
    * isn't 100% resilient to format changes.  We may as well be conservative
    * and flush on format changes too.  We can always relax this later if we
    * find it to be a performance problem.
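    *
    * (The packed (format, aux usage) tuple stored by
    * crocus_render_cache_add_bo() makes this check a single pointer
    * comparison; a mismatch in either field forces the flush below.)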
    */
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
   if (entry && entry->data != format_aux_tuple(format, aux_usage))
      crocus_flush_depth_and_render_caches(batch);
}

void
crocus_render_cache_add_bo(struct crocus_batch *batch,
                           struct crocus_bo *bo,
                           enum isl_format format,
                           enum isl_aux_usage aux_usage)
{
#ifndef NDEBUG
   struct hash_entry *entry =
      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
   if (entry) {
      /* Otherwise, someone didn't do a flush_for_render and that would be
       * very bad indeed.
       */
      assert(entry->data == format_aux_tuple(format, aux_usage));
   }
#endif

   _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo,
                                      format_aux_tuple(format, aux_usage));
}

void
crocus_cache_flush_for_depth(struct crocus_batch *batch,
                             struct crocus_bo *bo)
{
   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo))
      crocus_flush_depth_and_render_caches(batch);
}

void
crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo)
{
   _mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo);
}

static void
crocus_resolve_color(struct crocus_context *ice,
                     struct crocus_batch *batch,
                     struct crocus_resource *res,
                     unsigned level, unsigned layer,
                     enum isl_aux_op resolve_op)
{
   struct crocus_screen *screen = batch->screen;
   DBG("%s to res %p level %u layer %u\n", __func__, res, level, layer);

   struct blorp_surf surf;
   crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
                                  &surf, &res->base.b, res->aux.usage,
                                  level, true);

   crocus_batch_maybe_flush(batch, 1500);

   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
    *
    *    "Any transition from any value in {Clear, Render, Resolve} to a
    *    different value in {Clear, Render, Resolve} requires end of pipe
    *    synchronization."
    *
    * In other words, fast clear ops are not properly synchronized with
    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
    * contents of the previous draw hit the render target before we resolve
    * and again afterwards to ensure that the resolve is complete before we
    * do any more regular drawing.
    */
   crocus_emit_end_of_pipe_sync(batch, "color resolve: pre-flush",
                                PIPE_CONTROL_RENDER_TARGET_FLUSH);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1,
                     isl_format_srgb_to_linear(res->surf.format),
                     resolve_op);
   blorp_batch_finish(&blorp_batch);

   /* See comment above */
   crocus_emit_end_of_pipe_sync(batch, "color resolve: post-flush",
                                PIPE_CONTROL_RENDER_TARGET_FLUSH);
}

static void
crocus_mcs_partial_resolve(struct crocus_context *ice,
                           struct crocus_batch *batch,
                           struct crocus_resource *res,
                           uint32_t start_layer,
                           uint32_t num_layers)
{
   struct crocus_screen *screen = batch->screen;

   DBG("%s to res %p layers %u-%u\n", __func__, res,
       start_layer, start_layer + num_layers - 1);

   assert(isl_aux_usage_has_mcs(res->aux.usage));

   struct blorp_surf surf;
   crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
                                  &surf, &res->base.b, res->aux.usage,
                                  0, true);

   struct blorp_batch blorp_batch;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
   blorp_mcs_partial_resolve(&blorp_batch, &surf,
                             isl_format_srgb_to_linear(res->surf.format),
                             start_layer, num_layers);
   blorp_batch_finish(&blorp_batch);
}

/**
 * Perform a HiZ or depth resolve operation.
 *
 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
 * PRM, Volume 1, Part 2:
 *   - 7.5.3.1 Depth Buffer Clear
 *   - 7.5.3.2 Depth Buffer Resolve
 *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
 */
void
crocus_hiz_exec(struct crocus_context *ice,
                struct crocus_batch *batch,
                struct crocus_resource *res,
                unsigned int level, unsigned int start_layer,
                unsigned int num_layers, enum isl_aux_op op,
                bool update_clear_depth)
{
   struct crocus_screen *screen = batch->screen;
   const struct intel_device_info *devinfo = &batch->screen->devinfo;
   assert(crocus_resource_level_has_hiz(res, level));
   assert(op != ISL_AUX_OP_NONE);
   UNUSED const char *name = NULL;

   switch (op) {
   case ISL_AUX_OP_FULL_RESOLVE:
      name = "depth resolve";
      break;
   case ISL_AUX_OP_AMBIGUATE:
      name = "hiz ambiguate";
      break;
   case ISL_AUX_OP_FAST_CLEAR:
      name = "depth clear";
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
   case ISL_AUX_OP_NONE:
      unreachable("Invalid HiZ op");
   }

   DBG("%s %s to res %p level %d layers %d-%d\n",
       __func__, name, res, level, start_layer, start_layer + num_layers - 1);

   /* The following stalls and flushes are only documented to be required
    * for HiZ clear operations.  However, they also seem to be required for
    * resolve operations.
    *
    * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
    *
    *    "If other rendering operations have preceded this clear, a
    *    PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *    enabled must be issued before the rectangle primitive used for
    *    the depth buffer clear operation."
    *
    * Same applies for Gen8 and Gen9.
    *
    * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
    * PIPE_CONTROL, Depth Cache Flush Enable:
    *
    *    "This bit must not be set when Depth Stall Enable bit is set in
    *    this packet."
    *
    * This is confirmed to hold for real, Haswell gets immediate gpu hangs.
    *
    * Therefore issue two pipe control flushes, one for cache flush and
    * another for depth stall.
    */
   if (devinfo->ver == 6) {
      /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
       *
       *    "If other rendering operations have preceded this clear, a
       *    PIPE_CONTROL with write cache flush enabled and Z-inhibit
       *    disabled must be issued before the rectangle primitive used for
       *    the depth buffer clear operation."
       */
      crocus_emit_pipe_control_flush(batch,
                                     "hiz op: pre-flushes (1)",
                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
   } else if (devinfo->ver >= 7) {
      crocus_emit_pipe_control_flush(batch,
                                     "hiz op: pre-flushes (1/2)",
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
      crocus_emit_pipe_control_flush(batch, "hiz op: pre-flushes (2/2)",
                                     PIPE_CONTROL_DEPTH_STALL);
   }

   assert(isl_aux_usage_has_hiz(res->aux.usage) && res->aux.bo);

   crocus_batch_maybe_flush(batch, 1500);

   struct blorp_surf surf;
   crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev,
                                  &surf, &res->base.b, res->aux.usage,
                                  level, true);

   struct blorp_batch blorp_batch;
   enum blorp_batch_flags flags = 0;
   flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
   blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags);
   blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op);
   blorp_batch_finish(&blorp_batch);

   /* The following stalls and flushes are only documented to be required
    * for HiZ clear operations.  However, they also seem to be required for
    * resolve operations.
    *
    * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
    *
    *    "Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
    *    PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
    *    "set" before starting to render.  DepthStall and DepthFlush are
    *    not needed between consecutive depth clear passes nor is it
    *    required if the depth clear pass was done with
    *    'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
    *
    * TODO: As the spec says, this could be conditional.
    */
   if (devinfo->ver == 6) {
      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
       *
       *    "[DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
       *    followed by a PIPE_CONTROL command with DEPTH_STALL bit set
       *    and Then followed by Depth FLUSH'
       */
      crocus_emit_pipe_control_flush(batch,
                                     "hiz op: post-flushes (1/2)",
                                     PIPE_CONTROL_DEPTH_STALL);

      crocus_emit_pipe_control_flush(batch,
                                     "hiz op: post-flushes (2/2)",
                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
                                     PIPE_CONTROL_CS_STALL);
   }
}

/**
 * Does the resource's slice have hiz enabled?
 */
bool
crocus_resource_level_has_hiz(const struct crocus_resource *res, uint32_t level)
{
   crocus_resource_check_level_layer(res, level, 0);
   return res->aux.has_hiz & (1 << level);
}

static bool
crocus_resource_level_has_aux(const struct crocus_resource *res, uint32_t level)
{
   if (isl_aux_usage_has_hiz(res->aux.usage))
      return crocus_resource_level_has_hiz(res, level);
   else
      return level < res->aux.surf.levels;
}

/**
 * \brief Assert that the level and layer are valid for the resource.
 */
void
crocus_resource_check_level_layer(UNUSED const struct crocus_resource *res,
                                  UNUSED uint32_t level, UNUSED uint32_t layer)
{
   assert(level < res->surf.levels);
   assert(layer < util_num_layers(&res->base.b, level));
}

static inline uint32_t
miptree_level_range_length(const struct crocus_resource *res,
                           uint32_t start_level, uint32_t num_levels)
{
   assert(start_level < res->surf.levels);

   if (num_levels == INTEL_REMAINING_LEVELS)
      num_levels = res->surf.levels - start_level;

   /* Check for overflow */
   assert(start_level + num_levels >= start_level);
   assert(start_level + num_levels <= res->surf.levels);

   return num_levels;
}

static inline uint32_t
miptree_layer_range_length(const struct crocus_resource *res, uint32_t level,
                           uint32_t start_layer, uint32_t num_layers)
{
   assert(level <= res->base.b.last_level);

   const uint32_t total_num_layers = crocus_get_num_logical_layers(res, level);
   assert(start_layer < total_num_layers);
   if (num_layers == INTEL_REMAINING_LAYERS)
      num_layers = total_num_layers - start_layer;
   /* Check for overflow */
   assert(start_layer + num_layers >= start_layer);
   assert(start_layer + num_layers <= total_num_layers);

   return num_layers;
}

bool
crocus_has_invalid_primary(const struct crocus_resource *res,
                           unsigned start_level, unsigned num_levels,
                           unsigned start_layer, unsigned num_layers)
{
   if (!res->aux.bo)
      return false;

   /* Clamp the level range to fit the resource */
   num_levels = miptree_level_range_length(res, start_level, num_levels);

   for (uint32_t l = 0; l < num_levels; l++) {
      const uint32_t level = start_level + l;
      if (!crocus_resource_level_has_aux(res, level))
         continue;

      const uint32_t level_layers =
         miptree_layer_range_length(res, level, start_layer, num_layers);
      for (unsigned a = 0; a < level_layers; a++) {
         enum isl_aux_state aux_state =
            crocus_resource_get_aux_state(res, level, start_layer + a);
         if (!isl_aux_state_has_valid_primary(aux_state))
            return true;
      }
   }

   return false;
}

void
crocus_resource_prepare_access(struct crocus_context *ice,
                               struct crocus_resource *res,
                               uint32_t start_level, uint32_t num_levels,
                               uint32_t start_layer, uint32_t num_layers,
                               enum isl_aux_usage aux_usage,
                               bool fast_clear_supported)
{
   if (!res->aux.bo)
      return;

   /* We can't do resolves on the compute engine, so awkwardly, we have to
    * do them on the render batch...
    */
   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];

   const uint32_t clamped_levels =
      miptree_level_range_length(res, start_level, num_levels);
   for (uint32_t l = 0; l < clamped_levels; l++) {
      const uint32_t level = start_level + l;
      if (!crocus_resource_level_has_aux(res, level))
         continue;

      const uint32_t level_layers =
         miptree_layer_range_length(res, level, start_layer, num_layers);
      for (uint32_t a = 0; a < level_layers; a++) {
         const uint32_t layer = start_layer + a;
         const enum isl_aux_state aux_state =
            crocus_resource_get_aux_state(res, level, layer);
         const enum isl_aux_op aux_op =
            isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported);

         /* Prepare the aux buffer for a conditional or unconditional access.
          * A conditional access is handled by assuming that the access will
          * not evaluate to a no-op.
          * If the access does in fact occur, the aux
          * will be in the required state.  If it does not, no data is lost
          * because the aux_op performed is lossless.
          */
         if (aux_op == ISL_AUX_OP_NONE) {
            /* Nothing to do here. */
         } else if (isl_aux_usage_has_mcs(res->aux.usage)) {
            assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE);
            crocus_mcs_partial_resolve(ice, batch, res, layer, 1);
         } else if (isl_aux_usage_has_hiz(res->aux.usage)) {
            crocus_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false);
         } else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) {
            unreachable("crocus doesn't resolve STC_CCS resources");
         } else {
            assert(isl_aux_usage_has_ccs(res->aux.usage));
            crocus_resolve_color(ice, batch, res, level, layer, aux_op);
         }

         const enum isl_aux_state new_state =
            isl_aux_state_transition_aux_op(aux_state, res->aux.usage, aux_op);
         crocus_resource_set_aux_state(ice, res, level, layer, 1, new_state);
      }
   }
}

void
crocus_resource_finish_write(struct crocus_context *ice,
                             struct crocus_resource *res, uint32_t level,
                             uint32_t start_layer, uint32_t num_layers,
                             enum isl_aux_usage aux_usage)
{
   if (res->base.b.format == PIPE_FORMAT_S8_UINT)
      res->shadow_needs_update = true;

   if (!crocus_resource_level_has_aux(res, level))
      return;

   const uint32_t level_layers =
      miptree_layer_range_length(res, level, start_layer, num_layers);

   for (uint32_t a = 0; a < level_layers; a++) {
      const uint32_t layer = start_layer + a;
      const enum isl_aux_state aux_state =
         crocus_resource_get_aux_state(res, level, layer);

      /* Transition the aux state for a conditional or unconditional write.
       * A conditional write is handled by assuming that the write applies
       * to only part of the render target.  This prevents the new state
       * from losing the types of compression that might exist in the
       * current state (e.g. CLEAR).  If the write evaluates to a no-op, the
       * state will still be able to communicate when resolves are necessary
       * (but it may falsely communicate this as well).
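       *
       * (For example, with HiZ a partial write to a layer in the CLEAR
       * state moves it to COMPRESSED_CLEAR rather than COMPRESSED_NO_CLEAR,
       * so a later read still knows a clear resolve may be needed.)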
905 */ 906 const enum isl_aux_state new_aux_state = 907 isl_aux_state_transition_write(aux_state, aux_usage, false); 908 909 crocus_resource_set_aux_state(ice, res, level, layer, 1, new_aux_state); 910 } 911} 912 913enum isl_aux_state 914crocus_resource_get_aux_state(const struct crocus_resource *res, 915 uint32_t level, uint32_t layer) 916{ 917 crocus_resource_check_level_layer(res, level, layer); 918 assert(crocus_resource_level_has_aux(res, level)); 919 920 return res->aux.state[level][layer]; 921} 922 923void 924crocus_resource_set_aux_state(struct crocus_context *ice, 925 struct crocus_resource *res, uint32_t level, 926 uint32_t start_layer, uint32_t num_layers, 927 enum isl_aux_state aux_state) 928{ 929 assert(crocus_resource_level_has_aux(res, level)); 930 931 num_layers = miptree_layer_range_length(res, level, start_layer, num_layers); 932 for (unsigned a = 0; a < num_layers; a++) { 933 if (res->aux.state[level][start_layer + a] != aux_state) { 934 res->aux.state[level][start_layer + a] = aux_state; 935 ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES | 936 CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES; 937 /* XXX: Need to track which bindings to make dirty */ 938 ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS; 939 } 940 } 941} 942 943static bool 944isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b) 945{ 946 /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear 947 * values so sRGB curve application was a no-op for all fast-clearable 948 * formats. 949 * 950 * On gen9+, the hardware supports arbitrary clear values. For sRGB clear 951 * values, the hardware interprets the floats, not as what would be 952 * returned from the sampler (or written by the shader), but as being 953 * between format conversion and sRGB curve application. This means that 954 * we can switch between sRGB and UNORM without having to whack the clear 955 * color. 956 */ 957 return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b); 958} 959 960void 961crocus_resource_prepare_texture(struct crocus_context *ice, 962 struct crocus_resource *res, 963 enum isl_format view_format, 964 uint32_t start_level, uint32_t num_levels, 965 uint32_t start_layer, uint32_t num_layers) 966{ 967 enum isl_aux_usage aux_usage = 968 crocus_resource_texture_aux_usage(res); 969 970 bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE; 971 972 /* Clear color is specified as ints or floats and the conversion is done by 973 * the sampler. If we have a texture view, we would have to perform the 974 * clear color conversion manually. Just disable clear color. 
975 */ 976 if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format)) 977 clear_supported = false; 978 979 crocus_resource_prepare_access(ice, res, start_level, num_levels, 980 start_layer, num_layers, 981 aux_usage, clear_supported); 982} 983 984enum isl_aux_usage 985crocus_resource_render_aux_usage(struct crocus_context *ice, 986 struct crocus_resource *res, 987 uint32_t level, 988 enum isl_format render_format, 989 bool draw_aux_disabled) 990{ 991 struct crocus_screen *screen = (void *) ice->ctx.screen; 992 struct intel_device_info *devinfo = &screen->devinfo; 993 994 if (draw_aux_disabled) 995 return ISL_AUX_USAGE_NONE; 996 997 switch (res->aux.usage) { 998 case ISL_AUX_USAGE_MCS: 999 return res->aux.usage; 1000 1001 case ISL_AUX_USAGE_CCS_D: 1002 /* Otherwise, we try to fall back to CCS_D */ 1003 if (isl_format_supports_ccs_d(devinfo, render_format)) 1004 return ISL_AUX_USAGE_CCS_D; 1005 1006 return ISL_AUX_USAGE_NONE; 1007 1008 case ISL_AUX_USAGE_HIZ: 1009 assert(render_format == res->surf.format); 1010 return crocus_resource_level_has_hiz(res, level) ? 1011 res->aux.usage : ISL_AUX_USAGE_NONE; 1012 1013 default: 1014 return ISL_AUX_USAGE_NONE; 1015 } 1016} 1017 1018void 1019crocus_resource_prepare_render(struct crocus_context *ice, 1020 struct crocus_resource *res, uint32_t level, 1021 uint32_t start_layer, uint32_t layer_count, 1022 enum isl_aux_usage aux_usage) 1023{ 1024 crocus_resource_prepare_access(ice, res, level, 1, start_layer, 1025 layer_count, aux_usage, 1026 aux_usage != ISL_AUX_USAGE_NONE); 1027} 1028 1029void 1030crocus_resource_finish_render(struct crocus_context *ice, 1031 struct crocus_resource *res, uint32_t level, 1032 uint32_t start_layer, uint32_t layer_count, 1033 enum isl_aux_usage aux_usage) 1034{ 1035 crocus_resource_finish_write(ice, res, level, start_layer, layer_count, 1036 aux_usage); 1037} 1038 1039static void 1040crocus_update_stencil_shadow(struct crocus_context *ice, 1041 struct crocus_resource *res) 1042{ 1043 struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen; 1044 UNUSED const struct intel_device_info *devinfo = &screen->devinfo; 1045 assert(devinfo->ver == 7); 1046 1047 if (!res->shadow_needs_update) 1048 return; 1049 1050 struct pipe_box box; 1051 for (unsigned level = 0; level <= res->base.b.last_level; level++) { 1052 u_box_2d(0, 0, 1053 u_minify(res->base.b.width0, level), 1054 u_minify(res->base.b.height0, level), &box); 1055 const unsigned depth = res->base.b.target == PIPE_TEXTURE_3D ? 1056 u_minify(res->base.b.depth0, level) : res->base.b.array_size; 1057 1058 for (unsigned layer = 0; layer < depth; layer++) { 1059 box.z = layer; 1060 ice->ctx.resource_copy_region(&ice->ctx, 1061 &res->shadow->base.b, level, 0, 0, layer, 1062 &res->base.b, level, &box); 1063 } 1064 } 1065 res->shadow_needs_update = false; 1066} 1067