1/*
2 * Copyright © 2017 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included
12 * in all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
17 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
20 * DEALINGS IN THE SOFTWARE.
21 */
22
23/**
24 * @file crocus_resolve.c
25 *
26 * This file handles resolve tracking for main and auxiliary surfaces.
27 *
28 * It also handles our cache tracking.  We have sets for the render cache,
29 * depth cache, and so on.  If a BO is in a cache's set, then it may have
30 * data in that cache.  The helpers take care of emitting flushes for
31 * render-to-texture, format reinterpretation issues, and other situations.
32 */
33
34#include "util/hash_table.h"
35#include "util/set.h"
36#include "crocus_context.h"
37#include "compiler/nir/nir.h"
38
39#define FILE_DEBUG_FLAG DEBUG_BLORP
40
/* Forward declaration: resolve_sampler_views() calls this helper before its
 * definition appears in the file.
 */
static void crocus_update_stencil_shadow(struct crocus_context *ice,
                                         struct crocus_resource *res);
44/**
45 * Disable auxiliary buffers if a renderbuffer is also bound as a texture
46 * or shader image.  This causes a self-dependency, where both rendering
47 * and sampling may concurrently read or write the CCS buffer, causing
48 * incorrect pixels.
49 */
50static bool
51disable_rb_aux_buffer(struct crocus_context *ice,
52                      bool *draw_aux_buffer_disabled,
53                      struct crocus_resource *tex_res,
54                      unsigned min_level, unsigned num_levels,
55                      const char *usage)
56{
57   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
58   bool found = false;
59
60   /* We only need to worry about fast clears. */
61   if (tex_res->aux.usage != ISL_AUX_USAGE_CCS_D)
62      return false;
63
64   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
65      struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
66      if (!surf)
67         continue;
68
69      struct crocus_resource *rb_res = (void *) surf->base.texture;
70
71      if (rb_res->bo == tex_res->bo &&
72          surf->base.u.tex.level >= min_level &&
73          surf->base.u.tex.level < min_level + num_levels) {
74         found = draw_aux_buffer_disabled[i] = true;
75      }
76   }
77
78   if (found) {
79      perf_debug(&ice->dbg,
80                 "Disabling CCS because a renderbuffer is also bound %s.\n",
81                 usage);
82   }
83
84   return found;
85}
86
87static void
88resolve_sampler_views(struct crocus_context *ice,
89                      struct crocus_batch *batch,
90                      struct crocus_shader_state *shs,
91                      const struct shader_info *info,
92                      bool *draw_aux_buffer_disabled,
93                      bool consider_framebuffer)
94{
95   uint32_t views = info ? (shs->bound_sampler_views & info->textures_used[0]) : 0;
96
97   while (views) {
98      const int i = u_bit_scan(&views);
99      struct crocus_sampler_view *isv = shs->textures[i];
100
101      if (isv->res->base.b.target != PIPE_BUFFER) {
102         if (consider_framebuffer) {
103            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled, isv->res,
104                                  isv->view.base_level, isv->view.levels,
105                                  "for sampling");
106         }
107
108         crocus_resource_prepare_texture(ice, isv->res, isv->view.format,
109                                         isv->view.base_level, isv->view.levels,
110                                         isv->view.base_array_layer,
111                                         isv->view.array_len);
112      }
113
114      crocus_cache_flush_for_read(batch, isv->res->bo);
115
116      if (batch->screen->devinfo.ver == 7 &&
117          (isv->base.format == PIPE_FORMAT_X24S8_UINT ||
118           isv->base.format == PIPE_FORMAT_X32_S8X24_UINT ||
119           isv->base.format == PIPE_FORMAT_S8_UINT)) {
120         struct crocus_resource *zres, *sres;
121         crocus_get_depth_stencil_resources(&batch->screen->devinfo, isv->base.texture, &zres, &sres);
122         crocus_update_stencil_shadow(ice, sres);
123         crocus_cache_flush_for_read(batch, sres->shadow->bo);
124      }
125   }
126}
127
128static void
129resolve_image_views(struct crocus_context *ice,
130                    struct crocus_batch *batch,
131                    struct crocus_shader_state *shs,
132                    bool *draw_aux_buffer_disabled,
133                    bool consider_framebuffer)
134{
135   /* TODO: Consider images used by program */
136   uint32_t views = shs->bound_image_views;
137
138   while (views) {
139      const int i = u_bit_scan(&views);
140      struct pipe_image_view *pview = &shs->image[i].base;
141      struct crocus_resource *res = (void *) pview->resource;
142
143      if (res->base.b.target != PIPE_BUFFER) {
144         if (consider_framebuffer) {
145            disable_rb_aux_buffer(ice, draw_aux_buffer_disabled,
146                                  res, pview->u.tex.level, 1,
147                                  "as a shader image");
148         }
149
150         unsigned num_layers =
151            pview->u.tex.last_layer - pview->u.tex.first_layer + 1;
152
153         /* The data port doesn't understand any compression */
154         crocus_resource_prepare_access(ice, res,
155                                        pview->u.tex.level, 1,
156                                        pview->u.tex.first_layer, num_layers,
157                                        ISL_AUX_USAGE_NONE, false);
158      }
159
160      crocus_cache_flush_for_read(batch, res->bo);
161   }
162}
163
164static void
165crocus_update_align_res(struct crocus_batch *batch,
166                        struct crocus_surface *surf,
167                        bool copy_to_wa)
168{
169   struct crocus_screen *screen = (struct crocus_screen *)batch->screen;
170   struct pipe_blit_info info = { 0 };
171
172   info.src.resource = copy_to_wa ? surf->base.texture : surf->align_res;
173   info.src.level = copy_to_wa ? surf->base.u.tex.level : 0;
174   u_box_2d_zslice(0, 0, copy_to_wa ? surf->base.u.tex.first_layer : 0,
175                   u_minify(surf->base.texture->width0, surf->base.u.tex.level),
176                   u_minify(surf->base.texture->height0, surf->base.u.tex.level), &info.src.box);
177   info.src.format = surf->base.texture->format;
178   info.dst.resource = copy_to_wa ? surf->align_res : surf->base.texture;
179   info.dst.level = copy_to_wa ? 0 : surf->base.u.tex.level;
180   info.dst.box = info.src.box;
181   info.dst.box.z = copy_to_wa ? 0 : surf->base.u.tex.first_layer;
182   info.dst.format = surf->base.texture->format;
183   info.mask = util_format_is_depth_or_stencil(surf->base.texture->format) ? PIPE_MASK_ZS : PIPE_MASK_RGBA;
184   info.filter = 0;
185   if (!screen->vtbl.blit_blt(batch, &info)) {
186      assert(0);
187   }
188}
189
190/**
191 * \brief Resolve buffers before drawing.
192 *
193 * Resolve the depth buffer's HiZ buffer, resolve the depth buffer of each
194 * enabled depth texture, and flush the render cache for any dirty textures.
195 */
196void
197crocus_predraw_resolve_inputs(struct crocus_context *ice,
198                              struct crocus_batch *batch,
199                              bool *draw_aux_buffer_disabled,
200                              gl_shader_stage stage,
201                              bool consider_framebuffer)
202{
203   struct crocus_shader_state *shs = &ice->state.shaders[stage];
204   const struct shader_info *info = crocus_get_shader_info(ice, stage);
205
206   uint64_t stage_dirty = (CROCUS_STAGE_DIRTY_BINDINGS_VS << stage) |
207      (consider_framebuffer ? CROCUS_STAGE_DIRTY_BINDINGS_FS : 0);
208
209   if (ice->state.stage_dirty & stage_dirty) {
210      resolve_sampler_views(ice, batch, shs, info, draw_aux_buffer_disabled,
211                            consider_framebuffer);
212      resolve_image_views(ice, batch, shs, draw_aux_buffer_disabled,
213                          consider_framebuffer);
214   }
215}
216
217void
218crocus_predraw_resolve_framebuffer(struct crocus_context *ice,
219                                   struct crocus_batch *batch,
220                                   bool *draw_aux_buffer_disabled)
221{
222   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
223   struct crocus_screen *screen = (void *) ice->ctx.screen;
224   struct intel_device_info *devinfo = &screen->devinfo;
225   struct crocus_uncompiled_shader *ish =
226      ice->shaders.uncompiled[MESA_SHADER_FRAGMENT];
227   const nir_shader *nir = ish->nir;
228
229   if (ice->state.dirty & CROCUS_DIRTY_DEPTH_BUFFER) {
230      struct pipe_surface *zs_surf = cso_fb->zsbuf;
231
232      if (zs_surf) {
233         struct crocus_resource *z_res, *s_res;
234         crocus_get_depth_stencil_resources(devinfo, zs_surf->texture, &z_res, &s_res);
235         unsigned num_layers =
236            zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;
237
238         if (z_res) {
239            crocus_resource_prepare_render(ice, z_res,
240                                           zs_surf->u.tex.level,
241                                           zs_surf->u.tex.first_layer,
242                                           num_layers, ice->state.hiz_usage);
243            crocus_cache_flush_for_depth(batch, z_res->bo);
244
245            if (((struct crocus_surface *)zs_surf)->align_res) {
246               crocus_update_align_res(batch, (struct crocus_surface *)zs_surf, true);
247            }
248         }
249
250         if (s_res) {
251            crocus_cache_flush_for_depth(batch, s_res->bo);
252         }
253      }
254   }
255
256   if (nir->info.outputs_read != 0) {
257      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
258         if (cso_fb->cbufs[i]) {
259            struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
260            struct crocus_resource *res = (void *) cso_fb->cbufs[i]->texture;
261
262            crocus_resource_prepare_texture(ice, res, surf->view.format,
263                                            surf->view.base_level, 1,
264                                            surf->view.base_array_layer,
265                                            surf->view.array_len);
266         }
267      }
268   }
269
270   if (ice->state.stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_FS) {
271      for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
272         struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
273         if (!surf)
274            continue;
275
276         struct crocus_resource *res = (void *) surf->base.texture;
277
278         if (surf->align_res)
279            crocus_update_align_res(batch, surf, true);
280
281         enum isl_aux_usage aux_usage =
282            crocus_resource_render_aux_usage(ice, res, surf->view.base_level,
283                                             surf->view.format,
284                                             draw_aux_buffer_disabled[i]);
285
286         if (ice->state.draw_aux_usage[i] != aux_usage) {
287            ice->state.draw_aux_usage[i] = aux_usage;
288            /* XXX: Need to track which bindings to make dirty */
289            ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
290         }
291
292         crocus_resource_prepare_render(ice, res, surf->view.base_level,
293                                        surf->view.base_array_layer,
294                                        surf->view.array_len,
295                                        aux_usage);
296
297         crocus_cache_flush_for_render(batch, res->bo, surf->view.format,
298                                       aux_usage);
299      }
300   }
301}
302
303/**
304 * \brief Call this after drawing to mark which buffers need resolving
305 *
306 * If the depth buffer was written to and if it has an accompanying HiZ
307 * buffer, then mark that it needs a depth resolve.
308 *
309 * If the color buffer is a multisample window system buffer, then
310 * mark that it needs a downsample.
311 *
312 * Also mark any render targets which will be textured as needing a render
313 * cache flush.
314 */
315void
316crocus_postdraw_update_resolve_tracking(struct crocus_context *ice,
317                                        struct crocus_batch *batch)
318{
319   struct pipe_framebuffer_state *cso_fb = &ice->state.framebuffer;
320   struct crocus_screen *screen = (void *) ice->ctx.screen;
321   struct intel_device_info *devinfo = &screen->devinfo;
322   // XXX: front buffer drawing?
323
324   bool may_have_resolved_depth =
325      ice->state.dirty & (CROCUS_DIRTY_DEPTH_BUFFER |
326                          CROCUS_DIRTY_GEN6_WM_DEPTH_STENCIL);
327
328   struct pipe_surface *zs_surf = cso_fb->zsbuf;
329   if (zs_surf) {
330      struct crocus_resource *z_res, *s_res;
331      crocus_get_depth_stencil_resources(devinfo, zs_surf->texture, &z_res, &s_res);
332      unsigned num_layers =
333         zs_surf->u.tex.last_layer - zs_surf->u.tex.first_layer + 1;
334
335      if (z_res) {
336         if (may_have_resolved_depth && ice->state.depth_writes_enabled) {
337            crocus_resource_finish_render(ice, z_res, zs_surf->u.tex.level,
338                                          zs_surf->u.tex.first_layer, num_layers,
339                                          ice->state.hiz_usage);
340         }
341
342         if (ice->state.depth_writes_enabled)
343            crocus_depth_cache_add_bo(batch, z_res->bo);
344
345         if (((struct crocus_surface *)zs_surf)->align_res) {
346            crocus_update_align_res(batch, (struct crocus_surface *)zs_surf, false);
347         }
348      }
349
350      if (s_res) {
351         if (may_have_resolved_depth && ice->state.stencil_writes_enabled) {
352            crocus_resource_finish_write(ice, s_res, zs_surf->u.tex.level,
353                                         zs_surf->u.tex.first_layer, num_layers,
354                                         s_res->aux.usage);
355         }
356
357         if (ice->state.stencil_writes_enabled)
358            crocus_depth_cache_add_bo(batch, s_res->bo);
359      }
360   }
361
362   bool may_have_resolved_color =
363      ice->state.stage_dirty & CROCUS_STAGE_DIRTY_BINDINGS_FS;
364
365   for (unsigned i = 0; i < cso_fb->nr_cbufs; i++) {
366      struct crocus_surface *surf = (void *) cso_fb->cbufs[i];
367      if (!surf)
368         continue;
369
370      if (surf->align_res)
371         crocus_update_align_res(batch, surf, false);
372      struct crocus_resource *res = (void *) surf->base.texture;
373      enum isl_aux_usage aux_usage = ice->state.draw_aux_usage[i];
374
375      crocus_render_cache_add_bo(batch, res->bo, surf->view.format,
376                                 aux_usage);
377
378      if (may_have_resolved_color) {
379         union pipe_surface_desc *desc = &surf->base.u;
380         unsigned num_layers =
381            desc->tex.last_layer - desc->tex.first_layer + 1;
382         crocus_resource_finish_render(ice, res, desc->tex.level,
383                                       desc->tex.first_layer, num_layers,
384                                       aux_usage);
385      }
386   }
387}
388
389/**
390 * Clear the cache-tracking sets.
391 */
392void
393crocus_cache_sets_clear(struct crocus_batch *batch)
394{
395   hash_table_foreach(batch->cache.render, render_entry)
396      _mesa_hash_table_remove(batch->cache.render, render_entry);
397
398   set_foreach(batch->cache.depth, depth_entry)
399      _mesa_set_remove(batch->cache.depth, depth_entry);
400}
401
402/**
403 * Emits an appropriate flush for a BO if it has been rendered to within the
404 * same batchbuffer as a read that's about to be emitted.
405 *
406 * The GPU has separate, incoherent caches for the render cache and the
407 * sampler cache, along with other caches.  Usually data in the different
408 * caches don't interact (e.g. we don't render to our driver-generated
409 * immediate constant data), but for render-to-texture in FBOs we definitely
410 * do.  When a batchbuffer is flushed, the kernel will ensure that everything
411 * necessary is flushed before another use of that BO, but for reuse from
412 * different caches within a batchbuffer, it's all our responsibility.
413 */
414void
415crocus_flush_depth_and_render_caches(struct crocus_batch *batch)
416{
417   const struct intel_device_info *devinfo = &batch->screen->devinfo;
418   if (devinfo->ver >= 6) {
419      crocus_emit_pipe_control_flush(batch,
420                                     "cache tracker: render-to-texture",
421                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
422                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
423                                     PIPE_CONTROL_CS_STALL);
424
425      crocus_emit_pipe_control_flush(batch,
426                                     "cache tracker: render-to-texture",
427                                     PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
428                                     PIPE_CONTROL_CONST_CACHE_INVALIDATE);
429   } else {
430      crocus_emit_mi_flush(batch);
431   }
432
433   crocus_cache_sets_clear(batch);
434}
435
436void
437crocus_cache_flush_for_read(struct crocus_batch *batch,
438                            struct crocus_bo *bo)
439{
440   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo) ||
441       _mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
442      crocus_flush_depth_and_render_caches(batch);
443}
444
445static void *
446format_aux_tuple(enum isl_format format, enum isl_aux_usage aux_usage)
447{
448   return (void *)(uintptr_t)((uint32_t)format << 8 | aux_usage);
449}
450
451void
452crocus_cache_flush_for_render(struct crocus_batch *batch,
453                              struct crocus_bo *bo,
454                              enum isl_format format,
455                              enum isl_aux_usage aux_usage)
456{
457   if (_mesa_set_search_pre_hashed(batch->cache.depth, bo->hash, bo))
458      crocus_flush_depth_and_render_caches(batch);
459
460   /* Check to see if this bo has been used by a previous rendering operation
461    * but with a different format or aux usage.  If it has, flush the render
462    * cache so we ensure that it's only in there with one format or aux usage
463    * at a time.
464    *
465    * Even though it's not obvious, this can easily happen in practice.
466    * Suppose a client is blending on a surface with sRGB encode enabled on
467    * gen9.  This implies that you get AUX_USAGE_CCS_D at best.  If the client
468    * then disables sRGB decode and continues blending we will flip on
469    * AUX_USAGE_CCS_E without doing any sort of resolve in-between (this is
470    * perfectly valid since CCS_E is a subset of CCS_D).  However, this means
471    * that we have fragments in-flight which are rendering with UNORM+CCS_E
472    * and other fragments in-flight with SRGB+CCS_D on the same surface at the
473    * same time and the pixel scoreboard and color blender are trying to sort
474    * it all out.  This ends badly (i.e. GPU hangs).
475    *
476    * To date, we have never observed GPU hangs or even corruption to be
477    * associated with switching the format, only the aux usage.  However,
478    * there are comments in various docs which indicate that the render cache
479    * isn't 100% resilient to format changes.  We may as well be conservative
480    * and flush on format changes too.  We can always relax this later if we
481    * find it to be a performance problem.
482    */
483   struct hash_entry *entry =
484      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
485   if (entry && entry->data != format_aux_tuple(format, aux_usage))
486      crocus_flush_depth_and_render_caches(batch);
487}
488
489void
490crocus_render_cache_add_bo(struct crocus_batch *batch,
491                           struct crocus_bo *bo,
492                           enum isl_format format,
493                           enum isl_aux_usage aux_usage)
494{
495#ifndef NDEBUG
496   struct hash_entry *entry =
497      _mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo);
498   if (entry) {
499      /* Otherwise, someone didn't do a flush_for_render and that would be
500       * very bad indeed.
501       */
502      assert(entry->data == format_aux_tuple(format, aux_usage));
503   }
504#endif
505
506   _mesa_hash_table_insert_pre_hashed(batch->cache.render, bo->hash, bo,
507                                      format_aux_tuple(format, aux_usage));
508}
509
510void
511crocus_cache_flush_for_depth(struct crocus_batch *batch,
512                             struct crocus_bo *bo)
513{
514   if (_mesa_hash_table_search_pre_hashed(batch->cache.render, bo->hash, bo))
515      crocus_flush_depth_and_render_caches(batch);
516}
517
518void
519crocus_depth_cache_add_bo(struct crocus_batch *batch, struct crocus_bo *bo)
520{
521   _mesa_set_add_pre_hashed(batch->cache.depth, bo->hash, bo);
522}
523
524static void
525crocus_resolve_color(struct crocus_context *ice,
526                     struct crocus_batch *batch,
527                     struct crocus_resource *res,
528                     unsigned level, unsigned layer,
529                     enum isl_aux_op resolve_op)
530{
531   struct crocus_screen *screen = batch->screen;
532   DBG("%s to res %p level %u layer %u\n", __func__, res, level, layer);
533
534   struct blorp_surf surf;
535   crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
536                                  &res->base.b, res->aux.usage, level, true);
537
538   crocus_batch_maybe_flush(batch, 1500);
539
540   /* Ivybridge PRM Vol 2, Part 1, "11.7 MCS Buffer for Render Target(s)":
541    *
542    *    "Any transition from any value in {Clear, Render, Resolve} to a
543    *     different value in {Clear, Render, Resolve} requires end of pipe
544    *     synchronization."
545    *
546    * In other words, fast clear ops are not properly synchronized with
547    * other drawing.  We need to use a PIPE_CONTROL to ensure that the
548    * contents of the previous draw hit the render target before we resolve
549    * and again afterwards to ensure that the resolve is complete before we
550    * do any more regular drawing.
551    */
552   crocus_emit_end_of_pipe_sync(batch, "color resolve: pre-flush",
553                                PIPE_CONTROL_RENDER_TARGET_FLUSH);
554
555   struct blorp_batch blorp_batch;
556   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
557   blorp_ccs_resolve(&blorp_batch, &surf, level, layer, 1,
558                     isl_format_srgb_to_linear(res->surf.format),
559                     resolve_op);
560   blorp_batch_finish(&blorp_batch);
561
562   /* See comment above */
563   crocus_emit_end_of_pipe_sync(batch, "color resolve: post-flush",
564                                PIPE_CONTROL_RENDER_TARGET_FLUSH);
565}
566
567static void
568crocus_mcs_partial_resolve(struct crocus_context *ice,
569                           struct crocus_batch *batch,
570                           struct crocus_resource *res,
571                           uint32_t start_layer,
572                           uint32_t num_layers)
573{
574   struct crocus_screen *screen = batch->screen;
575
576   DBG("%s to res %p layers %u-%u\n", __func__, res,
577       start_layer, start_layer + num_layers - 1);
578
579   assert(isl_aux_usage_has_mcs(res->aux.usage));
580
581   struct blorp_surf surf;
582   crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
583                                  &res->base.b, res->aux.usage, 0, true);
584
585   struct blorp_batch blorp_batch;
586   blorp_batch_init(&ice->blorp, &blorp_batch, batch, 0);
587   blorp_mcs_partial_resolve(&blorp_batch, &surf,
588                             isl_format_srgb_to_linear(res->surf.format),
589                             start_layer, num_layers);
590   blorp_batch_finish(&blorp_batch);
591}
592
593/**
594 * Perform a HiZ or depth resolve operation.
595 *
596 * For an overview of HiZ ops, see the following sections of the Sandy Bridge
597 * PRM, Volume 1, Part 2:
598 *   - 7.5.3.1 Depth Buffer Clear
599 *   - 7.5.3.2 Depth Buffer Resolve
600 *   - 7.5.3.3 Hierarchical Depth Buffer Resolve
601 */
602void
603crocus_hiz_exec(struct crocus_context *ice,
604                struct crocus_batch *batch,
605                struct crocus_resource *res,
606                unsigned int level, unsigned int start_layer,
607                unsigned int num_layers, enum isl_aux_op op,
608                bool update_clear_depth)
609{
610   struct crocus_screen *screen = batch->screen;
611   const struct intel_device_info *devinfo = &batch->screen->devinfo;
612   assert(crocus_resource_level_has_hiz(res, level));
613   assert(op != ISL_AUX_OP_NONE);
614   UNUSED const char *name = NULL;
615
616   switch (op) {
617   case ISL_AUX_OP_FULL_RESOLVE:
618      name = "depth resolve";
619      break;
620   case ISL_AUX_OP_AMBIGUATE:
621      name = "hiz ambiguate";
622      break;
623   case ISL_AUX_OP_FAST_CLEAR:
624      name = "depth clear";
625      break;
626   case ISL_AUX_OP_PARTIAL_RESOLVE:
627   case ISL_AUX_OP_NONE:
628      unreachable("Invalid HiZ op");
629   }
630
631   DBG("%s %s to res %p level %d layers %d-%d\n",
632       __func__, name, res, level, start_layer, start_layer + num_layers - 1);
633
634   /* The following stalls and flushes are only documented to be required
635    * for HiZ clear operations.  However, they also seem to be required for
636    * resolve operations.
637    *
638    * From the Ivybridge PRM, volume 2, "Depth Buffer Clear":
639    *
640    *   "If other rendering operations have preceded this clear, a
641    *    PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
642    *    enabled must be issued before the rectangle primitive used for
643    *    the depth buffer clear operation."
644    *
645    * Same applies for Gen8 and Gen9.
646    *
647    * In addition, from the Ivybridge PRM, volume 2, 1.10.4.1
648    * PIPE_CONTROL, Depth Cache Flush Enable:
649    *
650    *   "This bit must not be set when Depth Stall Enable bit is set in
651    *    this packet."
652    *
653    * This is confirmed to hold for real, Haswell gets immediate gpu hangs.
654    *
655    * Therefore issue two pipe control flushes, one for cache flush and
656    * another for depth stall.
657    */
658   if (devinfo->ver == 6) {
659      /* From the Sandy Bridge PRM, volume 2 part 1, page 313:
660       *
661       *   "If other rendering operations have preceded this clear, a
662       *   PIPE_CONTROL with write cache flush enabled and Z-inhibit
663       *   disabled must be issued before the rectangle primitive used for
664       *   the depth buffer clear operation.
665       */
666      crocus_emit_pipe_control_flush(batch,
667                                     "hiz op: pre-flushes (1)",
668                                     PIPE_CONTROL_RENDER_TARGET_FLUSH |
669                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
670                                     PIPE_CONTROL_CS_STALL);
671   } else if (devinfo->ver >= 7) {
672      crocus_emit_pipe_control_flush(batch,
673                                     "hiz op: pre-flushes (1/2)",
674                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
675                                     PIPE_CONTROL_CS_STALL);
676      crocus_emit_pipe_control_flush(batch, "hiz op: pre-flushes (2/2)",
677                                     PIPE_CONTROL_DEPTH_STALL);
678   }
679
680   assert(isl_aux_usage_has_hiz(res->aux.usage) && res->aux.bo);
681
682   crocus_batch_maybe_flush(batch, 1500);
683
684   struct blorp_surf surf;
685   crocus_blorp_surf_for_resource(&screen->vtbl, &batch->screen->isl_dev, &surf,
686                                  &res->base.b, res->aux.usage, level, true);
687
688   struct blorp_batch blorp_batch;
689   enum blorp_batch_flags flags = 0;
690   flags |= update_clear_depth ? 0 : BLORP_BATCH_NO_UPDATE_CLEAR_COLOR;
691   blorp_batch_init(&ice->blorp, &blorp_batch, batch, flags);
692   blorp_hiz_op(&blorp_batch, &surf, level, start_layer, num_layers, op);
693   blorp_batch_finish(&blorp_batch);
694
695   /* The following stalls and flushes are only documented to be required
696    * for HiZ clear operations.  However, they also seem to be required for
697    * resolve operations.
698    *
699    * From the Broadwell PRM, volume 7, "Depth Buffer Clear":
700    *
701    *    "Depth buffer clear pass using any of the methods (WM_STATE,
702    *     3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a
703    *     PIPE_CONTROL command with DEPTH_STALL bit and Depth FLUSH bits
704    *     "set" before starting to render.  DepthStall and DepthFlush are
705    *     not needed between consecutive depth clear passes nor is it
706    *     required if the depth clear pass was done with
707    *     'full_surf_clear' bit set in the 3DSTATE_WM_HZ_OP."
708    *
709    * TODO: Such as the spec says, this could be conditional.
710    */
711   if (devinfo->ver == 6) {
712      /* From the Sandy Bridge PRM, volume 2 part 1, page 314:
713       *
714       *     "DevSNB, DevSNB-B{W/A}]: Depth buffer clear pass must be
715       *     followed by a PIPE_CONTROL command with DEPTH_STALL bit set
716       *     and Then followed by Depth FLUSH'
717       */
718      crocus_emit_pipe_control_flush(batch,
719                                     "hiz op: post-flushes (1/2)",
720                                     PIPE_CONTROL_DEPTH_STALL);
721
722      crocus_emit_pipe_control_flush(batch,
723                                     "hiz op: post-flushes (2/2)",
724                                     PIPE_CONTROL_DEPTH_CACHE_FLUSH |
725                                     PIPE_CONTROL_CS_STALL);
726   }
727}
728
729/**
730 * Does the resource's slice have hiz enabled?
731 */
732bool
733crocus_resource_level_has_hiz(const struct crocus_resource *res, uint32_t level)
734{
735   crocus_resource_check_level_layer(res, level, 0);
736   return res->aux.has_hiz & 1 << level;
737}
738
739static bool
740crocus_resource_level_has_aux(const struct crocus_resource *res, uint32_t level)
741{
742   if (isl_aux_usage_has_hiz(res->aux.usage))
743      return crocus_resource_level_has_hiz(res, level);
744   else
745      return level < res->aux.surf.levels;
746}
747
748/** \brief Assert that the level and layer are valid for the resource. */
749void
750crocus_resource_check_level_layer(UNUSED const struct crocus_resource *res,
751                                  UNUSED uint32_t level, UNUSED uint32_t layer)
752{
753   assert(level < res->surf.levels);
754   assert(layer < util_num_layers(&res->base.b, level));
755}
756
757static inline uint32_t
758miptree_level_range_length(const struct crocus_resource *res,
759                           uint32_t start_level, uint32_t num_levels)
760{
761   assert(start_level < res->surf.levels);
762
763   if (num_levels == INTEL_REMAINING_LAYERS)
764      num_levels = res->surf.levels;
765
766   /* Check for overflow */
767   assert(start_level + num_levels >= start_level);
768   assert(start_level + num_levels <= res->surf.levels);
769
770   return num_levels;
771}
772
773static inline uint32_t
774miptree_layer_range_length(const struct crocus_resource *res, uint32_t level,
775                           uint32_t start_layer, uint32_t num_layers)
776{
777   assert(level <= res->base.b.last_level);
778
779   const uint32_t total_num_layers = crocus_get_num_logical_layers(res, level);
780   assert(start_layer < total_num_layers);
781   if (num_layers == INTEL_REMAINING_LAYERS)
782      num_layers = total_num_layers - start_layer;
783   /* Check for overflow */
784   assert(start_layer + num_layers >= start_layer);
785   assert(start_layer + num_layers <= total_num_layers);
786
787   return num_layers;
788}
789
790bool
791crocus_has_invalid_primary(const struct crocus_resource *res,
792                           unsigned start_level, unsigned num_levels,
793                           unsigned start_layer, unsigned num_layers)
794{
795   if (!res->aux.bo)
796      return false;
797
798   /* Clamp the level range to fit the resource */
799   num_levels = miptree_level_range_length(res, start_level, num_levels);
800
801   for (uint32_t l = 0; l < num_levels; l++) {
802      const uint32_t level = start_level + l;
803      if (!crocus_resource_level_has_aux(res, level))
804         continue;
805
806      const uint32_t level_layers =
807         miptree_layer_range_length(res, level, start_layer, num_layers);
808      for (unsigned a = 0; a < level_layers; a++) {
809         enum isl_aux_state aux_state =
810            crocus_resource_get_aux_state(res, level, start_layer + a);
811         if (!isl_aux_state_has_valid_primary(aux_state))
812            return true;
813      }
814   }
815
816   return false;
817}
818
819void
820crocus_resource_prepare_access(struct crocus_context *ice,
821                               struct crocus_resource *res,
822                               uint32_t start_level, uint32_t num_levels,
823                               uint32_t start_layer, uint32_t num_layers,
824                               enum isl_aux_usage aux_usage,
825                               bool fast_clear_supported)
826{
827   if (!res->aux.bo)
828      return;
829
830   /* We can't do resolves on the compute engine, so awkwardly, we have to
831    * do them on the render batch...
832    */
833   struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];
834
835   const uint32_t clamped_levels =
836      miptree_level_range_length(res, start_level, num_levels);
837   for (uint32_t l = 0; l < clamped_levels; l++) {
838      const uint32_t level = start_level + l;
839      if (!crocus_resource_level_has_aux(res, level))
840         continue;
841
842      const uint32_t level_layers =
843         miptree_layer_range_length(res, level, start_layer, num_layers);
844      for (uint32_t a = 0; a < level_layers; a++) {
845         const uint32_t layer = start_layer + a;
846         const enum isl_aux_state aux_state =
847            crocus_resource_get_aux_state(res, level, layer);
848         const enum isl_aux_op aux_op =
849            isl_aux_prepare_access(aux_state, aux_usage, fast_clear_supported);
850
851         /* Prepare the aux buffer for a conditional or unconditional access.
852          * A conditional access is handled by assuming that the access will
853          * not evaluate to a no-op. If the access does in fact occur, the aux
854          * will be in the required state. If it does not, no data is lost
855          * because the aux_op performed is lossless.
856          */
857         if (aux_op == ISL_AUX_OP_NONE) {
858            /* Nothing to do here. */
859         } else if (isl_aux_usage_has_mcs(res->aux.usage)) {
860            assert(aux_op == ISL_AUX_OP_PARTIAL_RESOLVE);
861            crocus_mcs_partial_resolve(ice, batch, res, layer, 1);
862         } else if (isl_aux_usage_has_hiz(res->aux.usage)) {
863            crocus_hiz_exec(ice, batch, res, level, layer, 1, aux_op, false);
864         } else if (res->aux.usage == ISL_AUX_USAGE_STC_CCS) {
865            unreachable("crocus doesn't resolve STC_CCS resources");
866         } else {
867            assert(isl_aux_usage_has_ccs(res->aux.usage));
868            crocus_resolve_color(ice, batch, res, level, layer, aux_op);
869         }
870
871         const enum isl_aux_state new_state =
872            isl_aux_state_transition_aux_op(aux_state, res->aux.usage, aux_op);
873         crocus_resource_set_aux_state(ice, res, level, layer, 1, new_state);
874      }
875   }
876}
877
878void
879crocus_resource_finish_write(struct crocus_context *ice,
880                             struct crocus_resource *res, uint32_t level,
881                             uint32_t start_layer, uint32_t num_layers,
882                             enum isl_aux_usage aux_usage)
883{
884   if (res->base.b.format == PIPE_FORMAT_S8_UINT)
885      res->shadow_needs_update = true;
886
887   if (!crocus_resource_level_has_aux(res, level))
888      return;
889
890   const uint32_t level_layers =
891      miptree_layer_range_length(res, level, start_layer, num_layers);
892
893   for (uint32_t a = 0; a < level_layers; a++) {
894      const uint32_t layer = start_layer + a;
895      const enum isl_aux_state aux_state =
896         crocus_resource_get_aux_state(res, level, layer);
897
898      /* Transition the aux state for a conditional or unconditional write. A
899       * conditional write is handled by assuming that the write applies to
900       * only part of the render target. This prevents the new state from
901       * losing the types of compression that might exist in the current state
902       * (e.g. CLEAR). If the write evaluates to a no-op, the state will still
903       * be able to communicate when resolves are necessary (but it may
904       * falsely communicate this as well).
905       */
906      const enum isl_aux_state new_aux_state =
907         isl_aux_state_transition_write(aux_state, aux_usage, false);
908
909      crocus_resource_set_aux_state(ice, res, level, layer, 1, new_aux_state);
910   }
911}
912
913enum isl_aux_state
914crocus_resource_get_aux_state(const struct crocus_resource *res,
915                              uint32_t level, uint32_t layer)
916{
917   crocus_resource_check_level_layer(res, level, layer);
918   assert(crocus_resource_level_has_aux(res, level));
919
920   return res->aux.state[level][layer];
921}
922
923void
924crocus_resource_set_aux_state(struct crocus_context *ice,
925                              struct crocus_resource *res, uint32_t level,
926                              uint32_t start_layer, uint32_t num_layers,
927                              enum isl_aux_state aux_state)
928{
929   assert(crocus_resource_level_has_aux(res, level));
930
931   num_layers = miptree_layer_range_length(res, level, start_layer, num_layers);
932   for (unsigned a = 0; a < num_layers; a++) {
933      if (res->aux.state[level][start_layer + a] != aux_state) {
934         res->aux.state[level][start_layer + a] = aux_state;
935         ice->state.dirty |= CROCUS_DIRTY_RENDER_RESOLVES_AND_FLUSHES |
936                             CROCUS_DIRTY_COMPUTE_RESOLVES_AND_FLUSHES;
937         /* XXX: Need to track which bindings to make dirty */
938         ice->state.stage_dirty |= CROCUS_ALL_STAGE_DIRTY_BINDINGS;
939      }
940   }
941}
942
943static bool
944isl_formats_are_fast_clear_compatible(enum isl_format a, enum isl_format b)
945{
946   /* On gen8 and earlier, the hardware was only capable of handling 0/1 clear
947    * values so sRGB curve application was a no-op for all fast-clearable
948    * formats.
949    *
950    * On gen9+, the hardware supports arbitrary clear values.  For sRGB clear
951    * values, the hardware interprets the floats, not as what would be
952    * returned from the sampler (or written by the shader), but as being
953    * between format conversion and sRGB curve application.  This means that
954    * we can switch between sRGB and UNORM without having to whack the clear
955    * color.
956    */
957   return isl_format_srgb_to_linear(a) == isl_format_srgb_to_linear(b);
958}
959
960void
961crocus_resource_prepare_texture(struct crocus_context *ice,
962                                struct crocus_resource *res,
963                                enum isl_format view_format,
964                                uint32_t start_level, uint32_t num_levels,
965                                uint32_t start_layer, uint32_t num_layers)
966{
967   enum isl_aux_usage aux_usage =
968      crocus_resource_texture_aux_usage(res);
969
970   bool clear_supported = aux_usage != ISL_AUX_USAGE_NONE;
971
972   /* Clear color is specified as ints or floats and the conversion is done by
973    * the sampler.  If we have a texture view, we would have to perform the
974    * clear color conversion manually.  Just disable clear color.
975    */
976   if (!isl_formats_are_fast_clear_compatible(res->surf.format, view_format))
977      clear_supported = false;
978
979   crocus_resource_prepare_access(ice, res, start_level, num_levels,
980                                  start_layer, num_layers,
981                                  aux_usage, clear_supported);
982}
983
984enum isl_aux_usage
985crocus_resource_render_aux_usage(struct crocus_context *ice,
986                                 struct crocus_resource *res,
987                                 uint32_t level,
988                                 enum isl_format render_format,
989                                 bool draw_aux_disabled)
990{
991   struct crocus_screen *screen = (void *) ice->ctx.screen;
992   struct intel_device_info *devinfo = &screen->devinfo;
993
994   if (draw_aux_disabled)
995      return ISL_AUX_USAGE_NONE;
996
997   switch (res->aux.usage) {
998   case ISL_AUX_USAGE_MCS:
999      return res->aux.usage;
1000
1001   case ISL_AUX_USAGE_CCS_D:
1002      /* Otherwise, we try to fall back to CCS_D */
1003      if (isl_format_supports_ccs_d(devinfo, render_format))
1004         return ISL_AUX_USAGE_CCS_D;
1005
1006      return ISL_AUX_USAGE_NONE;
1007
1008   case ISL_AUX_USAGE_HIZ:
1009      assert(render_format == res->surf.format);
1010      return crocus_resource_level_has_hiz(res, level) ?
1011         res->aux.usage : ISL_AUX_USAGE_NONE;
1012
1013   default:
1014      return ISL_AUX_USAGE_NONE;
1015   }
1016}
1017
1018void
1019crocus_resource_prepare_render(struct crocus_context *ice,
1020                               struct crocus_resource *res, uint32_t level,
1021                               uint32_t start_layer, uint32_t layer_count,
1022                               enum isl_aux_usage aux_usage)
1023{
1024   crocus_resource_prepare_access(ice, res, level, 1, start_layer,
1025                                  layer_count, aux_usage,
1026                                  aux_usage != ISL_AUX_USAGE_NONE);
1027}
1028
1029void
1030crocus_resource_finish_render(struct crocus_context *ice,
1031                              struct crocus_resource *res, uint32_t level,
1032                              uint32_t start_layer, uint32_t layer_count,
1033                              enum isl_aux_usage aux_usage)
1034{
1035   crocus_resource_finish_write(ice, res, level, start_layer, layer_count,
1036                                aux_usage);
1037}
1038
1039static void
1040crocus_update_stencil_shadow(struct crocus_context *ice,
1041                             struct crocus_resource *res)
1042{
1043   struct crocus_screen *screen = (struct crocus_screen *)ice->ctx.screen;
1044   UNUSED const struct intel_device_info *devinfo = &screen->devinfo;
1045   assert(devinfo->ver == 7);
1046
1047   if (!res->shadow_needs_update)
1048      return;
1049
1050   struct pipe_box box;
1051   for (unsigned level = 0; level <= res->base.b.last_level; level++) {
1052      u_box_2d(0, 0,
1053               u_minify(res->base.b.width0, level),
1054               u_minify(res->base.b.height0, level), &box);
1055      const unsigned depth = res->base.b.target == PIPE_TEXTURE_3D ?
1056         u_minify(res->base.b.depth0, level) : res->base.b.array_size;
1057
1058      for (unsigned layer = 0; layer < depth; layer++) {
1059         box.z = layer;
1060         ice->ctx.resource_copy_region(&ice->ctx,
1061                                       &res->shadow->base.b, level, 0, 0, layer,
1062                                       &res->base.b, level, &box);
1063      }
1064   }
1065   res->shadow_needs_update = false;
1066}
1067