/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "anv_private.h"

static bool
lookup_blorp_shader(struct blorp_batch *batch,
                    const void *key, uint32_t key_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_shader_bin *bin =
      anv_device_search_for_kernel(device, device->internal_cache,
                                   key, key_size, NULL);
   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

static bool
upload_blorp_shader(struct blorp_batch *batch, uint32_t stage,
                    const void *key, uint32_t key_size,
                    const void *kernel, uint32_t kernel_size,
                    const struct brw_stage_prog_data *prog_data,
                    uint32_t prog_data_size,
                    uint32_t *kernel_out, void *prog_data_out)
{
   struct blorp_context *blorp = batch->blorp;
   struct anv_device *device = blorp->driver_ctx;

   struct anv_pipeline_bind_map bind_map = {
      .surface_count = 0,
      .sampler_count = 0,
   };

   struct anv_shader_bin *bin =
      anv_device_upload_kernel(device, device->internal_cache, stage,
                               key, key_size, kernel, kernel_size,
                               prog_data, prog_data_size,
                               NULL, 0, NULL, &bind_map);

   if (!bin)
      return false;

   /* The cache already has a reference and it's not going anywhere so there
    * is no need to hold a second reference.
    */
   anv_shader_bin_unref(device, bin);

   *kernel_out = bin->kernel.offset;
   *(const struct brw_stage_prog_data **)prog_data_out = bin->prog_data;

   return true;
}

void
anv_device_init_blorp(struct anv_device *device)
{
   const struct blorp_config config = {
      .use_mesh_shading = device->physical->vk.supported_extensions.NV_mesh_shader,
   };

   blorp_init(&device->blorp, device, &device->isl_dev, &config);
   device->blorp.compiler = device->physical->compiler;
   device->blorp.lookup_shader = lookup_blorp_shader;
   device->blorp.upload_shader = upload_blorp_shader;
   switch (device->info.verx10) {
   case 70:
      device->blorp.exec = gfx7_blorp_exec;
      break;
   case 75:
      device->blorp.exec = gfx75_blorp_exec;
      break;
   case 80:
      device->blorp.exec = gfx8_blorp_exec;
      break;
   case 90:
      device->blorp.exec = gfx9_blorp_exec;
      break;
   case 110:
      device->blorp.exec = gfx11_blorp_exec;
      break;
   case 120:
      device->blorp.exec = gfx12_blorp_exec;
      break;
   case 125:
      device->blorp.exec = gfx125_blorp_exec;
      break;
   default:
      unreachable("Unknown hardware generation");
   }
}

void
anv_device_finish_blorp(struct anv_device *device)
{
   blorp_finish(&device->blorp);
}

static void
anv_blorp_batch_init(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch, enum blorp_batch_flags flags)
{
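   /* Blorp uses the 3D pipeline by default.  A queue without graphics
    * support can only be compute-capable here (see the assert below), so
    * route the batch through blorp's compute path instead.
    */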
   if (!(cmd_buffer->queue_family->queueFlags & VK_QUEUE_GRAPHICS_BIT)) {
      assert(cmd_buffer->queue_family->queueFlags & VK_QUEUE_COMPUTE_BIT);
      flags |= BLORP_BATCH_USE_COMPUTE;
   }

   blorp_batch_init(&cmd_buffer->device->blorp, batch, cmd_buffer, flags);
}

static void
anv_blorp_batch_finish(struct blorp_batch *batch)
{
   blorp_batch_finish(batch);
}

static void
get_blorp_surf_for_anv_buffer(struct anv_device *device,
                              struct anv_buffer *buffer, uint64_t offset,
                              uint32_t width, uint32_t height,
                              uint32_t row_pitch, enum isl_format format,
                              bool is_dest,
                              struct blorp_surf *blorp_surf,
                              struct isl_surf *isl_surf)
{
   bool ok UNUSED;

   *blorp_surf = (struct blorp_surf) {
      .surf = isl_surf,
      .addr = {
         .buffer = buffer->address.bo,
         .offset = buffer->address.offset + offset,
         .mocs = anv_mocs(device, buffer->address.bo,
                          is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                  : ISL_SURF_USAGE_TEXTURE_BIT),
      },
   };

   ok = isl_surf_init(&device->isl_dev, isl_surf,
                     .dim = ISL_SURF_DIM_2D,
                     .format = format,
                     .width = width,
                     .height = height,
                     .depth = 1,
                     .levels = 1,
                     .array_len = 1,
                     .samples = 1,
                     .row_pitch_B = row_pitch,
                     .usage = is_dest ? ISL_SURF_USAGE_RENDER_TARGET_BIT
                                      : ISL_SURF_USAGE_TEXTURE_BIT,
                     .tiling_flags = ISL_TILING_LINEAR_BIT);
   assert(ok);
}

/* Pick something high enough that it won't be used in core and low enough it
 * will never map to an extension.
 */
#define ANV_IMAGE_LAYOUT_EXPLICIT_AUX (VkImageLayout)10000000

static struct blorp_address
anv_to_blorp_address(struct anv_address addr)
{
   return (struct blorp_address) {
      .buffer = addr.bo,
      .offset = addr.offset,
   };
}

static void
get_blorp_surf_for_anv_image(const struct anv_device *device,
                             const struct anv_image *image,
                             VkImageAspectFlags aspect,
                             VkImageUsageFlags usage,
                             VkImageLayout layout,
                             enum isl_aux_usage aux_usage,
                             struct blorp_surf *blorp_surf)
{
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);

   if (layout != ANV_IMAGE_LAYOUT_EXPLICIT_AUX) {
      assert(usage != 0);
      aux_usage = anv_layout_to_aux_usage(&device->info, image,
                                          aspect, usage, layout);
   }

   isl_surf_usage_flags_t mocs_usage =
      (usage & VK_IMAGE_USAGE_TRANSFER_DST_BIT) ?
      ISL_SURF_USAGE_RENDER_TARGET_BIT : ISL_SURF_USAGE_TEXTURE_BIT;

   const struct anv_surface *surface = &image->planes[plane].primary_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, mocs_usage),
      },
   };

   if (aux_usage != ISL_AUX_USAGE_NONE) {
      const struct anv_surface *aux_surface = &image->planes[plane].aux_surface;
      const struct anv_address aux_address =
         anv_image_address(image, &aux_surface->memory_range);

      blorp_surf->aux_usage = aux_usage;
      blorp_surf->aux_surf = &aux_surface->isl;

      if (!anv_address_is_null(aux_address)) {
         blorp_surf->aux_addr = (struct blorp_address) {
            .buffer = aux_address.bo,
            .offset = aux_address.offset,
            .mocs = anv_mocs(device, aux_address.bo, 0),
         };
      }

      /* If we're doing a partial resolve, then we need the indirect clear
       * color.  If we are doing a fast clear and want to store/update the
       * clear color, we also pass the address to blorp, otherwise it will only
       * stomp the CCS to a particular value and won't care about format or
       * clear value.
       */
      if (aspect & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
      } else if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
         const struct anv_address clear_color_addr =
            anv_image_get_clear_color_addr(device, image, aspect);
         blorp_surf->clear_color_addr = anv_to_blorp_address(clear_color_addr);
         blorp_surf->clear_color = (union isl_color_value) {
            .f32 = { ANV_HZ_FC_VAL },
         };
      }
   }
}

static bool
get_blorp_surf_for_anv_shadow_image(const struct anv_device *device,
                                    const struct anv_image *image,
                                    VkImageAspectFlags aspect,
                                    struct blorp_surf *blorp_surf)
{
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   if (!anv_surface_is_valid(&image->planes[plane].shadow_surface))
      return false;

   const struct anv_surface *surface = &image->planes[plane].shadow_surface;
   const struct anv_address address =
      anv_image_address(image, &surface->memory_range);

   *blorp_surf = (struct blorp_surf) {
      .surf = &surface->isl,
      .addr = {
         .buffer = address.bo,
         .offset = address.offset,
         .mocs = anv_mocs(device, address.bo, ISL_SURF_USAGE_RENDER_TARGET_BIT),
      },
   };

   return true;
}

static void
copy_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageCopy2 *region)
{
   VkOffset3D srcOffset =
      vk_image_sanitize_offset(&src_image->vk, region->srcOffset);
   VkOffset3D dstOffset =
      vk_image_sanitize_offset(&dst_image->vk, region->dstOffset);
   VkExtent3D extent =
      vk_image_sanitize_extent(&src_image->vk, region->extent);

   const uint32_t dst_level = region->dstSubresource.mipLevel;
   unsigned dst_base_layer, layer_count;
   if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      dst_base_layer = region->dstOffset.z;
      layer_count = region->extent.depth;
   } else {
      dst_base_layer = region->dstSubresource.baseArrayLayer;
      layer_count = vk_image_subresource_layer_count(&dst_image->vk,
                                                     &region->dstSubresource);
   }

   const uint32_t src_level = region->srcSubresource.mipLevel;
   unsigned src_base_layer;
   if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
      src_base_layer = region->srcOffset.z;
   } else {
      src_base_layer = region->srcSubresource.baseArrayLayer;
      assert(layer_count ==
             vk_image_subresource_layer_count(&src_image->vk,
                                              &region->srcSubresource));
   }

   VkImageAspectFlags src_mask = region->srcSubresource.aspectMask,
      dst_mask = region->dstSubresource.aspectMask;

   assert(anv_image_aspects_compatible(src_mask, dst_mask));

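   /* A mask with more than one aspect (e.g. depth | stencil) is copied one
    * aspect at a time since each aspect lives in its own surface.
    */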
   if (util_bitcount(src_mask) > 1) {
      anv_foreach_image_aspect_bit(aspect_bit, src_image, src_mask) {
         struct blorp_surf src_surf, dst_surf;
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      src_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                      src_image_layout, ISL_AUX_USAGE_NONE,
                                      &src_surf);
         get_blorp_surf_for_anv_image(cmd_buffer->device,
                                      dst_image, 1UL << aspect_bit,
                                      VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                      dst_image_layout, ISL_AUX_USAGE_NONE,
                                      &dst_surf);
         anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                           1UL << aspect_bit,
                                           dst_surf.aux_usage, dst_level,
                                           dst_base_layer, layer_count);

         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }

         struct blorp_surf dst_shadow_surf;
         if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                                 dst_image,
                                                 1UL << aspect_bit,
                                                 &dst_shadow_surf)) {
            for (unsigned i = 0; i < layer_count; i++) {
               blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                          &dst_shadow_surf, dst_level, dst_base_layer + i,
                          srcOffset.x, srcOffset.y,
                          dstOffset.x, dstOffset.y,
                          extent.width, extent.height);
            }
         }
      }
   } else {
      struct blorp_surf src_surf, dst_surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, src_mask,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE,
                                   &src_surf);
      get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, dst_mask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE,
                                   &dst_surf);
      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image, dst_mask,
                                        dst_surf.aux_usage, dst_level,
                                        dst_base_layer, layer_count);

      for (unsigned i = 0; i < layer_count; i++) {
         blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                    &dst_surf, dst_level, dst_base_layer + i,
                    srcOffset.x, srcOffset.y,
                    dstOffset.x, dstOffset.y,
                    extent.width, extent.height);
      }

      struct blorp_surf dst_shadow_surf;
      if (get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                              dst_image, dst_mask,
                                              &dst_shadow_surf)) {
         for (unsigned i = 0; i < layer_count; i++) {
            blorp_copy(batch, &src_surf, src_level, src_base_layer + i,
                       &dst_shadow_surf, dst_level, dst_base_layer + i,
                       srcOffset.x, srcOffset.y,
                       dstOffset.x, dstOffset.y,
                       extent.width, extent.height);
         }
      }
   }
}

void anv_CmdCopyImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageInfo2*                     pCopyImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageInfo->regionCount; r++) {
      copy_image(cmd_buffer, &batch,
                 src_image, pCopyImageInfo->srcImageLayout,
                 dst_image, pCopyImageInfo->dstImageLayout,
                 &pCopyImageInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);
}

static enum isl_format
isl_format_for_size(unsigned size_B)
{
   /* Prefer 32-bit per component formats for CmdFillBuffer */
   switch (size_B) {
   case 1:  return ISL_FORMAT_R8_UINT;
   case 2:  return ISL_FORMAT_R16_UINT;
   case 3:  return ISL_FORMAT_R8G8B8_UINT;
   case 4:  return ISL_FORMAT_R32_UINT;
   case 6:  return ISL_FORMAT_R16G16B16_UINT;
   case 8:  return ISL_FORMAT_R32G32_UINT;
   case 12: return ISL_FORMAT_R32G32B32_UINT;
   case 16: return ISL_FORMAT_R32G32B32A32_UINT;
   default:
      unreachable("Unknown format size");
   }
}

static void
copy_buffer_to_image(struct anv_cmd_buffer *cmd_buffer,
                     struct blorp_batch *batch,
                     struct anv_buffer *anv_buffer,
                     struct anv_image *anv_image,
                     VkImageLayout image_layout,
                     const VkBufferImageCopy2* region,
                     bool buffer_to_image)
{
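   /* Describe both ends of the copy with the same small struct so that the
    * rest of the function can simply point src/dst at whichever one is the
    * image and whichever one is the buffer.
    */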
   struct {
      struct blorp_surf surf;
      uint32_t level;
      VkOffset3D offset;
   } image, buffer, *src, *dst;

   buffer.level = 0;
   buffer.offset = (VkOffset3D) { 0, 0, 0 };

   if (buffer_to_image) {
      src = &buffer;
      dst = &image;
   } else {
      src = &image;
      dst = &buffer;
   }

   const VkImageAspectFlags aspect = region->imageSubresource.aspectMask;

   get_blorp_surf_for_anv_image(cmd_buffer->device, anv_image, aspect,
                                buffer_to_image ?
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT :
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                image_layout, ISL_AUX_USAGE_NONE,
                                &image.surf);
   image.offset =
      vk_image_sanitize_offset(&anv_image->vk, region->imageOffset);
   image.level = region->imageSubresource.mipLevel;

   VkExtent3D extent =
      vk_image_sanitize_extent(&anv_image->vk, region->imageExtent);
   if (anv_image->vk.image_type != VK_IMAGE_TYPE_3D) {
      image.offset.z = region->imageSubresource.baseArrayLayer;
      extent.depth =
         vk_image_subresource_layer_count(&anv_image->vk,
                                          &region->imageSubresource);
   }

   const enum isl_format linear_format =
      anv_get_isl_format(&cmd_buffer->device->info, anv_image->vk.format,
                         aspect, VK_IMAGE_TILING_LINEAR);
   const struct isl_format_layout *linear_fmtl =
      isl_format_get_layout(linear_format);

   const struct vk_image_buffer_layout buffer_layout =
      vk_image_buffer_copy_layout(&anv_image->vk, region);

   /* Some formats have additional restrictions which may cause ISL to
    * fail to create a surface for us.  For example, YCbCr formats
    * have to have 2-pixel aligned strides.
    *
    * To avoid these issues, we always bind the buffer as if it's a
    * "normal" format like RGBA32_UINT.  Since we're using blorp_copy,
    * the format doesn't matter as long as it has the right bpb.
    */
   const VkExtent2D buffer_extent = {
      .width = DIV_ROUND_UP(extent.width, linear_fmtl->bw),
      .height = DIV_ROUND_UP(extent.height, linear_fmtl->bh),
   };
   const enum isl_format buffer_format =
      isl_format_for_size(linear_fmtl->bpb / 8);

   struct isl_surf buffer_isl_surf;
   get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                 anv_buffer, region->bufferOffset,
                                 buffer_extent.width, buffer_extent.height,
                                 buffer_layout.row_stride_B, buffer_format,
                                 false, &buffer.surf, &buffer_isl_surf);

   bool dst_has_shadow = false;
   struct blorp_surf dst_shadow_surf;
   if (&image == dst) {
      /* In this case, the source is the buffer and, since blorp takes its
       * copy dimensions in terms of the source format, we have to use the
       * scaled down version for compressed textures because the source
       * format is an RGB format.
       */
      extent.width = buffer_extent.width;
      extent.height = buffer_extent.height;

      anv_cmd_buffer_mark_image_written(cmd_buffer, anv_image,
                                        aspect, dst->surf.aux_usage,
                                        dst->level,
                                        dst->offset.z, extent.depth);

      dst_has_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                             anv_image, aspect,
                                             &dst_shadow_surf);
   }

   for (unsigned z = 0; z < extent.depth; z++) {
      blorp_copy(batch, &src->surf, src->level, src->offset.z,
                 &dst->surf, dst->level, dst->offset.z,
                 src->offset.x, src->offset.y, dst->offset.x, dst->offset.y,
                 extent.width, extent.height);

      if (dst_has_shadow) {
         blorp_copy(batch, &src->surf, src->level, src->offset.z,
                    &dst_shadow_surf, dst->level, dst->offset.z,
                    src->offset.x, src->offset.y,
                    dst->offset.x, dst->offset.y,
                    extent.width, extent.height);
      }

      image.offset.z++;
      buffer.surf.addr.offset += buffer_layout.image_stride_B;
   }
}

void anv_CmdCopyBufferToImage2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferToImageInfo2*             pCopyBufferToImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferToImageInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_image, dst_image, pCopyBufferToImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferToImageInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, src_buffer, dst_image,
                           pCopyBufferToImageInfo->dstImageLayout,
                           &pCopyBufferToImageInfo->pRegions[r], true);
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdCopyImageToBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyImageToBufferInfo2*             pCopyImageToBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pCopyImageToBufferInfo->srcImage);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyImageToBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyImageToBufferInfo->regionCount; r++) {
      copy_buffer_to_image(cmd_buffer, &batch, dst_buffer, src_image,
                           pCopyImageToBufferInfo->srcImageLayout,
                           &pCopyImageToBufferInfo->pRegions[r], false);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

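/* Sort each coordinate pair so that start <= end.  The return value is true
 * when exactly one of the two pairs had to be swapped, i.e. the blit must be
 * mirrored along that axis.
 */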
static bool
flip_coords(unsigned *src0, unsigned *src1, unsigned *dst0, unsigned *dst1)
{
   bool flip = false;
   if (*src0 > *src1) {
      unsigned tmp = *src0;
      *src0 = *src1;
      *src1 = tmp;
      flip = !flip;
   }

   if (*dst0 > *dst1) {
      unsigned tmp = *dst0;
      *dst0 = *dst1;
      *dst1 = tmp;
      flip = !flip;
   }

   return flip;
}

static void
blit_image(struct anv_cmd_buffer *cmd_buffer,
           struct blorp_batch *batch,
           struct anv_image *src_image,
           VkImageLayout src_image_layout,
           struct anv_image *dst_image,
           VkImageLayout dst_image_layout,
           const VkImageBlit2 *region,
           VkFilter filter)
{
   const VkImageSubresourceLayers *src_res = &region->srcSubresource;
   const VkImageSubresourceLayers *dst_res = &region->dstSubresource;

   struct blorp_surf src, dst;

   enum blorp_filter blorp_filter;
   switch (filter) {
   case VK_FILTER_NEAREST:
      blorp_filter = BLORP_FILTER_NEAREST;
      break;
   case VK_FILTER_LINEAR:
      blorp_filter = BLORP_FILTER_BILINEAR;
      break;
   default:
      unreachable("Invalid filter");
   }

   assert(anv_image_aspects_compatible(src_res->aspectMask,
                                       dst_res->aspectMask));

   anv_foreach_image_aspect_bit(aspect_bit, src_image, src_res->aspectMask) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   src_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                   src_image_layout, ISL_AUX_USAGE_NONE, &src);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   dst_image, 1U << aspect_bit,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   dst_image_layout, ISL_AUX_USAGE_NONE, &dst);

      struct anv_format_plane src_format =
         anv_get_format_aspect(&cmd_buffer->device->info, src_image->vk.format,
                               1U << aspect_bit, src_image->vk.tiling);
      struct anv_format_plane dst_format =
         anv_get_format_aspect(&cmd_buffer->device->info, dst_image->vk.format,
                               1U << aspect_bit, dst_image->vk.tiling);

      unsigned dst_start, dst_end;
      if (dst_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(dst_res->baseArrayLayer == 0);
         dst_start = region->dstOffsets[0].z;
         dst_end = region->dstOffsets[1].z;
      } else {
         dst_start = dst_res->baseArrayLayer;
         dst_end = dst_start +
            vk_image_subresource_layer_count(&dst_image->vk, dst_res);
      }

      unsigned src_start, src_end;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D) {
         assert(src_res->baseArrayLayer == 0);
         src_start = region->srcOffsets[0].z;
         src_end = region->srcOffsets[1].z;
      } else {
         src_start = src_res->baseArrayLayer;
         src_end = src_start +
            vk_image_subresource_layer_count(&src_image->vk, src_res);
      }

      bool flip_z = flip_coords(&src_start, &src_end, &dst_start, &dst_end);
      const unsigned num_layers = dst_end - dst_start;
      float src_z_step = (float)(src_end - src_start) / (float)num_layers;

      /* There is no interpolation to the pixel center during rendering, so
       * add the 0.5 offset ourselves here. */
      float depth_center_offset = 0;
      if (src_image->vk.image_type == VK_IMAGE_TYPE_3D)
         depth_center_offset = 0.5 / num_layers * (src_end - src_start);

      if (flip_z) {
         src_start = src_end;
         src_z_step *= -1;
         depth_center_offset *= -1;
      }

      unsigned src_x0 = region->srcOffsets[0].x;
      unsigned src_x1 = region->srcOffsets[1].x;
      unsigned dst_x0 = region->dstOffsets[0].x;
      unsigned dst_x1 = region->dstOffsets[1].x;
      bool flip_x = flip_coords(&src_x0, &src_x1, &dst_x0, &dst_x1);

      unsigned src_y0 = region->srcOffsets[0].y;
      unsigned src_y1 = region->srcOffsets[1].y;
      unsigned dst_y0 = region->dstOffsets[0].y;
      unsigned dst_y1 = region->dstOffsets[1].y;
      bool flip_y = flip_coords(&src_y0, &src_y1, &dst_y0, &dst_y1);

      anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                        1U << aspect_bit,
                                        dst.aux_usage,
                                        dst_res->mipLevel,
                                        dst_start, num_layers);

      for (unsigned i = 0; i < num_layers; i++) {
         unsigned dst_z = dst_start + i;
         float src_z = src_start + i * src_z_step + depth_center_offset;

         blorp_blit(batch, &src, src_res->mipLevel, src_z,
                    src_format.isl_format, src_format.swizzle,
                    &dst, dst_res->mipLevel, dst_z,
                    dst_format.isl_format, dst_format.swizzle,
                    src_x0, src_y0, src_x1, src_y1,
                    dst_x0, dst_y0, dst_x1, dst_y1,
                    blorp_filter, flip_x, flip_y);
      }
   }
}

void anv_CmdBlitImage2(
    VkCommandBuffer                             commandBuffer,
    const VkBlitImageInfo2*                     pBlitImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pBlitImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pBlitImageInfo->dstImage);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pBlitImageInfo->regionCount; r++) {
      blit_image(cmd_buffer, &batch,
                 src_image, pBlitImageInfo->srcImageLayout,
                 dst_image, pBlitImageInfo->dstImageLayout,
                 &pBlitImageInfo->pRegions[r], pBlitImageInfo->filter);
   }

   anv_blorp_batch_finish(&batch);
}

/**
 * Returns the greatest common divisor of a and b that is a power of two.
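 * For example, gcd_pow2_u64(24, 16) == 8.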
 */
static uint64_t
gcd_pow2_u64(uint64_t a, uint64_t b)
{
   assert(a > 0 || b > 0);

   unsigned a_log2 = ffsll(a) - 1;
   unsigned b_log2 = ffsll(b) - 1;

   /* If either a or b is 0, then a_log2 or b_log2 will be UINT_MAX in which
    * case the MIN2() will take the other one.  If both are 0 then we will
    * hit the assert above.
    */
   return 1 << MIN2(a_log2, b_log2);
}

/* This is maximum possible width/height our HW can handle */
#define MAX_SURFACE_DIM (1ull << 14)

static void
copy_buffer(struct anv_device *device,
            struct blorp_batch *batch,
            struct anv_buffer *src_buffer,
            struct anv_buffer *dst_buffer,
            const VkBufferCopy2 *region)
{
   struct blorp_address src = {
      .buffer = src_buffer->address.bo,
      .offset = src_buffer->address.offset + region->srcOffset,
      .mocs = anv_mocs(device, src_buffer->address.bo,
                       ISL_SURF_USAGE_TEXTURE_BIT),
   };
   struct blorp_address dst = {
      .buffer = dst_buffer->address.bo,
      .offset = dst_buffer->address.offset + region->dstOffset,
      .mocs = anv_mocs(device, dst_buffer->address.bo,
                       ISL_SURF_USAGE_RENDER_TARGET_BIT),
   };

   blorp_buffer_copy(batch, src, dst, region->size);
}

void anv_CmdCopyBuffer2(
    VkCommandBuffer                             commandBuffer,
    const VkCopyBufferInfo2*                    pCopyBufferInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
      copy_buffer(cmd_buffer->device, &batch, src_buffer, dst_buffer,
                  &pCopyBufferInfo->pRegions[r]);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}


void anv_CmdUpdateBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                dataSize,
    const void*                                 pData)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We can't quite grab a full block because the state stream needs a
    * little data at the top to build its linked list.
    */
   const uint32_t max_update_size =
      cmd_buffer->device->dynamic_state_pool.block_size - 64;

   assert(max_update_size < MAX_SURFACE_DIM * 4);

   /* We're about to read data that was written from the CPU.  Flush the
    * texture cache so we don't get anything stale.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before UpdateBuffer");

   while (dataSize) {
      const uint32_t copy_size = MIN2(dataSize, max_update_size);

      struct anv_state tmp_data =
         anv_cmd_buffer_alloc_dynamic_state(cmd_buffer, copy_size, 64);

      memcpy(tmp_data.map, pData, copy_size);

      struct blorp_address src = {
         .buffer = cmd_buffer->device->dynamic_state_pool.block_pool.bo,
         .offset = tmp_data.offset,
         .mocs = isl_mocs(&cmd_buffer->device->isl_dev,
                          ISL_SURF_USAGE_TEXTURE_BIT, false)
      };
      struct blorp_address dst = {
         .buffer = dst_buffer->address.bo,
         .offset = dst_buffer->address.offset + dstOffset,
         .mocs = anv_mocs(cmd_buffer->device, dst_buffer->address.bo,
                          ISL_SURF_USAGE_RENDER_TARGET_BIT),
      };

      blorp_buffer_copy(&batch, src, dst, copy_size);

      dataSize -= copy_size;
      dstOffset += copy_size;
      pData = (void *)pData + copy_size;
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

void anv_CmdFillBuffer(
    VkCommandBuffer                             commandBuffer,
    VkBuffer                                    dstBuffer,
    VkDeviceSize                                dstOffset,
    VkDeviceSize                                fillSize,
    uint32_t                                    data)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_buffer, dst_buffer, dstBuffer);
   struct blorp_surf surf;
   struct isl_surf isl_surf;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   fillSize &= ~3ull;

   /* First, we compute the biggest format that can be used with the
    * given offsets and size.
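    * For example, dstOffset == 8 and fillSize == 32 give bs == 8 and
    * therefore ISL_FORMAT_R32G32_UINT.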
    */
   int bs = 16;
   bs = gcd_pow2_u64(bs, dstOffset);
   bs = gcd_pow2_u64(bs, fillSize);
   enum isl_format isl_format = isl_format_for_size(bs);

   union isl_color_value color = {
      .u32 = { data, data, data, data },
   };

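   /* Fill the buffer in up to three passes: full MAX_SURFACE_DIM x
    * MAX_SURFACE_DIM rectangles first, then any remaining whole rows, and
    * finally one partial row for whatever is left over.
    */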
   const uint64_t max_fill_size = MAX_SURFACE_DIM * MAX_SURFACE_DIM * bs;
   while (fillSize >= max_fill_size) {
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, MAX_SURFACE_DIM,
                  color, 0 /* color_write_disable */);
      fillSize -= max_fill_size;
      dstOffset += max_fill_size;
   }

   uint64_t height = fillSize / (MAX_SURFACE_DIM * bs);
   assert(height < MAX_SURFACE_DIM);
   if (height != 0) {
      const uint64_t rect_fill_size = height * MAX_SURFACE_DIM * bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    MAX_SURFACE_DIM, height,
                                    MAX_SURFACE_DIM * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, MAX_SURFACE_DIM, height,
                  color, 0 /* color_write_disable */);
      fillSize -= rect_fill_size;
      dstOffset += rect_fill_size;
   }

   if (fillSize != 0) {
      const uint32_t width = fillSize / bs;
      get_blorp_surf_for_anv_buffer(cmd_buffer->device,
                                    dst_buffer, dstOffset,
                                    width, 1,
                                    width * bs, isl_format, true,
                                    &surf, &isl_surf);

      blorp_clear(&batch, &surf, isl_format, ISL_SWIZZLE_IDENTITY,
                  0, 0, 1, 0, 0, width, 1,
                  color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);

   cmd_buffer->state.pending_pipe_bits |= ANV_PIPE_RENDER_TARGET_BUFFER_WRITES;
}

void anv_CmdClearColorImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     _image,
    VkImageLayout                               imageLayout,
    const VkClearColorValue*                    pColor,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, _image);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      assert(pRanges[r].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);

      struct blorp_surf surf;
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, pRanges[r].aspectMask,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &surf);

      struct anv_format_plane src_format =
         anv_get_format_aspect(&cmd_buffer->device->info, image->vk.format,
                               VK_IMAGE_ASPECT_COLOR_BIT, image->vk.tiling);

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->vk.extent.width, level);
         const unsigned level_height = anv_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D) {
            base_layer = 0;
            layer_count = anv_minify(image->vk.extent.depth, level);
         }

         anv_cmd_buffer_mark_image_written(cmd_buffer, image,
                                           pRanges[r].aspectMask,
                                           surf.aux_usage, level,
                                           base_layer, layer_count);

         blorp_clear(&batch, &surf,
                     src_format.isl_format, src_format.swizzle,
                     level, base_layer, layer_count,
                     0, 0, level_width, level_height,
                     vk_to_isl_color(*pColor), 0 /* color_write_disable */);
      }
   }

   anv_blorp_batch_finish(&batch);
}

void anv_CmdClearDepthStencilImage(
    VkCommandBuffer                             commandBuffer,
    VkImage                                     image_h,
    VkImageLayout                               imageLayout,
    const VkClearDepthStencilValue*             pDepthStencil,
    uint32_t                                    rangeCount,
    const VkImageSubresourceRange*              pRanges)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, image, image_h);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth, stencil, stencil_shadow;
   if (image->vk.aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &depth);
   } else {
      memset(&depth, 0, sizeof(depth));
   }

   bool has_stencil_shadow = false;
   if (image->vk.aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                   imageLayout, ISL_AUX_USAGE_NONE, &stencil);

      has_stencil_shadow =
         get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                             VK_IMAGE_ASPECT_STENCIL_BIT,
                                             &stencil_shadow);
   } else {
      memset(&stencil, 0, sizeof(stencil));
   }

   for (unsigned r = 0; r < rangeCount; r++) {
      if (pRanges[r].aspectMask == 0)
         continue;

      bool clear_depth = pRanges[r].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
      bool clear_stencil = pRanges[r].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

      unsigned base_layer = pRanges[r].baseArrayLayer;
      uint32_t layer_count =
         vk_image_subresource_layer_count(&image->vk, &pRanges[r]);
      uint32_t level_count =
         vk_image_subresource_level_count(&image->vk, &pRanges[r]);

      for (uint32_t i = 0; i < level_count; i++) {
         const unsigned level = pRanges[r].baseMipLevel + i;
         const unsigned level_width = anv_minify(image->vk.extent.width, level);
         const unsigned level_height = anv_minify(image->vk.extent.height, level);

         if (image->vk.image_type == VK_IMAGE_TYPE_3D)
            layer_count = anv_minify(image->vk.extent.depth, level);

         blorp_clear_depth_stencil(&batch, &depth, &stencil,
                                   level, base_layer, layer_count,
                                   0, 0, level_width, level_height,
                                   clear_depth, pDepthStencil->depth,
                                   clear_stencil ? 0xff : 0,
                                   pDepthStencil->stencil);

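         /* If the image keeps a shadow copy of the stencil data, clear it as
          * well so it stays in sync with the primary stencil surface.
          */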
         if (clear_stencil && has_stencil_shadow) {
            union isl_color_value stencil_color = {
               .u32 = { pDepthStencil->stencil, },
            };
            blorp_clear(&batch, &stencil_shadow,
                        ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                        level, base_layer, layer_count,
                        0, 0, level_width, level_height,
                        stencil_color, 0 /* color_write_disable */);
         }
      }
   }

   anv_blorp_batch_finish(&batch);
}

VkResult
anv_cmd_buffer_alloc_blorp_binding_table(struct anv_cmd_buffer *cmd_buffer,
                                         uint32_t num_entries,
                                         uint32_t *state_offset,
                                         struct anv_state *bt_state)
{
   *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                  state_offset);
   if (bt_state->map == NULL) {
      /* We ran out of space.  Grab a new binding table block. */
      VkResult result = anv_cmd_buffer_new_binding_table_block(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;

      /* Re-emit state base addresses so we get the new surface state base
       * address before we start emitting binding tables etc.
       */
      anv_cmd_buffer_emit_state_base_address(cmd_buffer);

      *bt_state = anv_cmd_buffer_alloc_binding_table(cmd_buffer, num_entries,
                                                     state_offset);
      assert(bt_state->map != NULL);
   }

   return VK_SUCCESS;
}

static VkResult
binding_table_for_surface_state(struct anv_cmd_buffer *cmd_buffer,
                                struct anv_state surface_state,
                                uint32_t *bt_offset)
{
   uint32_t state_offset;
   struct anv_state bt_state;

   VkResult result =
      anv_cmd_buffer_alloc_blorp_binding_table(cmd_buffer, 1, &state_offset,
                                               &bt_state);
   if (result != VK_SUCCESS)
      return result;

   uint32_t *bt_map = bt_state.map;
   bt_map[0] = surface_state.offset + state_offset;

   *bt_offset = bt_state.offset;
   return VK_SUCCESS;
}

static void
clear_color_attachment(struct anv_cmd_buffer *cmd_buffer,
                       struct blorp_batch *batch,
                       const VkClearAttachment *attachment,
                       uint32_t rectCount, const VkClearRect *pRects)
{
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const uint32_t att_idx = attachment->colorAttachment;
   assert(att_idx < gfx->color_att_count);
   const struct anv_attachment *att = &gfx->color_att[att_idx];

   if (att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer, att->surface_state.state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   union isl_color_value clear_color =
      vk_to_isl_color(attachment->clearValue.color);

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            blorp_clear_attachments(batch, binding_table,
                                    ISL_FORMAT_UNSUPPORTED,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    true, clear_color, false, 0.0f, 0, 0);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              ISL_FORMAT_UNSUPPORTED,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              true, clear_color, false, 0.0f, 0, 0);
   }
}

static void
clear_depth_stencil_attachment(struct anv_cmd_buffer *cmd_buffer,
                               struct blorp_batch *batch,
                               const VkClearAttachment *attachment,
                               uint32_t rectCount, const VkClearRect *pRects)
{
   static const union isl_color_value color_value = { .u32 = { 0, } };
   struct anv_cmd_graphics_state *gfx = &cmd_buffer->state.gfx;
   const struct anv_attachment *d_att = &gfx->depth_att;
   const struct anv_attachment *s_att = &gfx->stencil_att;
   if (d_att->vk_format == VK_FORMAT_UNDEFINED &&
       s_att->vk_format == VK_FORMAT_UNDEFINED)
      return;

   bool clear_depth = attachment->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT;
   bool clear_stencil = attachment->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT;

   enum isl_format depth_format = ISL_FORMAT_UNSUPPORTED;
   if (d_att->vk_format != VK_FORMAT_UNDEFINED) {
      depth_format = anv_get_isl_format(&cmd_buffer->device->info,
                                        d_att->vk_format,
                                        VK_IMAGE_ASPECT_DEPTH_BIT,
                                        VK_IMAGE_TILING_OPTIMAL);
   }

   uint32_t binding_table;
   VkResult result =
      binding_table_for_surface_state(cmd_buffer,
                                      gfx->null_surface_state,
                                      &binding_table);
   if (result != VK_SUCCESS)
      return;

   /* If multiview is enabled we ignore baseArrayLayer and layerCount */
   if (gfx->view_mask) {
      u_foreach_bit(view_idx, gfx->view_mask) {
         for (uint32_t r = 0; r < rectCount; ++r) {
            const VkOffset2D offset = pRects[r].rect.offset;
            const VkExtent2D extent = pRects[r].rect.extent;
            VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
            blorp_clear_attachments(batch, binding_table,
                                    depth_format,
                                    gfx->samples,
                                    view_idx, 1,
                                    offset.x, offset.y,
                                    offset.x + extent.width,
                                    offset.y + extent.height,
                                    false, color_value,
                                    clear_depth, value.depth,
                                    clear_stencil ? 0xff : 0, value.stencil);
         }
      }
      return;
   }

   for (uint32_t r = 0; r < rectCount; ++r) {
      const VkOffset2D offset = pRects[r].rect.offset;
      const VkExtent2D extent = pRects[r].rect.extent;
      VkClearDepthStencilValue value = attachment->clearValue.depthStencil;
      assert(pRects[r].layerCount != VK_REMAINING_ARRAY_LAYERS);
      blorp_clear_attachments(batch, binding_table,
                              depth_format,
                              gfx->samples,
                              pRects[r].baseArrayLayer,
                              pRects[r].layerCount,
                              offset.x, offset.y,
                              offset.x + extent.width, offset.y + extent.height,
                              false, color_value,
                              clear_depth, value.depth,
                              clear_stencil ? 0xff : 0, value.stencil);
   }
}

void anv_CmdClearAttachments(
    VkCommandBuffer                             commandBuffer,
    uint32_t                                    attachmentCount,
    const VkClearAttachment*                    pAttachments,
    uint32_t                                    rectCount,
    const VkClearRect*                          pRects)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);

   /* Because this gets called within a render pass, we tell blorp not to
    * trash our depth and stencil buffers.
    */
   struct blorp_batch batch;
   enum blorp_batch_flags flags = BLORP_BATCH_NO_EMIT_DEPTH_STENCIL;
   if (cmd_buffer->state.conditional_render_enabled) {
      anv_cmd_emit_conditional_render_predicate(cmd_buffer);
      flags |= BLORP_BATCH_PREDICATE_ENABLE;
   }
   anv_blorp_batch_init(cmd_buffer, &batch, flags);

   for (uint32_t a = 0; a < attachmentCount; ++a) {
      if (pAttachments[a].aspectMask & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV) {
         assert(pAttachments[a].aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
         clear_color_attachment(cmd_buffer, &batch,
                                &pAttachments[a],
                                rectCount, pRects);
      } else {
         clear_depth_stencil_attachment(cmd_buffer, &batch,
                                        &pAttachments[a],
                                        rectCount, pRects);
      }
   }

   anv_blorp_batch_finish(&batch);
}

enum subpass_stage {
   SUBPASS_STAGE_LOAD,
   SUBPASS_STAGE_DRAW,
   SUBPASS_STAGE_RESOLVE,
};
1361
1362void
1363anv_image_msaa_resolve(struct anv_cmd_buffer *cmd_buffer,
1364                       const struct anv_image *src_image,
1365                       enum isl_aux_usage src_aux_usage,
1366                       uint32_t src_level, uint32_t src_base_layer,
1367                       const struct anv_image *dst_image,
1368                       enum isl_aux_usage dst_aux_usage,
1369                       uint32_t dst_level, uint32_t dst_base_layer,
1370                       VkImageAspectFlagBits aspect,
1371                       uint32_t src_x, uint32_t src_y,
1372                       uint32_t dst_x, uint32_t dst_y,
1373                       uint32_t width, uint32_t height,
1374                       uint32_t layer_count,
1375                       enum blorp_filter filter)
1376{
1377   struct blorp_batch batch;
1378   anv_blorp_batch_init(cmd_buffer, &batch, 0);
1379   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);
1380
1381   assert(src_image->vk.image_type == VK_IMAGE_TYPE_2D);
1382   assert(src_image->vk.samples > 1);
1383   assert(dst_image->vk.image_type == VK_IMAGE_TYPE_2D);
1384   assert(dst_image->vk.samples == 1);
1385
1386   struct blorp_surf src_surf, dst_surf;
1387   get_blorp_surf_for_anv_image(cmd_buffer->device, src_image, aspect,
1388                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
1389                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
1390                                src_aux_usage, &src_surf);
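   /* For an MCS-compressed source, samples still marked "clear" in the MCS
    * resolve to the clear color, so give blorp the clear-color address so it
    * can fetch the correct value while reading the source.
    */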
   if (src_aux_usage == ISL_AUX_USAGE_MCS) {
      src_surf.clear_color_addr = anv_to_blorp_address(
         anv_image_get_clear_color_addr(cmd_buffer->device, src_image,
                                        VK_IMAGE_ASPECT_COLOR_BIT));
   }
   get_blorp_surf_for_anv_image(cmd_buffer->device, dst_image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                dst_aux_usage, &dst_surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, dst_image,
                                     aspect, dst_aux_usage,
                                     dst_level, dst_base_layer, layer_count);

   if (filter == BLORP_FILTER_NONE) {
      /* If no explicit filter is provided, then it's implied by the type of
       * the source image.
       */
      if ((src_surf.surf->usage & ISL_SURF_USAGE_DEPTH_BIT) ||
          (src_surf.surf->usage & ISL_SURF_USAGE_STENCIL_BIT) ||
          isl_format_has_int_channel(src_surf.surf->format)) {
         filter = BLORP_FILTER_SAMPLE_0;
      } else {
         filter = BLORP_FILTER_AVERAGE;
      }
   }
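
   /* Averaging is only meaningful for normalized/float color data; integer
    * formats and depth/stencil resolves have to copy a single sample, which
    * is why SAMPLE_0 is chosen above.
    */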

   for (uint32_t l = 0; l < layer_count; l++) {
      blorp_blit(&batch,
                 &src_surf, src_level, src_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 &dst_surf, dst_level, dst_base_layer + l,
                 ISL_FORMAT_UNSUPPORTED, ISL_SWIZZLE_IDENTITY,
                 src_x, src_y, src_x + width, src_y + height,
                 dst_x, dst_y, dst_x + width, dst_y + height,
                 filter, false, false);
   }

   anv_blorp_batch_finish(&batch);
}

static void
resolve_image(struct anv_cmd_buffer *cmd_buffer,
              struct anv_image *src_image,
              VkImageLayout src_image_layout,
              struct anv_image *dst_image,
              VkImageLayout dst_image_layout,
              const VkImageResolve2 *region)
{
   assert(region->srcSubresource.aspectMask == region->dstSubresource.aspectMask);
   assert(vk_image_subresource_layer_count(&src_image->vk, &region->srcSubresource) ==
          vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource));

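   /* The source and destination layer counts were just asserted to match, so
    * it does not matter which subresource we take it from.
    */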
   const uint32_t layer_count =
      vk_image_subresource_layer_count(&dst_image->vk, &region->dstSubresource);

   anv_foreach_image_aspect_bit(aspect_bit, src_image,
                                region->srcSubresource.aspectMask) {
      enum isl_aux_usage src_aux_usage =
         anv_layout_to_aux_usage(&cmd_buffer->device->info, src_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                 src_image_layout);
      enum isl_aux_usage dst_aux_usage =
         anv_layout_to_aux_usage(&cmd_buffer->device->info, dst_image,
                                 (1 << aspect_bit),
                                 VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                 dst_image_layout);

      anv_image_msaa_resolve(cmd_buffer,
                             src_image, src_aux_usage,
                             region->srcSubresource.mipLevel,
                             region->srcSubresource.baseArrayLayer,
                             dst_image, dst_aux_usage,
                             region->dstSubresource.mipLevel,
                             region->dstSubresource.baseArrayLayer,
                             (1 << aspect_bit),
                             region->srcOffset.x,
                             region->srcOffset.y,
                             region->dstOffset.x,
                             region->dstOffset.y,
                             region->extent.width,
                             region->extent.height,
                             layer_count, BLORP_FILTER_NONE);
   }
}

void anv_CmdResolveImage2(
    VkCommandBuffer                             commandBuffer,
    const VkResolveImageInfo2*                  pResolveImageInfo)
{
   ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);
   ANV_FROM_HANDLE(anv_image, src_image, pResolveImageInfo->srcImage);
   ANV_FROM_HANDLE(anv_image, dst_image, pResolveImageInfo->dstImage);

   for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {
      resolve_image(cmd_buffer,
                    src_image, pResolveImageInfo->srcImageLayout,
                    dst_image, pResolveImageInfo->dstImageLayout,
                    &pResolveImageInfo->pRegions[r]);
   }
}

void
anv_image_copy_to_shadow(struct anv_cmd_buffer *cmd_buffer,
                         const struct anv_image *image,
                         VkImageAspectFlagBits aspect,
                         uint32_t base_level, uint32_t level_count,
                         uint32_t base_layer, uint32_t layer_count)
{
   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   /* We don't know who touched the main surface last so flush a bunch of
    * caches to ensure we get good data.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TEXTURE_CACHE_INVALIDATE_BIT,
                             "before copy_to_shadow");

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
                                VK_IMAGE_LAYOUT_GENERAL,
                                ISL_AUX_USAGE_NONE, &surf);
   assert(surf.aux_usage == ISL_AUX_USAGE_NONE);

   struct blorp_surf shadow_surf;
   get_blorp_surf_for_anv_shadow_image(cmd_buffer->device,
                                       image, aspect, &shadow_surf);

   for (uint32_t l = 0; l < level_count; l++) {
      const uint32_t level = base_level + l;

      const VkExtent3D extent = vk_image_mip_level_extent(&image->vk, level);

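      /* For 3D images, every depth slice of this miplevel needs to be
       * copied, so use the minified depth as the layer count.
       */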
      if (image->vk.image_type == VK_IMAGE_TYPE_3D)
         layer_count = extent.depth;

      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;

         blorp_copy(&batch, &surf, level, layer,
                    &shadow_surf, level, layer,
                    0, 0, 0, 0, extent.width, extent.height);
      }
   }

   /* We just wrote to the buffer with the render cache.  Flush it. */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT,
                             "after copy_to_shadow");

   anv_blorp_batch_finish(&batch);
}

void
anv_image_clear_color(struct anv_cmd_buffer *cmd_buffer,
                      const struct anv_image *image,
                      VkImageAspectFlagBits aspect,
                      enum isl_aux_usage aux_usage,
                      enum isl_format format, struct isl_swizzle swizzle,
                      uint32_t level, uint32_t base_layer, uint32_t layer_count,
                      VkRect2D area, union isl_color_value clear_color)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);

   /* We don't support planar images with multisampling yet */
   assert(image->n_planes == 1);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                VK_IMAGE_USAGE_TRANSFER_DST_BIT,
                                ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                aux_usage, &surf);
   anv_cmd_buffer_mark_image_written(cmd_buffer, image, aspect, aux_usage,
                                     level, base_layer, layer_count);

   blorp_clear(&batch, &surf, format, anv_swizzle_for_render(swizzle),
               level, base_layer, layer_count,
               area.offset.x, area.offset.y,
               area.offset.x + area.extent.width,
               area.offset.y + area.extent.height,
               clear_color, 0 /* color_write_disable */);

   anv_blorp_batch_finish(&batch);
}

void
anv_image_clear_depth_stencil(struct anv_cmd_buffer *cmd_buffer,
                              const struct anv_image *image,
                              VkImageAspectFlags aspects,
                              enum isl_aux_usage depth_aux_usage,
                              uint32_t level,
                              uint32_t base_layer, uint32_t layer_count,
                              VkRect2D area,
                              float depth_value, uint8_t stencil_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   depth_aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance.  If it does this, we need to flush it out of the depth
    * cache before rendering to it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before clear DS");

   blorp_clear_depth_stencil(&batch, &depth, &stencil,
                             level, base_layer, layer_count,
                             area.offset.x, area.offset.y,
                             area.offset.x + area.extent.width,
                             area.offset.y + area.extent.height,
                             aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                             depth_value,
                             (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 0xff : 0,
                             stencil_value);

   /* Blorp may choose to clear stencil using RGBA32_UINT for better
    * performance.  If it does this, we need to flush it out of the render
    * cache before someone starts trying to do stencil on it.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after clear DS");

   struct blorp_surf stencil_shadow;
   if ((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
       get_blorp_surf_for_anv_shadow_image(cmd_buffer->device, image,
                                           VK_IMAGE_ASPECT_STENCIL_BIT,
                                           &stencil_shadow)) {
      union isl_color_value stencil_color = {
         .u32 = { stencil_value },
      };
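      /* The shadow copy is cleared as R8_UINT, so only the first channel of
       * the clear color is consumed.
       */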
      blorp_clear(&batch, &stencil_shadow,
                  ISL_FORMAT_R8_UINT, ISL_SWIZZLE_IDENTITY,
                  level, base_layer, layer_count,
                  area.offset.x, area.offset.y,
                  area.offset.x + area.extent.width,
                  area.offset.y + area.extent.height,
                  stencil_color, 0 /* color_write_disable */);
   }

   anv_blorp_batch_finish(&batch);
}

void
anv_image_hiz_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op hiz_op)
{
   assert(aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, level));
   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   assert(plane == 0);

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device,
                                image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage, &surf);

   blorp_hiz_op(&batch, &surf, level, base_layer, layer_count, hiz_op);

   anv_blorp_batch_finish(&batch);
}

void
anv_image_hiz_clear(struct anv_cmd_buffer *cmd_buffer,
                    const struct anv_image *image,
                    VkImageAspectFlags aspects,
                    uint32_t level,
                    uint32_t base_layer, uint32_t layer_count,
                    VkRect2D area, uint8_t stencil_value)
{
   assert(image->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT |
                               VK_IMAGE_ASPECT_STENCIL_BIT));

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch, 0);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf depth = {};
   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_DEPTH_BIT);
      assert(base_layer + layer_count <=
             anv_image_aux_layers(image, VK_IMAGE_ASPECT_DEPTH_BIT, level));
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_DEPTH_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &depth);
   }

   struct blorp_surf stencil = {};
   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
      const uint32_t plane =
         anv_image_aspect_to_plane(image, VK_IMAGE_ASPECT_STENCIL_BIT);
      get_blorp_surf_for_anv_image(cmd_buffer->device,
                                   image, VK_IMAGE_ASPECT_STENCIL_BIT,
                                   0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                   image->planes[plane].aux_usage, &stencil);
   }

   /* From the Sky Lake PRM Volume 7, "Depth Buffer Clear":
    *
    *    "The following is required when performing a depth buffer clear with
    *    using the WM_STATE or 3DSTATE_WM:
    *
    *       * If other rendering operations have preceded this clear, a
    *         PIPE_CONTROL with depth cache flush enabled, Depth Stall bit
    *         enabled must be issued before the rectangle primitive used for
    *         the depth buffer clear operation.
    *       * [...]"
    *
    * Even though the PRM only says that this is required if using 3DSTATE_WM
    * and a 3DPRIMITIVE, the GPU appears to also need this to avoid occasional
    * hangs when doing a clear with WM_HZ_OP.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "before clear hiz");

   if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
       depth.aux_usage == ISL_AUX_USAGE_HIZ_CCS_WT) {
      /* From Bspec 47010 (Depth Buffer Clear):
       *
       *    Since the fast clear cycles to CCS are not cached in TileCache,
       *    any previous depth buffer writes to overlapping pixels must be
       *    flushed out of TileCache before a succeeding Depth Buffer Clear.
       *    This restriction only applies to Depth Buffer with write-thru
       *    enabled, since fast clears to CCS only occur for write-thru mode.
       *
       * There may have been a write to this depth buffer. Flush it from the
       * tile cache just in case.
       */
      anv_add_pending_pipe_bits(cmd_buffer,
                                ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT,
                                "before clear hiz_ccs_wt");
   }

   blorp_hiz_clear_depth_stencil(&batch, &depth, &stencil,
                                 level, base_layer, layer_count,
                                 area.offset.x, area.offset.y,
                                 area.offset.x + area.extent.width,
                                 area.offset.y + area.extent.height,
                                 aspects & VK_IMAGE_ASPECT_DEPTH_BIT,
                                 ANV_HZ_FC_VAL,
                                 aspects & VK_IMAGE_ASPECT_STENCIL_BIT,
                                 stencil_value);

   anv_blorp_batch_finish(&batch);

   /* From the SKL PRM, Depth Buffer Clear:
    *
    *    "Depth Buffer Clear Workaround
    *
    *    Depth buffer clear pass using any of the methods (WM_STATE,
    *    3DSTATE_WM or 3DSTATE_WM_HZ_OP) must be followed by a PIPE_CONTROL
    *    command with DEPTH_STALL bit and Depth FLUSH bits “set” before
    *    starting to render.  DepthStall and DepthFlush are not needed between
    *    consecutive depth clear passes nor is it required if the depth-clear
    *    pass was done with “full_surf_clear” bit set in the
    *    3DSTATE_WM_HZ_OP."
    *
    * Even though the PRM provides a bunch of conditions under which this is
    * supposedly unnecessary, we choose to perform the flush unconditionally
    * just to be safe.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_DEPTH_CACHE_FLUSH_BIT |
                             ANV_PIPE_DEPTH_STALL_BIT,
                             "after clear hiz");
}

void
anv_image_mcs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op mcs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT);
   assert(image->vk.samples > 1);
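   /* Multisampled images have exactly one miplevel, hence level 0 below. */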
   assert(base_layer + layer_count <= anv_image_aux_layers(image, aspect, 0));

   /* Multisampling with multi-planar formats is not supported */
   assert(image->n_planes == 1);

   const struct intel_device_info *devinfo = &cmd_buffer->device->info;
   struct blorp_batch batch;
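   /* Multiplying a flag by a bool below either keeps or drops that flag:
    * predication is only enabled when requested, and clear-color updates are
    * skipped when no clear value is provided.
    */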
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate +
                        BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                ISL_AUX_USAGE_MCS, &surf);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                                ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             (devinfo->verx10 == 125 ?
                                ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                                ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before fast clear mcs");

   switch (mcs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, format, swizzle,
                       0, base_layer, layer_count,
                       0, 0, image->vk.extent.width, image->vk.extent.height);
      break;
   case ISL_AUX_OP_PARTIAL_RESOLVE:
      blorp_mcs_partial_resolve(&batch, &surf, format,
                                base_layer, layer_count);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_AMBIGUATE:
   default:
      unreachable("Unsupported MCS operation");
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                                ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after fast clear mcs");

   anv_blorp_batch_finish(&batch);
}

void
anv_image_ccs_op(struct anv_cmd_buffer *cmd_buffer,
                 const struct anv_image *image,
                 enum isl_format format, struct isl_swizzle swizzle,
                 VkImageAspectFlagBits aspect, uint32_t level,
                 uint32_t base_layer, uint32_t layer_count,
                 enum isl_aux_op ccs_op, union isl_color_value *clear_value,
                 bool predicate)
{
   assert(image->vk.aspects & VK_IMAGE_ASPECT_ANY_COLOR_BIT_ANV);
   assert(image->vk.samples == 1);
   assert(level < anv_image_aux_levels(image, aspect));
   /* Multi-LOD YCbCr is not allowed */
   assert(image->n_planes == 1 || level == 0);
   assert(base_layer + layer_count <=
          anv_image_aux_layers(image, aspect, level));

   const uint32_t plane = anv_image_aspect_to_plane(image, aspect);
   const struct intel_device_info *devinfo = &cmd_buffer->device->info;

   struct blorp_batch batch;
   anv_blorp_batch_init(cmd_buffer, &batch,
                        BLORP_BATCH_PREDICATE_ENABLE * predicate +
                        BLORP_BATCH_NO_UPDATE_CLEAR_COLOR * !clear_value);
   assert((batch.flags & BLORP_BATCH_USE_COMPUTE) == 0);

   struct blorp_surf surf;
   get_blorp_surf_for_anv_image(cmd_buffer->device, image, aspect,
                                0, ANV_IMAGE_LAYOUT_EXPLICIT_AUX,
                                image->planes[plane].aux_usage,
                                &surf);

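   /* anv_minify() halves the level-0 dimension once per LOD (clamping at 1),
    * giving the dimensions of the miplevel being operated on.
    */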
   uint32_t level_width = anv_minify(surf.surf->logical_level0_px.w, level);
   uint32_t level_height = anv_minify(surf.surf->logical_level0_px.h, level);

   /* Blorp will store the clear color for us if we provide the clear color
    * address and we are doing a fast clear. So we save the clear value into
    * the blorp surface.
    */
   if (clear_value)
      surf.clear_color = *clear_value;

   /* From the Sky Lake PRM Vol. 7, "Render Target Fast Clear":
    *
    *    "After Render target fast clear, pipe-control with color cache
    *    write-flush must be issued before sending any DRAW commands on
    *    that render target."
    *
    * This comment is a bit cryptic and doesn't really tell you what's going
    * on or what's really needed.  It appears that fast clear ops are not
    * properly synchronized with other drawing.  This means that we cannot
    * have a fast clear operation in the pipe at the same time as other
    * regular drawing operations.  We need to use a PIPE_CONTROL to ensure
    * that the contents of the previous draw hit the render target before we
    * resolve and then use a second PIPE_CONTROL after the resolve to ensure
    * that it is completed before any additional drawing occurs.
    */
   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                                ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             (devinfo->verx10 == 125 ?
                                ANV_PIPE_HDC_PIPELINE_FLUSH_BIT |
                                ANV_PIPE_DATA_CACHE_FLUSH_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "before fast clear ccs");

   switch (ccs_op) {
   case ISL_AUX_OP_FAST_CLEAR:
      blorp_fast_clear(&batch, &surf, format, swizzle,
                       level, base_layer, layer_count,
                       0, 0, level_width, level_height);
      break;
   case ISL_AUX_OP_FULL_RESOLVE:
   case ISL_AUX_OP_PARTIAL_RESOLVE: {
      /* Wa_1508744258: Enable RHWO optimization for resolves */
      const bool enable_rhwo_opt = cmd_buffer->device->info.verx10 == 120;

      if (enable_rhwo_opt)
         cmd_buffer->state.pending_rhwo_optimization_enabled = true;

      blorp_ccs_resolve(&batch, &surf, level, base_layer, layer_count,
                        format, ccs_op);

      if (enable_rhwo_opt)
         cmd_buffer->state.pending_rhwo_optimization_enabled = false;
      break;
   }
   case ISL_AUX_OP_AMBIGUATE:
      for (uint32_t a = 0; a < layer_count; a++) {
         const uint32_t layer = base_layer + a;
         blorp_ccs_ambiguate(&batch, &surf, level, layer);
      }
      break;
   default:
      unreachable("Unsupported CCS operation");
   }

   anv_add_pending_pipe_bits(cmd_buffer,
                             ANV_PIPE_RENDER_TARGET_CACHE_FLUSH_BIT |
                             (devinfo->verx10 == 120 ?
                                ANV_PIPE_TILE_CACHE_FLUSH_BIT |
                                ANV_PIPE_DEPTH_STALL_BIT : 0) |
                             ANV_PIPE_PSS_STALL_SYNC_BIT |
                             ANV_PIPE_END_OF_PIPE_SYNC_BIT,
                             "after fast clear ccs");

   anv_blorp_batch_finish(&batch);
}
