1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2016 Red Hat.
3bf215546Sopenharmony_ci * Copyright © 2016 Bas Nieuwenhuizen
4bf215546Sopenharmony_ci *
5bf215546Sopenharmony_ci * based in part on anv driver which is:
6bf215546Sopenharmony_ci * Copyright © 2015 Intel Corporation
7bf215546Sopenharmony_ci *
8bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
9bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
10bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
11bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
12bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
13bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
16bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
17bf215546Sopenharmony_ci * Software.
18bf215546Sopenharmony_ci *
19bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
22bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
25bf215546Sopenharmony_ci * IN THE SOFTWARE.
26bf215546Sopenharmony_ci */
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci#include "radv_cs.h"
29bf215546Sopenharmony_ci#include "radv_debug.h"
30bf215546Sopenharmony_ci#include "radv_meta.h"
31bf215546Sopenharmony_ci#include "radv_private.h"
32bf215546Sopenharmony_ci#include "radv_radeon_winsys.h"
33bf215546Sopenharmony_ci#include "radv_shader.h"
34bf215546Sopenharmony_ci#include "sid.h"
35bf215546Sopenharmony_ci#include "vk_format.h"
36bf215546Sopenharmony_ci#include "vk_util.h"
37bf215546Sopenharmony_ci#include "vk_enum_defines.h"
38bf215546Sopenharmony_ci#include "vk_common_entrypoints.h"
39bf215546Sopenharmony_ci
40bf215546Sopenharmony_ci#include "ac_debug.h"
41bf215546Sopenharmony_ci#include "ac_shader_args.h"
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci#include "util/fast_idiv_by_const.h"
44bf215546Sopenharmony_ci
/*
 * Prefetch bit flags: one bit for the VBO descriptors and one bit per shader
 * stage (VS, TCS, TES, GS, PS, MS). The values are distinct powers of two so
 * they can be ORed together into a mask.
 */
enum {
   RADV_PREFETCH_VBO_DESCRIPTORS = 1 << 0,
   RADV_PREFETCH_VS = 1 << 1,
   RADV_PREFETCH_TCS = 1 << 2,
   RADV_PREFETCH_TES = 1 << 3,
   RADV_PREFETCH_GS = 1 << 4,
   RADV_PREFETCH_PS = 1 << 5,
   RADV_PREFETCH_MS = 1 << 6,

   /* Every per-stage shader bit, i.e. everything except the VBO descriptors. */
   RADV_PREFETCH_SHADERS = RADV_PREFETCH_VS |
                           RADV_PREFETCH_TCS |
                           RADV_PREFETCH_TES |
                           RADV_PREFETCH_GS |
                           RADV_PREFETCH_PS |
                           RADV_PREFETCH_MS
};
56bf215546Sopenharmony_ci
/*
 * Forward declaration. The function is static, so its definition lives
 * elsewhere in this translation unit (not visible in this excerpt).
 * NOTE(review): judging by the parameter names this transitions `range` of
 * `image` from src_layout to dst_layout, possibly across queue families --
 * confirm against the definition.
 */
static void radv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer,
                                         struct radv_image *image, VkImageLayout src_layout,
                                         bool src_render_loop, VkImageLayout dst_layout,
                                         bool dst_render_loop, uint32_t src_family_index,
                                         uint32_t dst_family_index, const VkImageSubresourceRange *range,
                                         struct radv_sample_locations_state *sample_locs);

/*
 * Forward declaration of a static helper defined elsewhere in this
 * translation unit. The unit of `size` is not visible here -- check the
 * definition.
 */
static void radv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size);
65bf215546Sopenharmony_ci
66bf215546Sopenharmony_ciconst struct radv_dynamic_state default_dynamic_state = {
67bf215546Sopenharmony_ci   .viewport =
68bf215546Sopenharmony_ci      {
69bf215546Sopenharmony_ci         .count = 0,
70bf215546Sopenharmony_ci      },
71bf215546Sopenharmony_ci   .scissor =
72bf215546Sopenharmony_ci      {
73bf215546Sopenharmony_ci         .count = 0,
74bf215546Sopenharmony_ci      },
75bf215546Sopenharmony_ci   .line_width = 1.0f,
76bf215546Sopenharmony_ci   .depth_bias =
77bf215546Sopenharmony_ci      {
78bf215546Sopenharmony_ci         .bias = 0.0f,
79bf215546Sopenharmony_ci         .clamp = 0.0f,
80bf215546Sopenharmony_ci         .slope = 0.0f,
81bf215546Sopenharmony_ci      },
82bf215546Sopenharmony_ci   .blend_constants = {0.0f, 0.0f, 0.0f, 0.0f},
83bf215546Sopenharmony_ci   .depth_bounds =
84bf215546Sopenharmony_ci      {
85bf215546Sopenharmony_ci         .min = 0.0f,
86bf215546Sopenharmony_ci         .max = 1.0f,
87bf215546Sopenharmony_ci      },
88bf215546Sopenharmony_ci   .stencil_compare_mask =
89bf215546Sopenharmony_ci      {
90bf215546Sopenharmony_ci         .front = ~0u,
91bf215546Sopenharmony_ci         .back = ~0u,
92bf215546Sopenharmony_ci      },
93bf215546Sopenharmony_ci   .stencil_write_mask =
94bf215546Sopenharmony_ci      {
95bf215546Sopenharmony_ci         .front = ~0u,
96bf215546Sopenharmony_ci         .back = ~0u,
97bf215546Sopenharmony_ci      },
98bf215546Sopenharmony_ci   .stencil_reference =
99bf215546Sopenharmony_ci      {
100bf215546Sopenharmony_ci         .front = 0u,
101bf215546Sopenharmony_ci         .back = 0u,
102bf215546Sopenharmony_ci      },
103bf215546Sopenharmony_ci   .line_stipple =
104bf215546Sopenharmony_ci      {
105bf215546Sopenharmony_ci         .factor = 0u,
106bf215546Sopenharmony_ci         .pattern = 0u,
107bf215546Sopenharmony_ci      },
108bf215546Sopenharmony_ci   .cull_mode = 0u,
109bf215546Sopenharmony_ci   .front_face = 0u,
110bf215546Sopenharmony_ci   .primitive_topology = 0u,
111bf215546Sopenharmony_ci   .fragment_shading_rate =
112bf215546Sopenharmony_ci      {
113bf215546Sopenharmony_ci         .size = {1u, 1u},
114bf215546Sopenharmony_ci         .combiner_ops = {VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR,
115bf215546Sopenharmony_ci                          VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR},
116bf215546Sopenharmony_ci      },
117bf215546Sopenharmony_ci   .depth_bias_enable = 0u,
118bf215546Sopenharmony_ci   .primitive_restart_enable = 0u,
119bf215546Sopenharmony_ci   .rasterizer_discard_enable = 0u,
120bf215546Sopenharmony_ci   .logic_op = 0u,
121bf215546Sopenharmony_ci   .color_write_enable = 0xffffffffu,
122bf215546Sopenharmony_ci};
123bf215546Sopenharmony_ci
124bf215546Sopenharmony_cistatic void
125bf215546Sopenharmony_ciradv_bind_dynamic_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_dynamic_state *src)
126bf215546Sopenharmony_ci{
127bf215546Sopenharmony_ci   struct radv_dynamic_state *dest = &cmd_buffer->state.dynamic;
128bf215546Sopenharmony_ci   uint64_t copy_mask = src->mask;
129bf215546Sopenharmony_ci   uint64_t dest_mask = 0;
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_ci   dest->discard_rectangle.count = src->discard_rectangle.count;
132bf215546Sopenharmony_ci   dest->sample_location.count = src->sample_location.count;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_VIEWPORT) {
135bf215546Sopenharmony_ci      if (dest->viewport.count != src->viewport.count) {
136bf215546Sopenharmony_ci         dest->viewport.count = src->viewport.count;
137bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_VIEWPORT;
138bf215546Sopenharmony_ci      }
139bf215546Sopenharmony_ci
140bf215546Sopenharmony_ci      if (memcmp(&dest->viewport.viewports, &src->viewport.viewports,
141bf215546Sopenharmony_ci                 src->viewport.count * sizeof(VkViewport))) {
142bf215546Sopenharmony_ci         typed_memcpy(dest->viewport.viewports, src->viewport.viewports, src->viewport.count);
143bf215546Sopenharmony_ci         typed_memcpy(dest->viewport.xform, src->viewport.xform, src->viewport.count);
144bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_VIEWPORT;
145bf215546Sopenharmony_ci      }
146bf215546Sopenharmony_ci   }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_SCISSOR) {
149bf215546Sopenharmony_ci      if (dest->scissor.count != src->scissor.count) {
150bf215546Sopenharmony_ci         dest->scissor.count = src->scissor.count;
151bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_SCISSOR;
152bf215546Sopenharmony_ci      }
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci      if (memcmp(&dest->scissor.scissors, &src->scissor.scissors,
155bf215546Sopenharmony_ci                 src->scissor.count * sizeof(VkRect2D))) {
156bf215546Sopenharmony_ci         typed_memcpy(dest->scissor.scissors, src->scissor.scissors, src->scissor.count);
157bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_SCISSOR;
158bf215546Sopenharmony_ci      }
159bf215546Sopenharmony_ci   }
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_LINE_WIDTH) {
162bf215546Sopenharmony_ci      if (dest->line_width != src->line_width) {
163bf215546Sopenharmony_ci         dest->line_width = src->line_width;
164bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_LINE_WIDTH;
165bf215546Sopenharmony_ci      }
166bf215546Sopenharmony_ci   }
167bf215546Sopenharmony_ci
168bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS) {
169bf215546Sopenharmony_ci      if (memcmp(&dest->depth_bias, &src->depth_bias, sizeof(src->depth_bias))) {
170bf215546Sopenharmony_ci         dest->depth_bias = src->depth_bias;
171bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DEPTH_BIAS;
172bf215546Sopenharmony_ci      }
173bf215546Sopenharmony_ci   }
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_BLEND_CONSTANTS) {
176bf215546Sopenharmony_ci      if (memcmp(&dest->blend_constants, &src->blend_constants, sizeof(src->blend_constants))) {
177bf215546Sopenharmony_ci         typed_memcpy(dest->blend_constants, src->blend_constants, 4);
178bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_BLEND_CONSTANTS;
179bf215546Sopenharmony_ci      }
180bf215546Sopenharmony_ci   }
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS) {
183bf215546Sopenharmony_ci      if (memcmp(&dest->depth_bounds, &src->depth_bounds, sizeof(src->depth_bounds))) {
184bf215546Sopenharmony_ci         dest->depth_bounds = src->depth_bounds;
185bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS;
186bf215546Sopenharmony_ci      }
187bf215546Sopenharmony_ci   }
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_STENCIL_COMPARE_MASK) {
190bf215546Sopenharmony_ci      if (memcmp(&dest->stencil_compare_mask, &src->stencil_compare_mask,
191bf215546Sopenharmony_ci                 sizeof(src->stencil_compare_mask))) {
192bf215546Sopenharmony_ci         dest->stencil_compare_mask = src->stencil_compare_mask;
193bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_STENCIL_COMPARE_MASK;
194bf215546Sopenharmony_ci      }
195bf215546Sopenharmony_ci   }
196bf215546Sopenharmony_ci
197bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_STENCIL_WRITE_MASK) {
198bf215546Sopenharmony_ci      if (memcmp(&dest->stencil_write_mask, &src->stencil_write_mask,
199bf215546Sopenharmony_ci                 sizeof(src->stencil_write_mask))) {
200bf215546Sopenharmony_ci         dest->stencil_write_mask = src->stencil_write_mask;
201bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_STENCIL_WRITE_MASK;
202bf215546Sopenharmony_ci      }
203bf215546Sopenharmony_ci   }
204bf215546Sopenharmony_ci
205bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_STENCIL_REFERENCE) {
206bf215546Sopenharmony_ci      if (memcmp(&dest->stencil_reference, &src->stencil_reference,
207bf215546Sopenharmony_ci                 sizeof(src->stencil_reference))) {
208bf215546Sopenharmony_ci         dest->stencil_reference = src->stencil_reference;
209bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_STENCIL_REFERENCE;
210bf215546Sopenharmony_ci      }
211bf215546Sopenharmony_ci   }
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DISCARD_RECTANGLE) {
214bf215546Sopenharmony_ci      if (memcmp(&dest->discard_rectangle.rectangles, &src->discard_rectangle.rectangles,
215bf215546Sopenharmony_ci                 src->discard_rectangle.count * sizeof(VkRect2D))) {
216bf215546Sopenharmony_ci         typed_memcpy(dest->discard_rectangle.rectangles, src->discard_rectangle.rectangles,
217bf215546Sopenharmony_ci                      src->discard_rectangle.count);
218bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DISCARD_RECTANGLE;
219bf215546Sopenharmony_ci      }
220bf215546Sopenharmony_ci   }
221bf215546Sopenharmony_ci
222bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_SAMPLE_LOCATIONS) {
223bf215546Sopenharmony_ci      if (dest->sample_location.per_pixel != src->sample_location.per_pixel ||
224bf215546Sopenharmony_ci          dest->sample_location.grid_size.width != src->sample_location.grid_size.width ||
225bf215546Sopenharmony_ci          dest->sample_location.grid_size.height != src->sample_location.grid_size.height ||
226bf215546Sopenharmony_ci          memcmp(&dest->sample_location.locations, &src->sample_location.locations,
227bf215546Sopenharmony_ci                 src->sample_location.count * sizeof(VkSampleLocationEXT))) {
228bf215546Sopenharmony_ci         dest->sample_location.per_pixel = src->sample_location.per_pixel;
229bf215546Sopenharmony_ci         dest->sample_location.grid_size = src->sample_location.grid_size;
230bf215546Sopenharmony_ci         typed_memcpy(dest->sample_location.locations, src->sample_location.locations,
231bf215546Sopenharmony_ci                      src->sample_location.count);
232bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_SAMPLE_LOCATIONS;
233bf215546Sopenharmony_ci      }
234bf215546Sopenharmony_ci   }
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_LINE_STIPPLE) {
237bf215546Sopenharmony_ci      if (memcmp(&dest->line_stipple, &src->line_stipple, sizeof(src->line_stipple))) {
238bf215546Sopenharmony_ci         dest->line_stipple = src->line_stipple;
239bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_LINE_STIPPLE;
240bf215546Sopenharmony_ci      }
241bf215546Sopenharmony_ci   }
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_CULL_MODE) {
244bf215546Sopenharmony_ci      if (dest->cull_mode != src->cull_mode) {
245bf215546Sopenharmony_ci         dest->cull_mode = src->cull_mode;
246bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_CULL_MODE;
247bf215546Sopenharmony_ci      }
248bf215546Sopenharmony_ci   }
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_FRONT_FACE) {
251bf215546Sopenharmony_ci      if (dest->front_face != src->front_face) {
252bf215546Sopenharmony_ci         dest->front_face = src->front_face;
253bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_FRONT_FACE;
254bf215546Sopenharmony_ci      }
255bf215546Sopenharmony_ci   }
256bf215546Sopenharmony_ci
257bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) {
258bf215546Sopenharmony_ci      if (dest->primitive_topology != src->primitive_topology) {
259bf215546Sopenharmony_ci         dest->primitive_topology = src->primitive_topology;
260bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_PRIMITIVE_TOPOLOGY;
261bf215546Sopenharmony_ci      }
262bf215546Sopenharmony_ci   }
263bf215546Sopenharmony_ci
264bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DEPTH_TEST_ENABLE) {
265bf215546Sopenharmony_ci      if (dest->depth_test_enable != src->depth_test_enable) {
266bf215546Sopenharmony_ci         dest->depth_test_enable = src->depth_test_enable;
267bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DEPTH_TEST_ENABLE;
268bf215546Sopenharmony_ci      }
269bf215546Sopenharmony_ci   }
270bf215546Sopenharmony_ci
271bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DEPTH_WRITE_ENABLE) {
272bf215546Sopenharmony_ci      if (dest->depth_write_enable != src->depth_write_enable) {
273bf215546Sopenharmony_ci         dest->depth_write_enable = src->depth_write_enable;
274bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DEPTH_WRITE_ENABLE;
275bf215546Sopenharmony_ci      }
276bf215546Sopenharmony_ci   }
277bf215546Sopenharmony_ci
278bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DEPTH_COMPARE_OP) {
279bf215546Sopenharmony_ci      if (dest->depth_compare_op != src->depth_compare_op) {
280bf215546Sopenharmony_ci         dest->depth_compare_op = src->depth_compare_op;
281bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DEPTH_COMPARE_OP;
282bf215546Sopenharmony_ci      }
283bf215546Sopenharmony_ci   }
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
286bf215546Sopenharmony_ci      if (dest->depth_bounds_test_enable != src->depth_bounds_test_enable) {
287bf215546Sopenharmony_ci         dest->depth_bounds_test_enable = src->depth_bounds_test_enable;
288bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
289bf215546Sopenharmony_ci      }
290bf215546Sopenharmony_ci   }
291bf215546Sopenharmony_ci
292bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_STENCIL_TEST_ENABLE) {
293bf215546Sopenharmony_ci      if (dest->stencil_test_enable != src->stencil_test_enable) {
294bf215546Sopenharmony_ci         dest->stencil_test_enable = src->stencil_test_enable;
295bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_STENCIL_TEST_ENABLE;
296bf215546Sopenharmony_ci      }
297bf215546Sopenharmony_ci   }
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_STENCIL_OP) {
300bf215546Sopenharmony_ci      if (memcmp(&dest->stencil_op, &src->stencil_op, sizeof(src->stencil_op))) {
301bf215546Sopenharmony_ci         dest->stencil_op = src->stencil_op;
302bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_STENCIL_OP;
303bf215546Sopenharmony_ci      }
304bf215546Sopenharmony_ci   }
305bf215546Sopenharmony_ci
306bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_FRAGMENT_SHADING_RATE) {
307bf215546Sopenharmony_ci      if (memcmp(&dest->fragment_shading_rate, &src->fragment_shading_rate,
308bf215546Sopenharmony_ci                 sizeof(src->fragment_shading_rate))) {
309bf215546Sopenharmony_ci         dest->fragment_shading_rate = src->fragment_shading_rate;
310bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_FRAGMENT_SHADING_RATE;
311bf215546Sopenharmony_ci      }
312bf215546Sopenharmony_ci   }
313bf215546Sopenharmony_ci
314bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_DEPTH_BIAS_ENABLE) {
315bf215546Sopenharmony_ci      if (dest->depth_bias_enable != src->depth_bias_enable) {
316bf215546Sopenharmony_ci         dest->depth_bias_enable = src->depth_bias_enable;
317bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_DEPTH_BIAS_ENABLE;
318bf215546Sopenharmony_ci      }
319bf215546Sopenharmony_ci   }
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE) {
322bf215546Sopenharmony_ci      if (dest->primitive_restart_enable != src->primitive_restart_enable) {
323bf215546Sopenharmony_ci         dest->primitive_restart_enable = src->primitive_restart_enable;
324bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
325bf215546Sopenharmony_ci      }
326bf215546Sopenharmony_ci   }
327bf215546Sopenharmony_ci
328bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
329bf215546Sopenharmony_ci      if (dest->rasterizer_discard_enable != src->rasterizer_discard_enable) {
330bf215546Sopenharmony_ci         dest->rasterizer_discard_enable = src->rasterizer_discard_enable;
331bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
332bf215546Sopenharmony_ci      }
333bf215546Sopenharmony_ci   }
334bf215546Sopenharmony_ci
335bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_LOGIC_OP) {
336bf215546Sopenharmony_ci      if (dest->logic_op != src->logic_op) {
337bf215546Sopenharmony_ci         dest->logic_op = src->logic_op;
338bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_LOGIC_OP;
339bf215546Sopenharmony_ci      }
340bf215546Sopenharmony_ci   }
341bf215546Sopenharmony_ci
342bf215546Sopenharmony_ci   if (copy_mask & RADV_DYNAMIC_COLOR_WRITE_ENABLE) {
343bf215546Sopenharmony_ci      if (dest->color_write_enable != src->color_write_enable) {
344bf215546Sopenharmony_ci         dest->color_write_enable = src->color_write_enable;
345bf215546Sopenharmony_ci         dest_mask |= RADV_DYNAMIC_COLOR_WRITE_ENABLE;
346bf215546Sopenharmony_ci      }
347bf215546Sopenharmony_ci   }
348bf215546Sopenharmony_ci
349bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= dest_mask;
350bf215546Sopenharmony_ci}
351bf215546Sopenharmony_ci
352bf215546Sopenharmony_cibool
353bf215546Sopenharmony_ciradv_cmd_buffer_uses_mec(struct radv_cmd_buffer *cmd_buffer)
354bf215546Sopenharmony_ci{
355bf215546Sopenharmony_ci   return cmd_buffer->qf == RADV_QUEUE_COMPUTE &&
356bf215546Sopenharmony_ci          cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
357bf215546Sopenharmony_ci}
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_cienum amd_ip_type
360bf215546Sopenharmony_ciradv_queue_family_to_ring(struct radv_physical_device *physical_device,
361bf215546Sopenharmony_ci                          enum radv_queue_family f)
362bf215546Sopenharmony_ci{
363bf215546Sopenharmony_ci   switch (f) {
364bf215546Sopenharmony_ci   case RADV_QUEUE_GENERAL:
365bf215546Sopenharmony_ci      return AMD_IP_GFX;
366bf215546Sopenharmony_ci   case RADV_QUEUE_COMPUTE:
367bf215546Sopenharmony_ci      return AMD_IP_COMPUTE;
368bf215546Sopenharmony_ci   case RADV_QUEUE_TRANSFER:
369bf215546Sopenharmony_ci      return AMD_IP_SDMA;
370bf215546Sopenharmony_ci   default:
371bf215546Sopenharmony_ci      unreachable("Unknown queue family");
372bf215546Sopenharmony_ci   }
373bf215546Sopenharmony_ci}
374bf215546Sopenharmony_ci
375bf215546Sopenharmony_cistatic void
376bf215546Sopenharmony_ciradv_emit_write_data_packet(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
377bf215546Sopenharmony_ci                            unsigned count, const uint32_t *data)
378bf215546Sopenharmony_ci{
379bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
380bf215546Sopenharmony_ci
381bf215546Sopenharmony_ci   radeon_check_space(cmd_buffer->device->ws, cs, 4 + count);
382bf215546Sopenharmony_ci
383bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
384bf215546Sopenharmony_ci   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(engine_sel));
385bf215546Sopenharmony_ci   radeon_emit(cs, va);
386bf215546Sopenharmony_ci   radeon_emit(cs, va >> 32);
387bf215546Sopenharmony_ci   radeon_emit_array(cs, data, count);
388bf215546Sopenharmony_ci}
389bf215546Sopenharmony_ci
/**
 * Zero \p size bytes at \p va by emitting a write-data packet whose payload is
 * all zeroes.
 *
 * The zero payload is a temporary stack allocation of \p size bytes, and
 * size / 4 dwords are written, so a trailing partial dword is not cleared.
 * NOTE(review): this presumably expects small, dword-multiple sizes -- confirm
 * against the callers.
 *
 * \param cmd_buffer  command buffer receiving the packet
 * \param engine_sel  forwarded to radv_emit_write_data_packet()
 * \param va          destination address
 * \param size        number of bytes to clear
 */
static void
radv_emit_clear_data(struct radv_cmd_buffer *cmd_buffer, unsigned engine_sel, uint64_t va,
                     unsigned size)
{
   const unsigned num_dwords = size / 4;
   uint32_t *zero_payload = alloca(size);

   memset(zero_payload, 0, size);
   radv_emit_write_data_packet(cmd_buffer, engine_sel, va, num_dwords, zero_payload);
}
398bf215546Sopenharmony_ci
399bf215546Sopenharmony_cistatic void
400bf215546Sopenharmony_ciradv_destroy_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
401bf215546Sopenharmony_ci{
402bf215546Sopenharmony_ci   list_del(&cmd_buffer->pool_link);
403bf215546Sopenharmony_ci
404bf215546Sopenharmony_ci   util_dynarray_fini(&cmd_buffer->cached_vertex_formats);
405bf215546Sopenharmony_ci
406bf215546Sopenharmony_ci   list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
407bf215546Sopenharmony_ci   {
408bf215546Sopenharmony_ci      cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
409bf215546Sopenharmony_ci      list_del(&up->list);
410bf215546Sopenharmony_ci      free(up);
411bf215546Sopenharmony_ci   }
412bf215546Sopenharmony_ci
413bf215546Sopenharmony_ci   if (cmd_buffer->upload.upload_bo)
414bf215546Sopenharmony_ci      cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, cmd_buffer->upload.upload_bo);
415bf215546Sopenharmony_ci
416bf215546Sopenharmony_ci   if (cmd_buffer->state.own_render_pass) {
417bf215546Sopenharmony_ci      radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device),
418bf215546Sopenharmony_ci                             radv_render_pass_to_handle(cmd_buffer->state.pass), NULL);
419bf215546Sopenharmony_ci      cmd_buffer->state.own_render_pass = false;
420bf215546Sopenharmony_ci   }
421bf215546Sopenharmony_ci
422bf215546Sopenharmony_ci   if (cmd_buffer->cs)
423bf215546Sopenharmony_ci      cmd_buffer->device->ws->cs_destroy(cmd_buffer->cs);
424bf215546Sopenharmony_ci   if (cmd_buffer->ace_internal.cs)
425bf215546Sopenharmony_ci      cmd_buffer->device->ws->cs_destroy(cmd_buffer->ace_internal.cs);
426bf215546Sopenharmony_ci
427bf215546Sopenharmony_ci   for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
428bf215546Sopenharmony_ci      struct radv_descriptor_set_header *set = &cmd_buffer->descriptors[i].push_set.set;
429bf215546Sopenharmony_ci      free(set->mapped_ptr);
430bf215546Sopenharmony_ci      if (set->layout)
431bf215546Sopenharmony_ci         vk_descriptor_set_layout_unref(&cmd_buffer->device->vk, &set->layout->vk);
432bf215546Sopenharmony_ci      vk_object_base_finish(&set->base);
433bf215546Sopenharmony_ci   }
434bf215546Sopenharmony_ci
435bf215546Sopenharmony_ci   vk_object_base_finish(&cmd_buffer->meta_push_descriptors.base);
436bf215546Sopenharmony_ci
437bf215546Sopenharmony_ci   vk_command_buffer_finish(&cmd_buffer->vk);
438bf215546Sopenharmony_ci   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer);
439bf215546Sopenharmony_ci}
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_cistatic VkResult
442bf215546Sopenharmony_ciradv_create_cmd_buffer(struct radv_device *device, struct radv_cmd_pool *pool,
443bf215546Sopenharmony_ci                       VkCommandBufferLevel level, VkCommandBuffer *pCommandBuffer)
444bf215546Sopenharmony_ci{
445bf215546Sopenharmony_ci   struct radv_cmd_buffer *cmd_buffer;
446bf215546Sopenharmony_ci   unsigned ring;
447bf215546Sopenharmony_ci   cmd_buffer = vk_zalloc(&pool->vk.alloc, sizeof(*cmd_buffer), 8,
448bf215546Sopenharmony_ci                          VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
449bf215546Sopenharmony_ci   if (cmd_buffer == NULL)
450bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci   VkResult result =
453bf215546Sopenharmony_ci      vk_command_buffer_init(&cmd_buffer->vk, &pool->vk, level);
454bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
455bf215546Sopenharmony_ci      vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer);
456bf215546Sopenharmony_ci      return result;
457bf215546Sopenharmony_ci   }
458bf215546Sopenharmony_ci
459bf215546Sopenharmony_ci   cmd_buffer->device = device;
460bf215546Sopenharmony_ci   cmd_buffer->pool = pool;
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
463bf215546Sopenharmony_ci   cmd_buffer->qf = vk_queue_to_radv(device->physical_device, pool->vk.queue_family_index);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf);
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   cmd_buffer->cs = device->ws->cs_create(device->ws, ring);
468bf215546Sopenharmony_ci   if (!cmd_buffer->cs) {
469bf215546Sopenharmony_ci      radv_destroy_cmd_buffer(cmd_buffer);
470bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
471bf215546Sopenharmony_ci   }
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci   vk_object_base_init(&device->vk, &cmd_buffer->meta_push_descriptors.base,
474bf215546Sopenharmony_ci                       VK_OBJECT_TYPE_DESCRIPTOR_SET);
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_ci   util_dynarray_init(&cmd_buffer->cached_vertex_formats, NULL);
477bf215546Sopenharmony_ci
478bf215546Sopenharmony_ci   for (unsigned i = 0; i < MAX_BIND_POINTS; i++)
479bf215546Sopenharmony_ci      vk_object_base_init(&device->vk, &cmd_buffer->descriptors[i].push_set.set.base,
480bf215546Sopenharmony_ci                          VK_OBJECT_TYPE_DESCRIPTOR_SET);
481bf215546Sopenharmony_ci
482bf215546Sopenharmony_ci   *pCommandBuffer = radv_cmd_buffer_to_handle(cmd_buffer);
483bf215546Sopenharmony_ci
484bf215546Sopenharmony_ci   list_inithead(&cmd_buffer->upload.list);
485bf215546Sopenharmony_ci
486bf215546Sopenharmony_ci   return VK_SUCCESS;
487bf215546Sopenharmony_ci}
488bf215546Sopenharmony_ci
489bf215546Sopenharmony_cistatic VkResult
490bf215546Sopenharmony_ciradv_reset_cmd_buffer(struct radv_cmd_buffer *cmd_buffer)
491bf215546Sopenharmony_ci{
492bf215546Sopenharmony_ci   vk_command_buffer_reset(&cmd_buffer->vk);
493bf215546Sopenharmony_ci
494bf215546Sopenharmony_ci   cmd_buffer->device->ws->cs_reset(cmd_buffer->cs);
495bf215546Sopenharmony_ci   if (cmd_buffer->ace_internal.cs)
496bf215546Sopenharmony_ci      cmd_buffer->device->ws->cs_reset(cmd_buffer->ace_internal.cs);
497bf215546Sopenharmony_ci
498bf215546Sopenharmony_ci   list_for_each_entry_safe(struct radv_cmd_buffer_upload, up, &cmd_buffer->upload.list, list)
499bf215546Sopenharmony_ci   {
500bf215546Sopenharmony_ci      cmd_buffer->device->ws->buffer_destroy(cmd_buffer->device->ws, up->upload_bo);
501bf215546Sopenharmony_ci      list_del(&up->list);
502bf215546Sopenharmony_ci      free(up);
503bf215546Sopenharmony_ci   }
504bf215546Sopenharmony_ci
505bf215546Sopenharmony_ci   if (cmd_buffer->state.own_render_pass) {
506bf215546Sopenharmony_ci      radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device),
507bf215546Sopenharmony_ci                             radv_render_pass_to_handle(cmd_buffer->state.pass), NULL);
508bf215546Sopenharmony_ci      cmd_buffer->state.own_render_pass = false;
509bf215546Sopenharmony_ci   }
510bf215546Sopenharmony_ci
511bf215546Sopenharmony_ci   cmd_buffer->push_constant_stages = 0;
512bf215546Sopenharmony_ci   cmd_buffer->scratch_size_per_wave_needed = 0;
513bf215546Sopenharmony_ci   cmd_buffer->scratch_waves_wanted = 0;
514bf215546Sopenharmony_ci   cmd_buffer->compute_scratch_size_per_wave_needed = 0;
515bf215546Sopenharmony_ci   cmd_buffer->compute_scratch_waves_wanted = 0;
516bf215546Sopenharmony_ci   cmd_buffer->esgs_ring_size_needed = 0;
517bf215546Sopenharmony_ci   cmd_buffer->gsvs_ring_size_needed = 0;
518bf215546Sopenharmony_ci   cmd_buffer->tess_rings_needed = false;
519bf215546Sopenharmony_ci   cmd_buffer->task_rings_needed = false;
520bf215546Sopenharmony_ci   cmd_buffer->mesh_scratch_ring_needed = false;
521bf215546Sopenharmony_ci   cmd_buffer->gds_needed = false;
522bf215546Sopenharmony_ci   cmd_buffer->gds_oa_needed = false;
523bf215546Sopenharmony_ci   cmd_buffer->sample_positions_needed = false;
524bf215546Sopenharmony_ci   cmd_buffer->ace_internal.sem.gfx2ace_value = 0;
525bf215546Sopenharmony_ci   cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = 0;
526bf215546Sopenharmony_ci   cmd_buffer->ace_internal.sem.va = 0;
527bf215546Sopenharmony_ci
528bf215546Sopenharmony_ci   if (cmd_buffer->upload.upload_bo)
529bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->upload.upload_bo);
530bf215546Sopenharmony_ci   cmd_buffer->upload.offset = 0;
531bf215546Sopenharmony_ci
532bf215546Sopenharmony_ci   cmd_buffer->record_result = VK_SUCCESS;
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci   memset(cmd_buffer->vertex_binding_buffers, 0, sizeof(struct radv_buffer *) * cmd_buffer->used_vertex_bindings);
535bf215546Sopenharmony_ci   cmd_buffer->used_vertex_bindings = 0;
536bf215546Sopenharmony_ci
537bf215546Sopenharmony_ci   for (unsigned i = 0; i < MAX_BIND_POINTS; i++) {
538bf215546Sopenharmony_ci      cmd_buffer->descriptors[i].dirty = 0;
539bf215546Sopenharmony_ci      cmd_buffer->descriptors[i].valid = 0;
540bf215546Sopenharmony_ci      cmd_buffer->descriptors[i].push_dirty = false;
541bf215546Sopenharmony_ci   }
542bf215546Sopenharmony_ci
543bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
544bf215546Sopenharmony_ci      uint32_t pred_value = 0;
545bf215546Sopenharmony_ci      uint32_t pred_offset;
546bf215546Sopenharmony_ci      if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &pred_value, &pred_offset))
547bf215546Sopenharmony_ci         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci      cmd_buffer->mec_inv_pred_emitted = false;
550bf215546Sopenharmony_ci      cmd_buffer->mec_inv_pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
551bf215546Sopenharmony_ci   }
552bf215546Sopenharmony_ci
553bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
554bf215546Sopenharmony_ci       cmd_buffer->qf == RADV_QUEUE_GENERAL) {
555bf215546Sopenharmony_ci      unsigned num_db = cmd_buffer->device->physical_device->rad_info.max_render_backends;
556bf215546Sopenharmony_ci      unsigned fence_offset, eop_bug_offset;
557bf215546Sopenharmony_ci      void *fence_ptr;
558bf215546Sopenharmony_ci
559bf215546Sopenharmony_ci      radv_cmd_buffer_upload_alloc(cmd_buffer, 8, &fence_offset, &fence_ptr);
560bf215546Sopenharmony_ci      memset(fence_ptr, 0, 8);
561bf215546Sopenharmony_ci
562bf215546Sopenharmony_ci      cmd_buffer->gfx9_fence_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
563bf215546Sopenharmony_ci      cmd_buffer->gfx9_fence_va += fence_offset;
564bf215546Sopenharmony_ci
565bf215546Sopenharmony_ci      radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_fence_va, 8);
566bf215546Sopenharmony_ci
567bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
568bf215546Sopenharmony_ci         /* Allocate a buffer for the EOP bug on GFX9. */
569bf215546Sopenharmony_ci         radv_cmd_buffer_upload_alloc(cmd_buffer, 16 * num_db, &eop_bug_offset, &fence_ptr);
570bf215546Sopenharmony_ci         memset(fence_ptr, 0, 16 * num_db);
571bf215546Sopenharmony_ci         cmd_buffer->gfx9_eop_bug_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
572bf215546Sopenharmony_ci         cmd_buffer->gfx9_eop_bug_va += eop_bug_offset;
573bf215546Sopenharmony_ci
574bf215546Sopenharmony_ci         radv_emit_clear_data(cmd_buffer, V_370_PFP, cmd_buffer->gfx9_eop_bug_va, 16 * num_db);
575bf215546Sopenharmony_ci      }
576bf215546Sopenharmony_ci   }
577bf215546Sopenharmony_ci
578bf215546Sopenharmony_ci   cmd_buffer->status = RADV_CMD_BUFFER_STATUS_INITIAL;
579bf215546Sopenharmony_ci
580bf215546Sopenharmony_ci   return cmd_buffer->record_result;
581bf215546Sopenharmony_ci}
582bf215546Sopenharmony_ci
583bf215546Sopenharmony_cistatic bool
584bf215546Sopenharmony_ciradv_cmd_buffer_resize_upload_buf(struct radv_cmd_buffer *cmd_buffer, uint64_t min_needed)
585bf215546Sopenharmony_ci{
586bf215546Sopenharmony_ci   uint64_t new_size;
587bf215546Sopenharmony_ci   struct radeon_winsys_bo *bo = NULL;
588bf215546Sopenharmony_ci   struct radv_cmd_buffer_upload *upload;
589bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
590bf215546Sopenharmony_ci
591bf215546Sopenharmony_ci   new_size = MAX2(min_needed, 16 * 1024);
592bf215546Sopenharmony_ci   new_size = MAX2(new_size, 2 * cmd_buffer->upload.size);
593bf215546Sopenharmony_ci
594bf215546Sopenharmony_ci   VkResult result =
595bf215546Sopenharmony_ci      device->ws->buffer_create(device->ws, new_size, 4096, device->ws->cs_domain(device->ws),
596bf215546Sopenharmony_ci                                RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |
597bf215546Sopenharmony_ci                                   RADEON_FLAG_32BIT | RADEON_FLAG_GTT_WC,
598bf215546Sopenharmony_ci                                RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &bo);
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
601bf215546Sopenharmony_ci      cmd_buffer->record_result = result;
602bf215546Sopenharmony_ci      return false;
603bf215546Sopenharmony_ci   }
604bf215546Sopenharmony_ci
605bf215546Sopenharmony_ci   radv_cs_add_buffer(device->ws, cmd_buffer->cs, bo);
606bf215546Sopenharmony_ci   if (cmd_buffer->upload.upload_bo) {
607bf215546Sopenharmony_ci      upload = malloc(sizeof(*upload));
608bf215546Sopenharmony_ci
609bf215546Sopenharmony_ci      if (!upload) {
610bf215546Sopenharmony_ci         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
611bf215546Sopenharmony_ci         device->ws->buffer_destroy(device->ws, bo);
612bf215546Sopenharmony_ci         return false;
613bf215546Sopenharmony_ci      }
614bf215546Sopenharmony_ci
615bf215546Sopenharmony_ci      memcpy(upload, &cmd_buffer->upload, sizeof(*upload));
616bf215546Sopenharmony_ci      list_add(&upload->list, &cmd_buffer->upload.list);
617bf215546Sopenharmony_ci   }
618bf215546Sopenharmony_ci
619bf215546Sopenharmony_ci   cmd_buffer->upload.upload_bo = bo;
620bf215546Sopenharmony_ci   cmd_buffer->upload.size = new_size;
621bf215546Sopenharmony_ci   cmd_buffer->upload.offset = 0;
622bf215546Sopenharmony_ci   cmd_buffer->upload.map = device->ws->buffer_map(cmd_buffer->upload.upload_bo);
623bf215546Sopenharmony_ci
624bf215546Sopenharmony_ci   if (!cmd_buffer->upload.map) {
625bf215546Sopenharmony_ci      cmd_buffer->record_result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
626bf215546Sopenharmony_ci      return false;
627bf215546Sopenharmony_ci   }
628bf215546Sopenharmony_ci
629bf215546Sopenharmony_ci   return true;
630bf215546Sopenharmony_ci}
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_cibool
633bf215546Sopenharmony_ciradv_cmd_buffer_upload_alloc(struct radv_cmd_buffer *cmd_buffer, unsigned size,
634bf215546Sopenharmony_ci                             unsigned *out_offset, void **ptr)
635bf215546Sopenharmony_ci{
636bf215546Sopenharmony_ci   assert(size % 4 == 0);
637bf215546Sopenharmony_ci
638bf215546Sopenharmony_ci   struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
639bf215546Sopenharmony_ci
640bf215546Sopenharmony_ci   /* Align to the scalar cache line size if it results in this allocation
641bf215546Sopenharmony_ci    * being placed in less of them.
642bf215546Sopenharmony_ci    */
643bf215546Sopenharmony_ci   unsigned offset = cmd_buffer->upload.offset;
644bf215546Sopenharmony_ci   unsigned line_size = rad_info->gfx_level >= GFX10 ? 64 : 32;
645bf215546Sopenharmony_ci   unsigned gap = align(offset, line_size) - offset;
646bf215546Sopenharmony_ci   if ((size & (line_size - 1)) > gap)
647bf215546Sopenharmony_ci      offset = align(offset, line_size);
648bf215546Sopenharmony_ci
649bf215546Sopenharmony_ci   if (offset + size > cmd_buffer->upload.size) {
650bf215546Sopenharmony_ci      if (!radv_cmd_buffer_resize_upload_buf(cmd_buffer, size))
651bf215546Sopenharmony_ci         return false;
652bf215546Sopenharmony_ci      offset = 0;
653bf215546Sopenharmony_ci   }
654bf215546Sopenharmony_ci
655bf215546Sopenharmony_ci   *out_offset = offset;
656bf215546Sopenharmony_ci   *ptr = cmd_buffer->upload.map + offset;
657bf215546Sopenharmony_ci
658bf215546Sopenharmony_ci   cmd_buffer->upload.offset = offset + size;
659bf215546Sopenharmony_ci   return true;
660bf215546Sopenharmony_ci}
661bf215546Sopenharmony_ci
/**
 * Reserve size bytes in the upload BO and copy data into them.
 *
 * *out_offset receives the offset of the copy inside the upload BO.
 * Returns false if the reservation failed, in which case nothing is copied.
 */
bool
radv_cmd_buffer_upload_data(struct radv_cmd_buffer *cmd_buffer, unsigned size, const void *data,
                            unsigned *out_offset)
{
   void *dst;

   if (radv_cmd_buffer_upload_alloc(cmd_buffer, size, out_offset, &dst)) {
      assert(dst);
      memcpy(dst, data, size);
      return true;
   }

   return false;
}
675bf215546Sopenharmony_ci
676bf215546Sopenharmony_civoid
677bf215546Sopenharmony_ciradv_cmd_buffer_trace_emit(struct radv_cmd_buffer *cmd_buffer)
678bf215546Sopenharmony_ci{
679bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
680bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
681bf215546Sopenharmony_ci   uint64_t va;
682bf215546Sopenharmony_ci
683bf215546Sopenharmony_ci   va = radv_buffer_get_va(device->trace_bo);
684bf215546Sopenharmony_ci   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY)
685bf215546Sopenharmony_ci      va += 4;
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci   ++cmd_buffer->state.trace_id;
688bf215546Sopenharmony_ci   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 1, &cmd_buffer->state.trace_id);
689bf215546Sopenharmony_ci
690bf215546Sopenharmony_ci   radeon_check_space(cmd_buffer->device->ws, cs, 2);
691bf215546Sopenharmony_ci
692bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_NOP, 0, 0));
693bf215546Sopenharmony_ci   radeon_emit(cs, AC_ENCODE_TRACE_POINT(cmd_buffer->state.trace_id));
694bf215546Sopenharmony_ci}
695bf215546Sopenharmony_ci
696bf215546Sopenharmony_cistatic void
697bf215546Sopenharmony_ciradv_ace_internal_barrier(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask,
698bf215546Sopenharmony_ci                          VkPipelineStageFlags2 dst_stage_mask)
699bf215546Sopenharmony_ci{
700bf215546Sopenharmony_ci   /* Update flush bits from the main cmdbuf, except the stage flush. */
701bf215546Sopenharmony_ci   cmd_buffer->ace_internal.flush_bits |=
702bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits & RADV_CMD_FLUSH_ALL_COMPUTE & ~RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
703bf215546Sopenharmony_ci
704bf215546Sopenharmony_ci   /* Add stage flush only when necessary. */
705bf215546Sopenharmony_ci   if (src_stage_mask &
706bf215546Sopenharmony_ci       (VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV | VK_PIPELINE_STAGE_2_TRANSFER_BIT |
707bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
708bf215546Sopenharmony_ci      cmd_buffer->ace_internal.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
709bf215546Sopenharmony_ci
710bf215546Sopenharmony_ci   /* Block task shaders when we have to wait for CP DMA on the GFX cmdbuf. */
711bf215546Sopenharmony_ci   if (src_stage_mask &
712bf215546Sopenharmony_ci       (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT |
713bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
714bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
715bf215546Sopenharmony_ci      dst_stage_mask |= cmd_buffer->state.dma_is_busy ? VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV : 0;
716bf215546Sopenharmony_ci
717bf215546Sopenharmony_ci   /* Increment the GFX/ACE semaphore when task shaders are blocked. */
718bf215546Sopenharmony_ci   if (dst_stage_mask &
719bf215546Sopenharmony_ci       (VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT_KHR | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT |
720bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV))
721bf215546Sopenharmony_ci      cmd_buffer->ace_internal.sem.gfx2ace_value++;
722bf215546Sopenharmony_ci}
723bf215546Sopenharmony_ci
724bf215546Sopenharmony_cistatic void
725bf215546Sopenharmony_ciradv_ace_internal_cache_flush(struct radv_cmd_buffer *cmd_buffer)
726bf215546Sopenharmony_ci{
727bf215546Sopenharmony_ci   struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs;
728bf215546Sopenharmony_ci   const uint32_t flush_bits = cmd_buffer->ace_internal.flush_bits;
729bf215546Sopenharmony_ci   enum rgp_flush_bits sqtt_flush_bits = 0;
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci   si_cs_emit_cache_flush(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0,
732bf215546Sopenharmony_ci                          true, flush_bits, &sqtt_flush_bits, 0);
733bf215546Sopenharmony_ci
734bf215546Sopenharmony_ci   cmd_buffer->ace_internal.flush_bits = 0;
735bf215546Sopenharmony_ci}
736bf215546Sopenharmony_ci
737bf215546Sopenharmony_cistatic uint64_t
738bf215546Sopenharmony_ciradv_ace_internal_sem_create(struct radv_cmd_buffer *cmd_buffer)
739bf215546Sopenharmony_ci{
740bf215546Sopenharmony_ci   /* DWORD 0: GFX->ACE semaphore (GFX blocks ACE, ie. ACE waits for GFX)
741bf215546Sopenharmony_ci    * DWORD 1: ACE->GFX semaphore
742bf215546Sopenharmony_ci    */
743bf215546Sopenharmony_ci   uint64_t sem_init = 0;
744bf215546Sopenharmony_ci   uint32_t va_off = 0;
745bf215546Sopenharmony_ci   if (!radv_cmd_buffer_upload_data(cmd_buffer, sizeof(uint64_t), &sem_init, &va_off)) {
746bf215546Sopenharmony_ci      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
747bf215546Sopenharmony_ci      return 0;
748bf215546Sopenharmony_ci   }
749bf215546Sopenharmony_ci
750bf215546Sopenharmony_ci   return radv_buffer_get_va(cmd_buffer->upload.upload_bo) + va_off;
751bf215546Sopenharmony_ci}
752bf215546Sopenharmony_ci
753bf215546Sopenharmony_cistatic bool
754bf215546Sopenharmony_ciradv_ace_internal_sem_dirty(const struct radv_cmd_buffer *cmd_buffer)
755bf215546Sopenharmony_ci{
756bf215546Sopenharmony_ci   return cmd_buffer->ace_internal.sem.gfx2ace_value !=
757bf215546Sopenharmony_ci          cmd_buffer->ace_internal.sem.emitted_gfx2ace_value;
758bf215546Sopenharmony_ci}
759bf215546Sopenharmony_ci
760bf215546Sopenharmony_ciALWAYS_INLINE static bool
761bf215546Sopenharmony_ciradv_flush_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer)
762bf215546Sopenharmony_ci{
763bf215546Sopenharmony_ci   if (!radv_ace_internal_sem_dirty(cmd_buffer))
764bf215546Sopenharmony_ci      return false;
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_ci   if (!cmd_buffer->ace_internal.sem.va) {
767bf215546Sopenharmony_ci      cmd_buffer->ace_internal.sem.va = radv_ace_internal_sem_create(cmd_buffer);
768bf215546Sopenharmony_ci      if (!cmd_buffer->ace_internal.sem.va)
769bf215546Sopenharmony_ci         return false;
770bf215546Sopenharmony_ci   }
771bf215546Sopenharmony_ci
772bf215546Sopenharmony_ci   /* GFX writes a value to the semaphore which ACE can wait for.*/
773bf215546Sopenharmony_ci   si_cs_emit_write_event_eop(
774bf215546Sopenharmony_ci      cmd_buffer->cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
775bf215546Sopenharmony_ci      radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
776bf215546Sopenharmony_ci      EOP_DATA_SEL_VALUE_32BIT, cmd_buffer->ace_internal.sem.va,
777bf215546Sopenharmony_ci      cmd_buffer->ace_internal.sem.gfx2ace_value, cmd_buffer->gfx9_eop_bug_va);
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_ci   cmd_buffer->ace_internal.sem.emitted_gfx2ace_value = cmd_buffer->ace_internal.sem.gfx2ace_value;
780bf215546Sopenharmony_ci   return true;
781bf215546Sopenharmony_ci}
782bf215546Sopenharmony_ci
783bf215546Sopenharmony_ciALWAYS_INLINE static void
784bf215546Sopenharmony_ciradv_wait_gfx2ace_semaphore(struct radv_cmd_buffer *cmd_buffer)
785bf215546Sopenharmony_ci{
786bf215546Sopenharmony_ci   assert(cmd_buffer->ace_internal.sem.va);
787bf215546Sopenharmony_ci   struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs;
788bf215546Sopenharmony_ci   radeon_check_space(cmd_buffer->device->ws, ace_cs, 7);
789bf215546Sopenharmony_ci
790bf215546Sopenharmony_ci   /* ACE waits for the semaphore which GFX wrote. */
791bf215546Sopenharmony_ci   radv_cp_wait_mem(ace_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, cmd_buffer->ace_internal.sem.va,
792bf215546Sopenharmony_ci                    cmd_buffer->ace_internal.sem.gfx2ace_value, 0xffffffff);
793bf215546Sopenharmony_ci}
794bf215546Sopenharmony_ci
795bf215546Sopenharmony_cistatic struct radeon_cmdbuf *
796bf215546Sopenharmony_ciradv_ace_internal_create(struct radv_cmd_buffer *cmd_buffer)
797bf215546Sopenharmony_ci{
798bf215546Sopenharmony_ci   assert(!cmd_buffer->ace_internal.cs);
799bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
800bf215546Sopenharmony_ci   struct radeon_cmdbuf *ace_cs = device->ws->cs_create(device->ws, AMD_IP_COMPUTE);
801bf215546Sopenharmony_ci
802bf215546Sopenharmony_ci   if (!ace_cs) {
803bf215546Sopenharmony_ci      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
804bf215546Sopenharmony_ci   }
805bf215546Sopenharmony_ci
806bf215546Sopenharmony_ci   return ace_cs;
807bf215546Sopenharmony_ci}
808bf215546Sopenharmony_ci
809bf215546Sopenharmony_cistatic VkResult
810bf215546Sopenharmony_ciradv_ace_internal_finalize(struct radv_cmd_buffer *cmd_buffer)
811bf215546Sopenharmony_ci{
812bf215546Sopenharmony_ci   assert(cmd_buffer->ace_internal.cs);
813bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
814bf215546Sopenharmony_ci   struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs;
815bf215546Sopenharmony_ci
816bf215546Sopenharmony_ci   /* Emit pending cache flush. */
817bf215546Sopenharmony_ci   radv_ace_internal_cache_flush(cmd_buffer);
818bf215546Sopenharmony_ci
819bf215546Sopenharmony_ci   /* Clear the ACE semaphore if it exists.
820bf215546Sopenharmony_ci    * This is necessary in case the same cmd buffer is submitted again in the future.
821bf215546Sopenharmony_ci    */
822bf215546Sopenharmony_ci   if (cmd_buffer->ace_internal.sem.va) {
823bf215546Sopenharmony_ci      struct radeon_cmdbuf *main_cs = cmd_buffer->cs;
824bf215546Sopenharmony_ci      uint64_t gfx2ace_va = cmd_buffer->ace_internal.sem.va;
825bf215546Sopenharmony_ci      uint64_t ace2gfx_va = cmd_buffer->ace_internal.sem.va + 4;
826bf215546Sopenharmony_ci
827bf215546Sopenharmony_ci      /* ACE: write 1 to the ACE->GFX semaphore. */
828bf215546Sopenharmony_ci      si_cs_emit_write_event_eop(ace_cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
829bf215546Sopenharmony_ci                                 true, V_028A90_BOTTOM_OF_PIPE_TS, 0, EOP_DST_SEL_MEM,
830bf215546Sopenharmony_ci                                 EOP_DATA_SEL_VALUE_32BIT, ace2gfx_va, 1,
831bf215546Sopenharmony_ci                                 cmd_buffer->gfx9_eop_bug_va);
832bf215546Sopenharmony_ci
833bf215546Sopenharmony_ci      /* Wait for ACE to finish, otherwise we may risk writing 0 to the semaphore
834bf215546Sopenharmony_ci       * when ACE is still waiting for it. This may not happen in practice, but
835bf215546Sopenharmony_ci       * better safe than sorry.
836bf215546Sopenharmony_ci       */
837bf215546Sopenharmony_ci      radv_cp_wait_mem(main_cs, WAIT_REG_MEM_GREATER_OR_EQUAL, ace2gfx_va, 1, 0xffffffff);
838bf215546Sopenharmony_ci
839bf215546Sopenharmony_ci      /* GFX: clear GFX->ACE and ACE->GFX semaphores. */
840bf215546Sopenharmony_ci      radv_emit_clear_data(cmd_buffer, V_370_ME, gfx2ace_va, 8);
841bf215546Sopenharmony_ci   }
842bf215546Sopenharmony_ci
843bf215546Sopenharmony_ci   device->ws->cs_add_buffers(ace_cs, cmd_buffer->cs);
844bf215546Sopenharmony_ci   return device->ws->cs_finalize(ace_cs);
845bf215546Sopenharmony_ci}
846bf215546Sopenharmony_ci
847bf215546Sopenharmony_cistatic void
848bf215546Sopenharmony_ciradv_cmd_buffer_after_draw(struct radv_cmd_buffer *cmd_buffer, enum radv_cmd_flush_bits flags)
849bf215546Sopenharmony_ci{
850bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->thread_trace.bo)) {
851bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
852bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_THREAD_TRACE_MARKER) | EVENT_INDEX(0));
853bf215546Sopenharmony_ci   }
854bf215546Sopenharmony_ci
855bf215546Sopenharmony_ci   if (cmd_buffer->device->instance->debug_flags & RADV_DEBUG_SYNC_SHADERS) {
856bf215546Sopenharmony_ci      enum rgp_flush_bits sqtt_flush_bits = 0;
857bf215546Sopenharmony_ci      assert(flags & (RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH));
858bf215546Sopenharmony_ci
859bf215546Sopenharmony_ci      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4);
860bf215546Sopenharmony_ci
861bf215546Sopenharmony_ci      /* Force wait for graphics or compute engines to be idle. */
862bf215546Sopenharmony_ci      si_cs_emit_cache_flush(cmd_buffer->cs,
863bf215546Sopenharmony_ci                             cmd_buffer->device->physical_device->rad_info.gfx_level,
864bf215546Sopenharmony_ci                             &cmd_buffer->gfx9_fence_idx, cmd_buffer->gfx9_fence_va,
865bf215546Sopenharmony_ci                             radv_cmd_buffer_uses_mec(cmd_buffer), flags, &sqtt_flush_bits,
866bf215546Sopenharmony_ci                             cmd_buffer->gfx9_eop_bug_va);
867bf215546Sopenharmony_ci
868bf215546Sopenharmony_ci      if (cmd_buffer->state.graphics_pipeline && (flags & RADV_CMD_FLAG_PS_PARTIAL_FLUSH) &&
869bf215546Sopenharmony_ci          radv_pipeline_has_stage(cmd_buffer->state.graphics_pipeline, MESA_SHADER_TASK)) {
870bf215546Sopenharmony_ci         /* Force wait for compute engines to be idle on the internal cmdbuf. */
871bf215546Sopenharmony_ci         si_cs_emit_cache_flush(cmd_buffer->ace_internal.cs,
872bf215546Sopenharmony_ci                                cmd_buffer->device->physical_device->rad_info.gfx_level, NULL, 0,
873bf215546Sopenharmony_ci                                true, RADV_CMD_FLAG_CS_PARTIAL_FLUSH, &sqtt_flush_bits, 0);
874bf215546Sopenharmony_ci      }
875bf215546Sopenharmony_ci   }
876bf215546Sopenharmony_ci
877bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->trace_bo))
878bf215546Sopenharmony_ci      radv_cmd_buffer_trace_emit(cmd_buffer);
879bf215546Sopenharmony_ci}
880bf215546Sopenharmony_ci
881bf215546Sopenharmony_cistatic void
882bf215546Sopenharmony_ciradv_save_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_pipeline *pipeline)
883bf215546Sopenharmony_ci{
884bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
885bf215546Sopenharmony_ci   enum amd_ip_type ring;
886bf215546Sopenharmony_ci   uint32_t data[2];
887bf215546Sopenharmony_ci   uint64_t va;
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci   va = radv_buffer_get_va(device->trace_bo);
890bf215546Sopenharmony_ci
891bf215546Sopenharmony_ci   ring = radv_queue_family_to_ring(device->physical_device, cmd_buffer->qf);
892bf215546Sopenharmony_ci
893bf215546Sopenharmony_ci   switch (ring) {
894bf215546Sopenharmony_ci   case AMD_IP_GFX:
895bf215546Sopenharmony_ci      va += 8;
896bf215546Sopenharmony_ci      break;
897bf215546Sopenharmony_ci   case AMD_IP_COMPUTE:
898bf215546Sopenharmony_ci      va += 16;
899bf215546Sopenharmony_ci      break;
900bf215546Sopenharmony_ci   default:
901bf215546Sopenharmony_ci      assert(!"invalid IP type");
902bf215546Sopenharmony_ci   }
903bf215546Sopenharmony_ci
904bf215546Sopenharmony_ci   uint64_t pipeline_address = (uintptr_t)pipeline;
905bf215546Sopenharmony_ci   data[0] = pipeline_address;
906bf215546Sopenharmony_ci   data[1] = pipeline_address >> 32;
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
909bf215546Sopenharmony_ci}
910bf215546Sopenharmony_ci
911bf215546Sopenharmony_cistatic void
912bf215546Sopenharmony_ciradv_save_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, uint64_t vb_ptr)
913bf215546Sopenharmony_ci{
914bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
915bf215546Sopenharmony_ci   uint32_t data[2];
916bf215546Sopenharmony_ci   uint64_t va;
917bf215546Sopenharmony_ci
918bf215546Sopenharmony_ci   va = radv_buffer_get_va(device->trace_bo);
919bf215546Sopenharmony_ci   va += 24;
920bf215546Sopenharmony_ci
921bf215546Sopenharmony_ci   data[0] = vb_ptr;
922bf215546Sopenharmony_ci   data[1] = vb_ptr >> 32;
923bf215546Sopenharmony_ci
924bf215546Sopenharmony_ci   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
925bf215546Sopenharmony_ci}
926bf215546Sopenharmony_ci
927bf215546Sopenharmony_cistatic void
928bf215546Sopenharmony_ciradv_save_vs_prolog(struct radv_cmd_buffer *cmd_buffer, const struct radv_shader_part *prolog)
929bf215546Sopenharmony_ci{
930bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
931bf215546Sopenharmony_ci   uint32_t data[2];
932bf215546Sopenharmony_ci   uint64_t va;
933bf215546Sopenharmony_ci
934bf215546Sopenharmony_ci   va = radv_buffer_get_va(device->trace_bo);
935bf215546Sopenharmony_ci   va += 32;
936bf215546Sopenharmony_ci
937bf215546Sopenharmony_ci   uint64_t prolog_address = (uintptr_t)prolog;
938bf215546Sopenharmony_ci   data[0] = prolog_address;
939bf215546Sopenharmony_ci   data[1] = prolog_address >> 32;
940bf215546Sopenharmony_ci
941bf215546Sopenharmony_ci   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, 2, data);
942bf215546Sopenharmony_ci}
943bf215546Sopenharmony_ci
944bf215546Sopenharmony_civoid
945bf215546Sopenharmony_ciradv_set_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
946bf215546Sopenharmony_ci                        struct radv_descriptor_set *set, unsigned idx)
947bf215546Sopenharmony_ci{
948bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
949bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
950bf215546Sopenharmony_ci
951bf215546Sopenharmony_ci   descriptors_state->sets[idx] = set;
952bf215546Sopenharmony_ci
953bf215546Sopenharmony_ci   descriptors_state->valid |= (1u << idx); /* active descriptors */
954bf215546Sopenharmony_ci   descriptors_state->dirty |= (1u << idx);
955bf215546Sopenharmony_ci}
956bf215546Sopenharmony_ci
957bf215546Sopenharmony_cistatic void
958bf215546Sopenharmony_ciradv_save_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
959bf215546Sopenharmony_ci{
960bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
961bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
962bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
963bf215546Sopenharmony_ci   uint32_t data[MAX_SETS * 2] = {0};
964bf215546Sopenharmony_ci   uint64_t va;
965bf215546Sopenharmony_ci   va = radv_buffer_get_va(device->trace_bo) + 40;
966bf215546Sopenharmony_ci
967bf215546Sopenharmony_ci   u_foreach_bit(i, descriptors_state->valid)
968bf215546Sopenharmony_ci   {
969bf215546Sopenharmony_ci      struct radv_descriptor_set *set = descriptors_state->sets[i];
970bf215546Sopenharmony_ci      data[i * 2] = (uint64_t)(uintptr_t)set;
971bf215546Sopenharmony_ci      data[i * 2 + 1] = (uint64_t)(uintptr_t)set >> 32;
972bf215546Sopenharmony_ci   }
973bf215546Sopenharmony_ci
974bf215546Sopenharmony_ci   radv_emit_write_data_packet(cmd_buffer, V_370_ME, va, MAX_SETS * 2, data);
975bf215546Sopenharmony_ci}
976bf215546Sopenharmony_ci
977bf215546Sopenharmony_cistruct radv_userdata_info *
978bf215546Sopenharmony_ciradv_lookup_user_sgpr(struct radv_pipeline *pipeline, gl_shader_stage stage, int idx)
979bf215546Sopenharmony_ci{
980bf215546Sopenharmony_ci   struct radv_shader *shader = radv_get_shader(pipeline, stage);
981bf215546Sopenharmony_ci   return &shader->info.user_sgprs_locs.shader_data[idx];
982bf215546Sopenharmony_ci}
983bf215546Sopenharmony_ci
984bf215546Sopenharmony_cistatic void
985bf215546Sopenharmony_ciradv_emit_userdata_address(struct radv_device *device, struct radeon_cmdbuf *cs,
986bf215546Sopenharmony_ci                           struct radv_pipeline *pipeline, gl_shader_stage stage, int idx,
987bf215546Sopenharmony_ci                           uint64_t va)
988bf215546Sopenharmony_ci{
989bf215546Sopenharmony_ci   struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
990bf215546Sopenharmony_ci   uint32_t base_reg = pipeline->user_data_0[stage];
991bf215546Sopenharmony_ci   if (loc->sgpr_idx == -1)
992bf215546Sopenharmony_ci      return;
993bf215546Sopenharmony_ci
994bf215546Sopenharmony_ci   assert(loc->num_sgprs == 1);
995bf215546Sopenharmony_ci
996bf215546Sopenharmony_ci   radv_emit_shader_pointer(device, cs, base_reg + loc->sgpr_idx * 4, va, false);
997bf215546Sopenharmony_ci}
998bf215546Sopenharmony_ci
999bf215546Sopenharmony_cistatic void
1000bf215546Sopenharmony_ciradv_emit_descriptor_pointers(struct radv_device *device, struct radeon_cmdbuf *cs,
1001bf215546Sopenharmony_ci                              struct radv_pipeline *pipeline,
1002bf215546Sopenharmony_ci                              struct radv_descriptor_state *descriptors_state,
1003bf215546Sopenharmony_ci                              gl_shader_stage stage)
1004bf215546Sopenharmony_ci{
1005bf215546Sopenharmony_ci   uint32_t sh_base = pipeline->user_data_0[stage];
1006bf215546Sopenharmony_ci   struct radv_userdata_locations *locs = &pipeline->shaders[stage]->info.user_sgprs_locs;
1007bf215546Sopenharmony_ci   unsigned mask = locs->descriptor_sets_enabled;
1008bf215546Sopenharmony_ci
1009bf215546Sopenharmony_ci   mask &= descriptors_state->dirty & descriptors_state->valid;
1010bf215546Sopenharmony_ci
1011bf215546Sopenharmony_ci   while (mask) {
1012bf215546Sopenharmony_ci      int start, count;
1013bf215546Sopenharmony_ci
1014bf215546Sopenharmony_ci      u_bit_scan_consecutive_range(&mask, &start, &count);
1015bf215546Sopenharmony_ci
1016bf215546Sopenharmony_ci      struct radv_userdata_info *loc = &locs->descriptor_sets[start];
1017bf215546Sopenharmony_ci      unsigned sh_offset = sh_base + loc->sgpr_idx * 4;
1018bf215546Sopenharmony_ci
1019bf215546Sopenharmony_ci      radv_emit_shader_pointer_head(cs, sh_offset, count, true);
1020bf215546Sopenharmony_ci      for (int i = 0; i < count; i++) {
1021bf215546Sopenharmony_ci         struct radv_descriptor_set *set = descriptors_state->sets[start + i];
1022bf215546Sopenharmony_ci
1023bf215546Sopenharmony_ci         radv_emit_shader_pointer_body(device, cs, set->header.va, true);
1024bf215546Sopenharmony_ci      }
1025bf215546Sopenharmony_ci   }
1026bf215546Sopenharmony_ci}
1027bf215546Sopenharmony_ci
1028bf215546Sopenharmony_ci/**
1029bf215546Sopenharmony_ci * Convert the user sample locations to hardware sample locations (the values
1030bf215546Sopenharmony_ci * that will be emitted by PA_SC_AA_SAMPLE_LOCS_PIXEL_*).
1031bf215546Sopenharmony_ci */
1032bf215546Sopenharmony_cistatic void
1033bf215546Sopenharmony_ciradv_convert_user_sample_locs(struct radv_sample_locations_state *state, uint32_t x, uint32_t y,
1034bf215546Sopenharmony_ci                              VkOffset2D *sample_locs)
1035bf215546Sopenharmony_ci{
1036bf215546Sopenharmony_ci   uint32_t x_offset = x % state->grid_size.width;
1037bf215546Sopenharmony_ci   uint32_t y_offset = y % state->grid_size.height;
1038bf215546Sopenharmony_ci   uint32_t num_samples = (uint32_t)state->per_pixel;
1039bf215546Sopenharmony_ci   VkSampleLocationEXT *user_locs;
1040bf215546Sopenharmony_ci   uint32_t pixel_offset;
1041bf215546Sopenharmony_ci
1042bf215546Sopenharmony_ci   pixel_offset = (x_offset + y_offset * state->grid_size.width) * num_samples;
1043bf215546Sopenharmony_ci
1044bf215546Sopenharmony_ci   assert(pixel_offset <= MAX_SAMPLE_LOCATIONS);
1045bf215546Sopenharmony_ci   user_locs = &state->locations[pixel_offset];
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci   for (uint32_t i = 0; i < num_samples; i++) {
1048bf215546Sopenharmony_ci      float shifted_pos_x = user_locs[i].x - 0.5;
1049bf215546Sopenharmony_ci      float shifted_pos_y = user_locs[i].y - 0.5;
1050bf215546Sopenharmony_ci
1051bf215546Sopenharmony_ci      int32_t scaled_pos_x = floorf(shifted_pos_x * 16);
1052bf215546Sopenharmony_ci      int32_t scaled_pos_y = floorf(shifted_pos_y * 16);
1053bf215546Sopenharmony_ci
1054bf215546Sopenharmony_ci      sample_locs[i].x = CLAMP(scaled_pos_x, -8, 7);
1055bf215546Sopenharmony_ci      sample_locs[i].y = CLAMP(scaled_pos_y, -8, 7);
1056bf215546Sopenharmony_ci   }
1057bf215546Sopenharmony_ci}
1058bf215546Sopenharmony_ci
1059bf215546Sopenharmony_ci/**
1060bf215546Sopenharmony_ci * Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask based on hardware sample
1061bf215546Sopenharmony_ci * locations.
1062bf215546Sopenharmony_ci */
1063bf215546Sopenharmony_cistatic void
1064bf215546Sopenharmony_ciradv_compute_sample_locs_pixel(uint32_t num_samples, VkOffset2D *sample_locs,
1065bf215546Sopenharmony_ci                               uint32_t *sample_locs_pixel)
1066bf215546Sopenharmony_ci{
1067bf215546Sopenharmony_ci   for (uint32_t i = 0; i < num_samples; i++) {
1068bf215546Sopenharmony_ci      uint32_t sample_reg_idx = i / 4;
1069bf215546Sopenharmony_ci      uint32_t sample_loc_idx = i % 4;
1070bf215546Sopenharmony_ci      int32_t pos_x = sample_locs[i].x;
1071bf215546Sopenharmony_ci      int32_t pos_y = sample_locs[i].y;
1072bf215546Sopenharmony_ci
1073bf215546Sopenharmony_ci      uint32_t shift_x = 8 * sample_loc_idx;
1074bf215546Sopenharmony_ci      uint32_t shift_y = shift_x + 4;
1075bf215546Sopenharmony_ci
1076bf215546Sopenharmony_ci      sample_locs_pixel[sample_reg_idx] |= (pos_x & 0xf) << shift_x;
1077bf215546Sopenharmony_ci      sample_locs_pixel[sample_reg_idx] |= (pos_y & 0xf) << shift_y;
1078bf215546Sopenharmony_ci   }
1079bf215546Sopenharmony_ci}
1080bf215546Sopenharmony_ci
1081bf215546Sopenharmony_ci/**
1082bf215546Sopenharmony_ci * Compute the PA_SC_CENTROID_PRIORITY_* mask based on the top left hardware
1083bf215546Sopenharmony_ci * sample locations.
1084bf215546Sopenharmony_ci */
1085bf215546Sopenharmony_cistatic uint64_t
1086bf215546Sopenharmony_ciradv_compute_centroid_priority(struct radv_cmd_buffer *cmd_buffer, VkOffset2D *sample_locs,
1087bf215546Sopenharmony_ci                               uint32_t num_samples)
1088bf215546Sopenharmony_ci{
1089bf215546Sopenharmony_ci   uint32_t *centroid_priorities = alloca(num_samples * sizeof(*centroid_priorities));
1090bf215546Sopenharmony_ci   uint32_t sample_mask = num_samples - 1;
1091bf215546Sopenharmony_ci   uint32_t *distances = alloca(num_samples * sizeof(*distances));
1092bf215546Sopenharmony_ci   uint64_t centroid_priority = 0;
1093bf215546Sopenharmony_ci
1094bf215546Sopenharmony_ci   /* Compute the distances from center for each sample. */
1095bf215546Sopenharmony_ci   for (int i = 0; i < num_samples; i++) {
1096bf215546Sopenharmony_ci      distances[i] = (sample_locs[i].x * sample_locs[i].x) + (sample_locs[i].y * sample_locs[i].y);
1097bf215546Sopenharmony_ci   }
1098bf215546Sopenharmony_ci
1099bf215546Sopenharmony_ci   /* Compute the centroid priorities by looking at the distances array. */
1100bf215546Sopenharmony_ci   for (int i = 0; i < num_samples; i++) {
1101bf215546Sopenharmony_ci      uint32_t min_idx = 0;
1102bf215546Sopenharmony_ci
1103bf215546Sopenharmony_ci      for (int j = 1; j < num_samples; j++) {
1104bf215546Sopenharmony_ci         if (distances[j] < distances[min_idx])
1105bf215546Sopenharmony_ci            min_idx = j;
1106bf215546Sopenharmony_ci      }
1107bf215546Sopenharmony_ci
1108bf215546Sopenharmony_ci      centroid_priorities[i] = min_idx;
1109bf215546Sopenharmony_ci      distances[min_idx] = 0xffffffff;
1110bf215546Sopenharmony_ci   }
1111bf215546Sopenharmony_ci
1112bf215546Sopenharmony_ci   /* Compute the final centroid priority. */
1113bf215546Sopenharmony_ci   for (int i = 0; i < 8; i++) {
1114bf215546Sopenharmony_ci      centroid_priority |= centroid_priorities[i & sample_mask] << (i * 4);
1115bf215546Sopenharmony_ci   }
1116bf215546Sopenharmony_ci
1117bf215546Sopenharmony_ci   return centroid_priority << 32 | centroid_priority;
1118bf215546Sopenharmony_ci}
1119bf215546Sopenharmony_ci
1120bf215546Sopenharmony_ci/**
1121bf215546Sopenharmony_ci * Emit the sample locations that are specified with VK_EXT_sample_locations.
1122bf215546Sopenharmony_ci */
1123bf215546Sopenharmony_cistatic void
1124bf215546Sopenharmony_ciradv_emit_sample_locations(struct radv_cmd_buffer *cmd_buffer)
1125bf215546Sopenharmony_ci{
1126bf215546Sopenharmony_ci   struct radv_sample_locations_state *sample_location = &cmd_buffer->state.dynamic.sample_location;
1127bf215546Sopenharmony_ci   uint32_t num_samples = (uint32_t)sample_location->per_pixel;
1128bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
1129bf215546Sopenharmony_ci   uint32_t sample_locs_pixel[4][2] = {0};
1130bf215546Sopenharmony_ci   VkOffset2D sample_locs[4][8]; /* 8 is the max. sample count supported */
1131bf215546Sopenharmony_ci   uint32_t max_sample_dist = 0;
1132bf215546Sopenharmony_ci   uint64_t centroid_priority;
1133bf215546Sopenharmony_ci
1134bf215546Sopenharmony_ci   if (!cmd_buffer->state.dynamic.sample_location.count)
1135bf215546Sopenharmony_ci      return;
1136bf215546Sopenharmony_ci
1137bf215546Sopenharmony_ci   /* Convert the user sample locations to hardware sample locations. */
1138bf215546Sopenharmony_ci   radv_convert_user_sample_locs(sample_location, 0, 0, sample_locs[0]);
1139bf215546Sopenharmony_ci   radv_convert_user_sample_locs(sample_location, 1, 0, sample_locs[1]);
1140bf215546Sopenharmony_ci   radv_convert_user_sample_locs(sample_location, 0, 1, sample_locs[2]);
1141bf215546Sopenharmony_ci   radv_convert_user_sample_locs(sample_location, 1, 1, sample_locs[3]);
1142bf215546Sopenharmony_ci
1143bf215546Sopenharmony_ci   /* Compute the PA_SC_AA_SAMPLE_LOCS_PIXEL_* mask. */
1144bf215546Sopenharmony_ci   for (uint32_t i = 0; i < 4; i++) {
1145bf215546Sopenharmony_ci      radv_compute_sample_locs_pixel(num_samples, sample_locs[i], sample_locs_pixel[i]);
1146bf215546Sopenharmony_ci   }
1147bf215546Sopenharmony_ci
1148bf215546Sopenharmony_ci   /* Compute the PA_SC_CENTROID_PRIORITY_* mask. */
1149bf215546Sopenharmony_ci   centroid_priority = radv_compute_centroid_priority(cmd_buffer, sample_locs[0], num_samples);
1150bf215546Sopenharmony_ci
1151bf215546Sopenharmony_ci   /* Compute the maximum sample distance from the specified locations. */
1152bf215546Sopenharmony_ci   for (unsigned i = 0; i < 4; ++i) {
1153bf215546Sopenharmony_ci      for (uint32_t j = 0; j < num_samples; j++) {
1154bf215546Sopenharmony_ci         VkOffset2D offset = sample_locs[i][j];
1155bf215546Sopenharmony_ci         max_sample_dist = MAX2(max_sample_dist, MAX2(abs(offset.x), abs(offset.y)));
1156bf215546Sopenharmony_ci      }
1157bf215546Sopenharmony_ci   }
1158bf215546Sopenharmony_ci
1159bf215546Sopenharmony_ci   /* Emit the specified user sample locations. */
1160bf215546Sopenharmony_ci   switch (num_samples) {
1161bf215546Sopenharmony_ci   case 2:
1162bf215546Sopenharmony_ci   case 4:
1163bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
1164bf215546Sopenharmony_ci                             sample_locs_pixel[0][0]);
1165bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
1166bf215546Sopenharmony_ci                             sample_locs_pixel[1][0]);
1167bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
1168bf215546Sopenharmony_ci                             sample_locs_pixel[2][0]);
1169bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
1170bf215546Sopenharmony_ci                             sample_locs_pixel[3][0]);
1171bf215546Sopenharmony_ci      break;
1172bf215546Sopenharmony_ci   case 8:
1173bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028BF8_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_0,
1174bf215546Sopenharmony_ci                             sample_locs_pixel[0][0]);
1175bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C08_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_0,
1176bf215546Sopenharmony_ci                             sample_locs_pixel[1][0]);
1177bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C18_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_0,
1178bf215546Sopenharmony_ci                             sample_locs_pixel[2][0]);
1179bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C28_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_0,
1180bf215546Sopenharmony_ci                             sample_locs_pixel[3][0]);
1181bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028BFC_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y0_1,
1182bf215546Sopenharmony_ci                             sample_locs_pixel[0][1]);
1183bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C0C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y0_1,
1184bf215546Sopenharmony_ci                             sample_locs_pixel[1][1]);
1185bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C1C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X0Y1_1,
1186bf215546Sopenharmony_ci                             sample_locs_pixel[2][1]);
1187bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028C2C_PA_SC_AA_SAMPLE_LOCS_PIXEL_X1Y1_1,
1188bf215546Sopenharmony_ci                             sample_locs_pixel[3][1]);
1189bf215546Sopenharmony_ci      break;
1190bf215546Sopenharmony_ci   default:
1191bf215546Sopenharmony_ci      unreachable("invalid number of samples");
1192bf215546Sopenharmony_ci   }
1193bf215546Sopenharmony_ci
1194bf215546Sopenharmony_ci   /* Emit the maximum sample distance and the centroid priority. */
1195bf215546Sopenharmony_ci   radeon_set_context_reg_rmw(cs, R_028BE0_PA_SC_AA_CONFIG,
1196bf215546Sopenharmony_ci                              S_028BE0_MAX_SAMPLE_DIST(max_sample_dist), ~C_028BE0_MAX_SAMPLE_DIST);
1197bf215546Sopenharmony_ci
1198bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cs, R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
1199bf215546Sopenharmony_ci   radeon_emit(cs, centroid_priority);
1200bf215546Sopenharmony_ci   radeon_emit(cs, centroid_priority >> 32);
1201bf215546Sopenharmony_ci
1202bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
1203bf215546Sopenharmony_ci}
1204bf215546Sopenharmony_ci
1205bf215546Sopenharmony_cistatic void
1206bf215546Sopenharmony_ciradv_emit_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs,
1207bf215546Sopenharmony_ci                             struct radv_pipeline *pipeline, gl_shader_stage stage, int idx,
1208bf215546Sopenharmony_ci                             uint32_t *values)
1209bf215546Sopenharmony_ci{
1210bf215546Sopenharmony_ci   struct radv_userdata_info *loc = radv_lookup_user_sgpr(pipeline, stage, idx);
1211bf215546Sopenharmony_ci   uint32_t base_reg = pipeline->user_data_0[stage];
1212bf215546Sopenharmony_ci   if (loc->sgpr_idx == -1)
1213bf215546Sopenharmony_ci      return;
1214bf215546Sopenharmony_ci
1215bf215546Sopenharmony_ci   radeon_check_space(device->ws, cs, 2 + loc->num_sgprs);
1216bf215546Sopenharmony_ci
1217bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, base_reg + loc->sgpr_idx * 4, loc->num_sgprs);
1218bf215546Sopenharmony_ci   radeon_emit_array(cs, values, loc->num_sgprs);
1219bf215546Sopenharmony_ci}
1220bf215546Sopenharmony_ci
1221bf215546Sopenharmony_cistatic void
1222bf215546Sopenharmony_ciradv_update_multisample_state(struct radv_cmd_buffer *cmd_buffer,
1223bf215546Sopenharmony_ci                              struct radv_graphics_pipeline *pipeline)
1224bf215546Sopenharmony_ci{
1225bf215546Sopenharmony_ci   int num_samples = pipeline->ms.num_samples;
1226bf215546Sopenharmony_ci   struct radv_graphics_pipeline *old_pipeline = cmd_buffer->state.emitted_graphics_pipeline;
1227bf215546Sopenharmony_ci
1228bf215546Sopenharmony_ci   if (pipeline->base.shaders[MESA_SHADER_FRAGMENT]->info.ps.needs_sample_positions)
1229bf215546Sopenharmony_ci      cmd_buffer->sample_positions_needed = true;
1230bf215546Sopenharmony_ci
1231bf215546Sopenharmony_ci   if (old_pipeline && num_samples == old_pipeline->ms.num_samples)
1232bf215546Sopenharmony_ci      return;
1233bf215546Sopenharmony_ci
1234bf215546Sopenharmony_ci   radv_emit_default_sample_locations(cmd_buffer->cs, num_samples);
1235bf215546Sopenharmony_ci
1236bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
1237bf215546Sopenharmony_ci}
1238bf215546Sopenharmony_ci
1239bf215546Sopenharmony_cistatic void
1240bf215546Sopenharmony_ciradv_update_binning_state(struct radv_cmd_buffer *cmd_buffer,
1241bf215546Sopenharmony_ci                          struct radv_graphics_pipeline *pipeline)
1242bf215546Sopenharmony_ci{
1243bf215546Sopenharmony_ci   const struct radv_graphics_pipeline *old_pipeline = cmd_buffer->state.emitted_graphics_pipeline;
1244bf215546Sopenharmony_ci
1245bf215546Sopenharmony_ci   if (pipeline->base.device->physical_device->rad_info.gfx_level < GFX9)
1246bf215546Sopenharmony_ci      return;
1247bf215546Sopenharmony_ci
1248bf215546Sopenharmony_ci   if (old_pipeline &&
1249bf215546Sopenharmony_ci       old_pipeline->binning.pa_sc_binner_cntl_0 ==
1250bf215546Sopenharmony_ci          pipeline->binning.pa_sc_binner_cntl_0)
1251bf215546Sopenharmony_ci      return;
1252bf215546Sopenharmony_ci
1253bf215546Sopenharmony_ci   bool binning_flush = false;
1254bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA12 ||
1255bf215546Sopenharmony_ci       cmd_buffer->device->physical_device->rad_info.family == CHIP_VEGA20 ||
1256bf215546Sopenharmony_ci       cmd_buffer->device->physical_device->rad_info.family == CHIP_RAVEN2 ||
1257bf215546Sopenharmony_ci       cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
1258bf215546Sopenharmony_ci      binning_flush = !old_pipeline ||
1259bf215546Sopenharmony_ci                      G_028C44_BINNING_MODE(old_pipeline->binning.pa_sc_binner_cntl_0) !=
1260bf215546Sopenharmony_ci                         G_028C44_BINNING_MODE(pipeline->binning.pa_sc_binner_cntl_0);
1261bf215546Sopenharmony_ci   }
1262bf215546Sopenharmony_ci
1263bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028C44_PA_SC_BINNER_CNTL_0,
1264bf215546Sopenharmony_ci                          pipeline->binning.pa_sc_binner_cntl_0 |
1265bf215546Sopenharmony_ci                             S_028C44_FLUSH_ON_BINNING_TRANSITION(!!binning_flush));
1266bf215546Sopenharmony_ci
1267bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
1268bf215546Sopenharmony_ci}
1269bf215546Sopenharmony_ci
1270bf215546Sopenharmony_cistatic void
1271bf215546Sopenharmony_ciradv_emit_shader_prefetch(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *shader)
1272bf215546Sopenharmony_ci{
1273bf215546Sopenharmony_ci   uint64_t va;
1274bf215546Sopenharmony_ci
1275bf215546Sopenharmony_ci   if (!shader)
1276bf215546Sopenharmony_ci      return;
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_ci   va = radv_shader_get_va(shader);
1279bf215546Sopenharmony_ci
1280bf215546Sopenharmony_ci   si_cp_dma_prefetch(cmd_buffer, va, shader->code_size);
1281bf215546Sopenharmony_ci}
1282bf215546Sopenharmony_ci
1283bf215546Sopenharmony_cistatic void
1284bf215546Sopenharmony_ciradv_emit_prefetch_L2(struct radv_cmd_buffer *cmd_buffer,
1285bf215546Sopenharmony_ci                      struct radv_graphics_pipeline *pipeline, bool first_stage_only)
1286bf215546Sopenharmony_ci{
1287bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
1288bf215546Sopenharmony_ci   uint32_t mask = state->prefetch_L2_mask;
1289bf215546Sopenharmony_ci
1290bf215546Sopenharmony_ci   /* Fast prefetch path for starting draws as soon as possible. */
1291bf215546Sopenharmony_ci   if (first_stage_only)
1292bf215546Sopenharmony_ci      mask &= RADV_PREFETCH_VS | RADV_PREFETCH_VBO_DESCRIPTORS | RADV_PREFETCH_MS;
1293bf215546Sopenharmony_ci
1294bf215546Sopenharmony_ci   if (mask & RADV_PREFETCH_VS)
1295bf215546Sopenharmony_ci      radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_VERTEX]);
1296bf215546Sopenharmony_ci
1297bf215546Sopenharmony_ci   if (mask & RADV_PREFETCH_MS)
1298bf215546Sopenharmony_ci      radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_MESH]);
1299bf215546Sopenharmony_ci
1300bf215546Sopenharmony_ci   if (mask & RADV_PREFETCH_VBO_DESCRIPTORS)
1301bf215546Sopenharmony_ci      si_cp_dma_prefetch(cmd_buffer, state->vb_va, pipeline->vb_desc_alloc_size);
1302bf215546Sopenharmony_ci
1303bf215546Sopenharmony_ci   if (mask & RADV_PREFETCH_TCS)
1304bf215546Sopenharmony_ci      radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_CTRL]);
1305bf215546Sopenharmony_ci
1306bf215546Sopenharmony_ci   if (mask & RADV_PREFETCH_TES)
1307bf215546Sopenharmony_ci      radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_TESS_EVAL]);
1308bf215546Sopenharmony_ci
1309bf215546Sopenharmony_ci   if (mask & RADV_PREFETCH_GS) {
1310bf215546Sopenharmony_ci      radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_GEOMETRY]);
1311bf215546Sopenharmony_ci      if (radv_pipeline_has_gs_copy_shader(&pipeline->base))
1312bf215546Sopenharmony_ci         radv_emit_shader_prefetch(cmd_buffer, pipeline->base.gs_copy_shader);
1313bf215546Sopenharmony_ci   }
1314bf215546Sopenharmony_ci
1315bf215546Sopenharmony_ci   if (mask & RADV_PREFETCH_PS)
1316bf215546Sopenharmony_ci      radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_FRAGMENT]);
1317bf215546Sopenharmony_ci
1318bf215546Sopenharmony_ci   state->prefetch_L2_mask &= ~mask;
1319bf215546Sopenharmony_ci}
1320bf215546Sopenharmony_ci
1321bf215546Sopenharmony_cistatic void
1322bf215546Sopenharmony_ciradv_emit_rbplus_state(struct radv_cmd_buffer *cmd_buffer)
1323bf215546Sopenharmony_ci{
1324bf215546Sopenharmony_ci   if (!cmd_buffer->device->physical_device->rad_info.rbplus_allowed)
1325bf215546Sopenharmony_ci      return;
1326bf215546Sopenharmony_ci
1327bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
1328bf215546Sopenharmony_ci   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
1329bf215546Sopenharmony_ci
1330bf215546Sopenharmony_ci   unsigned sx_ps_downconvert = 0;
1331bf215546Sopenharmony_ci   unsigned sx_blend_opt_epsilon = 0;
1332bf215546Sopenharmony_ci   unsigned sx_blend_opt_control = 0;
1333bf215546Sopenharmony_ci
1334bf215546Sopenharmony_ci   for (unsigned i = 0; i < subpass->color_count; ++i) {
1335bf215546Sopenharmony_ci      unsigned format, swap;
1336bf215546Sopenharmony_ci      bool has_alpha, has_rgb;
1337bf215546Sopenharmony_ci      if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
1338bf215546Sopenharmony_ci         /* We don't set the DISABLE bits, because the HW can't have holes,
1339bf215546Sopenharmony_ci          * so the SPI color format is set to 32-bit 1-component. */
1340bf215546Sopenharmony_ci         sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
1341bf215546Sopenharmony_ci         continue;
1342bf215546Sopenharmony_ci      }
1343bf215546Sopenharmony_ci
1344bf215546Sopenharmony_ci      int idx = subpass->color_attachments[i].attachment;
1345bf215546Sopenharmony_ci      if (cmd_buffer->state.attachments) {
1346bf215546Sopenharmony_ci         struct radv_color_buffer_info *cb = &cmd_buffer->state.attachments[idx].cb;
1347bf215546Sopenharmony_ci
1348bf215546Sopenharmony_ci         format = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11
1349bf215546Sopenharmony_ci                     ? G_028C70_FORMAT_GFX11(cb->cb_color_info)
1350bf215546Sopenharmony_ci                     : G_028C70_FORMAT_GFX6(cb->cb_color_info);
1351bf215546Sopenharmony_ci         swap = G_028C70_COMP_SWAP(cb->cb_color_info);
1352bf215546Sopenharmony_ci         has_alpha = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11
1353bf215546Sopenharmony_ci                        ? !G_028C74_FORCE_DST_ALPHA_1_GFX11(cb->cb_color_attrib)
1354bf215546Sopenharmony_ci                        : !G_028C74_FORCE_DST_ALPHA_1_GFX6(cb->cb_color_attrib);
1355bf215546Sopenharmony_ci      } else {
1356bf215546Sopenharmony_ci         VkFormat fmt = cmd_buffer->state.pass->attachments[idx].format;
1357bf215546Sopenharmony_ci         format = radv_translate_colorformat(fmt);
1358bf215546Sopenharmony_ci         swap = radv_translate_colorswap(fmt, false);
1359bf215546Sopenharmony_ci         has_alpha = vk_format_description(fmt)->swizzle[3] != PIPE_SWIZZLE_1;
1360bf215546Sopenharmony_ci      }
1361bf215546Sopenharmony_ci
1362bf215546Sopenharmony_ci      uint32_t spi_format = (pipeline->col_format >> (i * 4)) & 0xf;
1363bf215546Sopenharmony_ci      uint32_t colormask = (pipeline->cb_target_mask >> (i * 4)) & 0xf;
1364bf215546Sopenharmony_ci
1365bf215546Sopenharmony_ci      if (format == V_028C70_COLOR_8 || format == V_028C70_COLOR_16 || format == V_028C70_COLOR_32)
1366bf215546Sopenharmony_ci         has_rgb = !has_alpha;
1367bf215546Sopenharmony_ci      else
1368bf215546Sopenharmony_ci         has_rgb = true;
1369bf215546Sopenharmony_ci
1370bf215546Sopenharmony_ci      /* Check the colormask and export format. */
1371bf215546Sopenharmony_ci      if (!(colormask & 0x7))
1372bf215546Sopenharmony_ci         has_rgb = false;
1373bf215546Sopenharmony_ci      if (!(colormask & 0x8))
1374bf215546Sopenharmony_ci         has_alpha = false;
1375bf215546Sopenharmony_ci
1376bf215546Sopenharmony_ci      if (spi_format == V_028714_SPI_SHADER_ZERO) {
1377bf215546Sopenharmony_ci         has_rgb = false;
1378bf215546Sopenharmony_ci         has_alpha = false;
1379bf215546Sopenharmony_ci      }
1380bf215546Sopenharmony_ci
1381bf215546Sopenharmony_ci      /* The HW doesn't quite blend correctly with rgb9e5 if we disable the alpha
1382bf215546Sopenharmony_ci       * optimization, even though it has no alpha. */
1383bf215546Sopenharmony_ci      if (has_rgb && format == V_028C70_COLOR_5_9_9_9)
1384bf215546Sopenharmony_ci         has_alpha = true;
1385bf215546Sopenharmony_ci
1386bf215546Sopenharmony_ci      /* Disable value checking for disabled channels. */
1387bf215546Sopenharmony_ci      if (!has_rgb)
1388bf215546Sopenharmony_ci         sx_blend_opt_control |= S_02875C_MRT0_COLOR_OPT_DISABLE(1) << (i * 4);
1389bf215546Sopenharmony_ci      if (!has_alpha)
1390bf215546Sopenharmony_ci         sx_blend_opt_control |= S_02875C_MRT0_ALPHA_OPT_DISABLE(1) << (i * 4);
1391bf215546Sopenharmony_ci
1392bf215546Sopenharmony_ci      /* Enable down-conversion for 32bpp and smaller formats. */
1393bf215546Sopenharmony_ci      switch (format) {
1394bf215546Sopenharmony_ci      case V_028C70_COLOR_8:
1395bf215546Sopenharmony_ci      case V_028C70_COLOR_8_8:
1396bf215546Sopenharmony_ci      case V_028C70_COLOR_8_8_8_8:
1397bf215546Sopenharmony_ci         /* For 1 and 2-channel formats, use the superset thereof. */
1398bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_FP16_ABGR ||
1399bf215546Sopenharmony_ci             spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
1400bf215546Sopenharmony_ci             spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
1401bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_8_8_8_8 << (i * 4);
1402bf215546Sopenharmony_ci            sx_blend_opt_epsilon |= V_028758_8BIT_FORMAT << (i * 4);
1403bf215546Sopenharmony_ci         }
1404bf215546Sopenharmony_ci         break;
1405bf215546Sopenharmony_ci
1406bf215546Sopenharmony_ci      case V_028C70_COLOR_5_6_5:
1407bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
1408bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_5_6_5 << (i * 4);
1409bf215546Sopenharmony_ci            sx_blend_opt_epsilon |= V_028758_6BIT_FORMAT << (i * 4);
1410bf215546Sopenharmony_ci         }
1411bf215546Sopenharmony_ci         break;
1412bf215546Sopenharmony_ci
1413bf215546Sopenharmony_ci      case V_028C70_COLOR_1_5_5_5:
1414bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
1415bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_1_5_5_5 << (i * 4);
1416bf215546Sopenharmony_ci            sx_blend_opt_epsilon |= V_028758_5BIT_FORMAT << (i * 4);
1417bf215546Sopenharmony_ci         }
1418bf215546Sopenharmony_ci         break;
1419bf215546Sopenharmony_ci
1420bf215546Sopenharmony_ci      case V_028C70_COLOR_4_4_4_4:
1421bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
1422bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_4_4_4_4 << (i * 4);
1423bf215546Sopenharmony_ci            sx_blend_opt_epsilon |= V_028758_4BIT_FORMAT << (i * 4);
1424bf215546Sopenharmony_ci         }
1425bf215546Sopenharmony_ci         break;
1426bf215546Sopenharmony_ci
1427bf215546Sopenharmony_ci      case V_028C70_COLOR_32:
1428bf215546Sopenharmony_ci         if (swap == V_028C70_SWAP_STD && spi_format == V_028714_SPI_SHADER_32_R)
1429bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_R << (i * 4);
1430bf215546Sopenharmony_ci         else if (swap == V_028C70_SWAP_ALT_REV && spi_format == V_028714_SPI_SHADER_32_AR)
1431bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_32_A << (i * 4);
1432bf215546Sopenharmony_ci         break;
1433bf215546Sopenharmony_ci
1434bf215546Sopenharmony_ci      case V_028C70_COLOR_16:
1435bf215546Sopenharmony_ci      case V_028C70_COLOR_16_16:
1436bf215546Sopenharmony_ci         /* For 1-channel formats, use the superset thereof. */
1437bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_UNORM16_ABGR ||
1438bf215546Sopenharmony_ci             spi_format == V_028714_SPI_SHADER_SNORM16_ABGR ||
1439bf215546Sopenharmony_ci             spi_format == V_028714_SPI_SHADER_UINT16_ABGR ||
1440bf215546Sopenharmony_ci             spi_format == V_028714_SPI_SHADER_SINT16_ABGR) {
1441bf215546Sopenharmony_ci            if (swap == V_028C70_SWAP_STD || swap == V_028C70_SWAP_STD_REV)
1442bf215546Sopenharmony_ci               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_GR << (i * 4);
1443bf215546Sopenharmony_ci            else
1444bf215546Sopenharmony_ci               sx_ps_downconvert |= V_028754_SX_RT_EXPORT_16_16_AR << (i * 4);
1445bf215546Sopenharmony_ci         }
1446bf215546Sopenharmony_ci         break;
1447bf215546Sopenharmony_ci
1448bf215546Sopenharmony_ci      case V_028C70_COLOR_10_11_11:
1449bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
1450bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_10_11_11 << (i * 4);
1451bf215546Sopenharmony_ci         break;
1452bf215546Sopenharmony_ci
1453bf215546Sopenharmony_ci      case V_028C70_COLOR_2_10_10_10:
1454bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_FP16_ABGR) {
1455bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_2_10_10_10 << (i * 4);
1456bf215546Sopenharmony_ci            sx_blend_opt_epsilon |= V_028758_10BIT_FORMAT << (i * 4);
1457bf215546Sopenharmony_ci         }
1458bf215546Sopenharmony_ci         break;
1459bf215546Sopenharmony_ci      case V_028C70_COLOR_5_9_9_9:
1460bf215546Sopenharmony_ci         if (spi_format == V_028714_SPI_SHADER_FP16_ABGR)
1461bf215546Sopenharmony_ci            sx_ps_downconvert |= V_028754_SX_RT_EXPORT_9_9_9_E5 << (i * 4);
1462bf215546Sopenharmony_ci         break;
1463bf215546Sopenharmony_ci      }
1464bf215546Sopenharmony_ci   }
1465bf215546Sopenharmony_ci
1466bf215546Sopenharmony_ci   /* Do not set the DISABLE bits for the unused attachments, as that
1467bf215546Sopenharmony_ci    * breaks dual source blending in SkQP and does not seem to improve
1468bf215546Sopenharmony_ci    * performance. */
1469bf215546Sopenharmony_ci
1470bf215546Sopenharmony_ci   if (sx_ps_downconvert == cmd_buffer->state.last_sx_ps_downconvert &&
1471bf215546Sopenharmony_ci       sx_blend_opt_epsilon == cmd_buffer->state.last_sx_blend_opt_epsilon &&
1472bf215546Sopenharmony_ci       sx_blend_opt_control == cmd_buffer->state.last_sx_blend_opt_control)
1473bf215546Sopenharmony_ci      return;
1474bf215546Sopenharmony_ci
1475bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_028754_SX_PS_DOWNCONVERT, 3);
1476bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, sx_ps_downconvert);
1477bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, sx_blend_opt_epsilon);
1478bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, sx_blend_opt_control);
1479bf215546Sopenharmony_ci
1480bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
1481bf215546Sopenharmony_ci
1482bf215546Sopenharmony_ci   cmd_buffer->state.last_sx_ps_downconvert = sx_ps_downconvert;
1483bf215546Sopenharmony_ci   cmd_buffer->state.last_sx_blend_opt_epsilon = sx_blend_opt_epsilon;
1484bf215546Sopenharmony_ci   cmd_buffer->state.last_sx_blend_opt_control = sx_blend_opt_control;
1485bf215546Sopenharmony_ci}
1486bf215546Sopenharmony_ci
1487bf215546Sopenharmony_cistatic void
1488bf215546Sopenharmony_ciradv_emit_batch_break_on_new_ps(struct radv_cmd_buffer *cmd_buffer)
1489bf215546Sopenharmony_ci{
1490bf215546Sopenharmony_ci   if (!cmd_buffer->device->pbb_allowed)
1491bf215546Sopenharmony_ci      return;
1492bf215546Sopenharmony_ci
1493bf215546Sopenharmony_ci   struct radv_binning_settings settings =
1494bf215546Sopenharmony_ci      radv_get_binning_settings(cmd_buffer->device->physical_device);
1495bf215546Sopenharmony_ci   bool break_for_new_ps =
1496bf215546Sopenharmony_ci      (!cmd_buffer->state.emitted_graphics_pipeline ||
1497bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT] !=
1498bf215546Sopenharmony_ci          cmd_buffer->state.graphics_pipeline->base.shaders[MESA_SHADER_FRAGMENT]) &&
1499bf215546Sopenharmony_ci      (settings.context_states_per_bin > 1 || settings.persistent_states_per_bin > 1);
1500bf215546Sopenharmony_ci   bool break_for_new_cb_target_mask =
1501bf215546Sopenharmony_ci      (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE) &&
1502bf215546Sopenharmony_ci      settings.context_states_per_bin > 1;
1503bf215546Sopenharmony_ci
1504bf215546Sopenharmony_ci   if (!break_for_new_ps && !break_for_new_cb_target_mask)
1505bf215546Sopenharmony_ci      return;
1506bf215546Sopenharmony_ci
1507bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
1508bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, EVENT_TYPE(V_028A90_BREAK_BATCH) | EVENT_INDEX(0));
1509bf215546Sopenharmony_ci}
1510bf215546Sopenharmony_ci
1511bf215546Sopenharmony_cistatic void
1512bf215546Sopenharmony_ciradv_emit_graphics_pipeline(struct radv_cmd_buffer *cmd_buffer)
1513bf215546Sopenharmony_ci{
1514bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
1515bf215546Sopenharmony_ci
1516bf215546Sopenharmony_ci   if (cmd_buffer->state.emitted_graphics_pipeline == pipeline)
1517bf215546Sopenharmony_ci      return;
1518bf215546Sopenharmony_ci
1519bf215546Sopenharmony_ci   radv_update_multisample_state(cmd_buffer, pipeline);
1520bf215546Sopenharmony_ci   radv_update_binning_state(cmd_buffer, pipeline);
1521bf215546Sopenharmony_ci
1522bf215546Sopenharmony_ci   cmd_buffer->scratch_size_per_wave_needed =
1523bf215546Sopenharmony_ci      MAX2(cmd_buffer->scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave);
1524bf215546Sopenharmony_ci   cmd_buffer->scratch_waves_wanted = MAX2(cmd_buffer->scratch_waves_wanted, pipeline->base.max_waves);
1525bf215546Sopenharmony_ci
1526bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1527bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->negative_one_to_one != pipeline->negative_one_to_one ||
1528bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->depth_clamp_mode != pipeline->depth_clamp_mode)
1529bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
1530bf215546Sopenharmony_ci
1531bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1532bf215546Sopenharmony_ci       radv_rast_prim_is_points_or_lines(cmd_buffer->state.emitted_graphics_pipeline->rast_prim) != radv_rast_prim_is_points_or_lines(pipeline->rast_prim) ||
1533bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->line_width != pipeline->line_width)
1534bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
1535bf215546Sopenharmony_ci
1536bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1537bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->pa_su_sc_mode_cntl != pipeline->pa_su_sc_mode_cntl)
1538bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE |
1539bf215546Sopenharmony_ci                                 RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
1540bf215546Sopenharmony_ci                                 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
1541bf215546Sopenharmony_ci
1542bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1543bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->pa_cl_clip_cntl != pipeline->pa_cl_clip_cntl)
1544bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
1545bf215546Sopenharmony_ci
1546bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1547bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->cb_color_control != pipeline->cb_color_control)
1548bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
1549bf215546Sopenharmony_ci
1550bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline)
1551bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY |
1552bf215546Sopenharmony_ci                                 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS |
1553bf215546Sopenharmony_ci                                 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS |
1554bf215546Sopenharmony_ci                                 RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
1555bf215546Sopenharmony_ci
1556bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1557bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->db_depth_control != pipeline->db_depth_control)
1558bf215546Sopenharmony_ci      cmd_buffer->state.dirty |=
1559bf215546Sopenharmony_ci         RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
1560bf215546Sopenharmony_ci         RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
1561bf215546Sopenharmony_ci         RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
1562bf215546Sopenharmony_ci
1563bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline)
1564bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
1565bf215546Sopenharmony_ci
1566bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1567bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->cb_target_mask != pipeline->cb_target_mask) {
1568bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE;
1569bf215546Sopenharmony_ci   }
1570bf215546Sopenharmony_ci
1571bf215546Sopenharmony_ci   radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
1572bf215546Sopenharmony_ci
1573bf215546Sopenharmony_ci   if (pipeline->has_ngg_culling &&
1574bf215546Sopenharmony_ci       pipeline->last_vgt_api_stage != MESA_SHADER_GEOMETRY &&
1575bf215546Sopenharmony_ci       !cmd_buffer->state.last_nggc_settings) {
1576bf215546Sopenharmony_ci      /* The already emitted RSRC2 contains the LDS required for NGG culling.
1577bf215546Sopenharmony_ci       * Culling is currently disabled, so re-emit RSRC2 to reduce LDS usage.
1578bf215546Sopenharmony_ci       * API GS always needs LDS, so this isn't useful there.
1579bf215546Sopenharmony_ci       */
1580bf215546Sopenharmony_ci      struct radv_shader *v = pipeline->base.shaders[pipeline->last_vgt_api_stage];
1581bf215546Sopenharmony_ci      radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS,
1582bf215546Sopenharmony_ci                        (v->config.rsrc2 & C_00B22C_LDS_SIZE) |
1583bf215546Sopenharmony_ci                        S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling));
1584bf215546Sopenharmony_ci   }
1585bf215546Sopenharmony_ci
1586bf215546Sopenharmony_ci   if (!cmd_buffer->state.emitted_graphics_pipeline ||
1587bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.cdw != pipeline->base.ctx_cs.cdw ||
1588bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs_hash != pipeline->base.ctx_cs_hash ||
1589bf215546Sopenharmony_ci       memcmp(cmd_buffer->state.emitted_graphics_pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.buf,
1590bf215546Sopenharmony_ci              pipeline->base.ctx_cs.cdw * 4)) {
1591bf215546Sopenharmony_ci      radeon_emit_array(cmd_buffer->cs, pipeline->base.ctx_cs.buf, pipeline->base.ctx_cs.cdw);
1592bf215546Sopenharmony_ci      cmd_buffer->state.context_roll_without_scissor_emitted = true;
1593bf215546Sopenharmony_ci   }
1594bf215546Sopenharmony_ci
1595bf215546Sopenharmony_ci   radv_emit_batch_break_on_new_ps(cmd_buffer);
1596bf215546Sopenharmony_ci
1597bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo);
1598bf215546Sopenharmony_ci
1599bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->trace_bo))
1600bf215546Sopenharmony_ci      radv_save_pipeline(cmd_buffer, &pipeline->base);
1601bf215546Sopenharmony_ci
1602bf215546Sopenharmony_ci   cmd_buffer->state.emitted_graphics_pipeline = pipeline;
1603bf215546Sopenharmony_ci
1604bf215546Sopenharmony_ci   cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_PIPELINE;
1605bf215546Sopenharmony_ci}
1606bf215546Sopenharmony_ci
1607bf215546Sopenharmony_cistatic void
1608bf215546Sopenharmony_ciradv_emit_viewport(struct radv_cmd_buffer *cmd_buffer)
1609bf215546Sopenharmony_ci{
1610bf215546Sopenharmony_ci   const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
1611bf215546Sopenharmony_ci   const struct radv_viewport_state *viewport = &cmd_buffer->state.dynamic.viewport;
1612bf215546Sopenharmony_ci   int i;
1613bf215546Sopenharmony_ci   const unsigned count = viewport->count;
1614bf215546Sopenharmony_ci
1615bf215546Sopenharmony_ci   assert(count);
1616bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_02843C_PA_CL_VPORT_XSCALE, count * 6);
1617bf215546Sopenharmony_ci
1618bf215546Sopenharmony_ci   for (i = 0; i < count; i++) {
1619bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[0]));
1620bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[0]));
1621bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].scale[1]));
1622bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(viewport->xform[i].translate[1]));
1623bf215546Sopenharmony_ci
1624bf215546Sopenharmony_ci      double scale_z, translate_z;
1625bf215546Sopenharmony_ci      if (pipeline->negative_one_to_one) {
1626bf215546Sopenharmony_ci         scale_z = viewport->xform[i].scale[2] * 0.5f;
1627bf215546Sopenharmony_ci         translate_z = (viewport->xform[i].translate[2] + viewport->viewports[i].maxDepth) * 0.5f;
1628bf215546Sopenharmony_ci      } else {
1629bf215546Sopenharmony_ci         scale_z = viewport->xform[i].scale[2];
1630bf215546Sopenharmony_ci         translate_z = viewport->xform[i].translate[2];
1631bf215546Sopenharmony_ci
1632bf215546Sopenharmony_ci      }
1633bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(scale_z));
1634bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(translate_z));
1635bf215546Sopenharmony_ci   }
1636bf215546Sopenharmony_ci
1637bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_0282D0_PA_SC_VPORT_ZMIN_0, count * 2);
1638bf215546Sopenharmony_ci   for (i = 0; i < count; i++) {
1639bf215546Sopenharmony_ci      float zmin, zmax;
1640bf215546Sopenharmony_ci
1641bf215546Sopenharmony_ci      if (pipeline->depth_clamp_mode == RADV_DEPTH_CLAMP_MODE_ZERO_TO_ONE) {
1642bf215546Sopenharmony_ci         zmin = 0.0f;
1643bf215546Sopenharmony_ci         zmax = 1.0f;
1644bf215546Sopenharmony_ci      } else {
1645bf215546Sopenharmony_ci         zmin = MIN2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth);
1646bf215546Sopenharmony_ci         zmax = MAX2(viewport->viewports[i].minDepth, viewport->viewports[i].maxDepth);
1647bf215546Sopenharmony_ci      }
1648bf215546Sopenharmony_ci
1649bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(zmin));
1650bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, fui(zmax));
1651bf215546Sopenharmony_ci   }
1652bf215546Sopenharmony_ci}
1653bf215546Sopenharmony_ci
1654bf215546Sopenharmony_civoid
1655bf215546Sopenharmony_ciradv_write_scissors(struct radv_cmd_buffer *cmd_buffer, struct radeon_cmdbuf *cs)
1656bf215546Sopenharmony_ci{
1657bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
1658bf215546Sopenharmony_ci   uint32_t count = cmd_buffer->state.dynamic.scissor.count;
1659bf215546Sopenharmony_ci   unsigned rast_prim;
1660bf215546Sopenharmony_ci
1661bf215546Sopenharmony_ci   if (!(pipeline->dynamic_states & RADV_DYNAMIC_PRIMITIVE_TOPOLOGY) ||
1662bf215546Sopenharmony_ci       (pipeline->active_stages & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT |
1663bf215546Sopenharmony_ci                                   VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT |
1664bf215546Sopenharmony_ci                                   VK_SHADER_STAGE_GEOMETRY_BIT |
1665bf215546Sopenharmony_ci                                   VK_SHADER_STAGE_MESH_BIT_NV))) {
1666bf215546Sopenharmony_ci      /* Ignore dynamic primitive topology for TES/GS/MS stages. */
1667bf215546Sopenharmony_ci      rast_prim = pipeline->rast_prim;
1668bf215546Sopenharmony_ci   } else {
1669bf215546Sopenharmony_ci      rast_prim = si_conv_prim_to_gs_out(cmd_buffer->state.dynamic.primitive_topology);
1670bf215546Sopenharmony_ci   }
1671bf215546Sopenharmony_ci
1672bf215546Sopenharmony_ci   si_write_scissors(cs, 0, count, cmd_buffer->state.dynamic.scissor.scissors,
1673bf215546Sopenharmony_ci                     cmd_buffer->state.dynamic.viewport.viewports, rast_prim,
1674bf215546Sopenharmony_ci                     cmd_buffer->state.dynamic.line_width);
1675bf215546Sopenharmony_ci}
1676bf215546Sopenharmony_ci
1677bf215546Sopenharmony_cistatic void
1678bf215546Sopenharmony_ciradv_emit_scissor(struct radv_cmd_buffer *cmd_buffer)
1679bf215546Sopenharmony_ci{
1680bf215546Sopenharmony_ci   radv_write_scissors(cmd_buffer, cmd_buffer->cs);
1681bf215546Sopenharmony_ci
1682bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = false;
1683bf215546Sopenharmony_ci}
1684bf215546Sopenharmony_ci
1685bf215546Sopenharmony_cistatic void
1686bf215546Sopenharmony_ciradv_emit_discard_rectangle(struct radv_cmd_buffer *cmd_buffer)
1687bf215546Sopenharmony_ci{
1688bf215546Sopenharmony_ci   if (!cmd_buffer->state.dynamic.discard_rectangle.count)
1689bf215546Sopenharmony_ci      return;
1690bf215546Sopenharmony_ci
1691bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_028210_PA_SC_CLIPRECT_0_TL,
1692bf215546Sopenharmony_ci                              cmd_buffer->state.dynamic.discard_rectangle.count * 2);
1693bf215546Sopenharmony_ci   for (unsigned i = 0; i < cmd_buffer->state.dynamic.discard_rectangle.count; ++i) {
1694bf215546Sopenharmony_ci      VkRect2D rect = cmd_buffer->state.dynamic.discard_rectangle.rectangles[i];
1695bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028210_TL_X(rect.offset.x) | S_028210_TL_Y(rect.offset.y));
1696bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028214_BR_X(rect.offset.x + rect.extent.width) |
1697bf215546Sopenharmony_ci                                     S_028214_BR_Y(rect.offset.y + rect.extent.height));
1698bf215546Sopenharmony_ci   }
1699bf215546Sopenharmony_ci}
1700bf215546Sopenharmony_ci
1701bf215546Sopenharmony_cistatic void
1702bf215546Sopenharmony_ciradv_emit_line_width(struct radv_cmd_buffer *cmd_buffer)
1703bf215546Sopenharmony_ci{
1704bf215546Sopenharmony_ci   unsigned width = cmd_buffer->state.dynamic.line_width * 8;
1705bf215546Sopenharmony_ci
1706bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028A08_PA_SU_LINE_CNTL,
1707bf215546Sopenharmony_ci                          S_028A08_WIDTH(CLAMP(width, 0, 0xFFFF)));
1708bf215546Sopenharmony_ci}
1709bf215546Sopenharmony_ci
1710bf215546Sopenharmony_cistatic void
1711bf215546Sopenharmony_ciradv_emit_blend_constants(struct radv_cmd_buffer *cmd_buffer)
1712bf215546Sopenharmony_ci{
1713bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_028414_CB_BLEND_RED, 4);
1716bf215546Sopenharmony_ci   radeon_emit_array(cmd_buffer->cs, (uint32_t *)d->blend_constants, 4);
1717bf215546Sopenharmony_ci}
1718bf215546Sopenharmony_ci
1719bf215546Sopenharmony_cistatic void
1720bf215546Sopenharmony_ciradv_emit_stencil(struct radv_cmd_buffer *cmd_buffer)
1721bf215546Sopenharmony_ci{
1722bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1723bf215546Sopenharmony_ci
1724bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_028430_DB_STENCILREFMASK, 2);
1725bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, S_028430_STENCILTESTVAL(d->stencil_reference.front) |
1726bf215546Sopenharmony_ci                                  S_028430_STENCILMASK(d->stencil_compare_mask.front) |
1727bf215546Sopenharmony_ci                                  S_028430_STENCILWRITEMASK(d->stencil_write_mask.front) |
1728bf215546Sopenharmony_ci                                  S_028430_STENCILOPVAL(1));
1729bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, S_028434_STENCILTESTVAL_BF(d->stencil_reference.back) |
1730bf215546Sopenharmony_ci                                  S_028434_STENCILMASK_BF(d->stencil_compare_mask.back) |
1731bf215546Sopenharmony_ci                                  S_028434_STENCILWRITEMASK_BF(d->stencil_write_mask.back) |
1732bf215546Sopenharmony_ci                                  S_028434_STENCILOPVAL_BF(1));
1733bf215546Sopenharmony_ci}
1734bf215546Sopenharmony_ci
1735bf215546Sopenharmony_cistatic void
1736bf215546Sopenharmony_ciradv_emit_depth_bounds(struct radv_cmd_buffer *cmd_buffer)
1737bf215546Sopenharmony_ci{
1738bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1739bf215546Sopenharmony_ci
1740bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_028020_DB_DEPTH_BOUNDS_MIN, 2);
1741bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.min));
1742bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, fui(d->depth_bounds.max));
1743bf215546Sopenharmony_ci}
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_cistatic void
1746bf215546Sopenharmony_ciradv_emit_depth_bias(struct radv_cmd_buffer *cmd_buffer)
1747bf215546Sopenharmony_ci{
1748bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1749bf215546Sopenharmony_ci   unsigned slope = fui(d->depth_bias.slope * 16.0f);
1750bf215546Sopenharmony_ci
1751bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cmd_buffer->cs, R_028B7C_PA_SU_POLY_OFFSET_CLAMP, 5);
1752bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, fui(d->depth_bias.clamp)); /* CLAMP */
1753bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, slope);                    /* FRONT SCALE */
1754bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias));  /* FRONT OFFSET */
1755bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, slope);                    /* BACK SCALE */
1756bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, fui(d->depth_bias.bias));  /* BACK OFFSET */
1757bf215546Sopenharmony_ci}
1758bf215546Sopenharmony_ci
1759bf215546Sopenharmony_cistatic void
1760bf215546Sopenharmony_ciradv_emit_line_stipple(struct radv_cmd_buffer *cmd_buffer)
1761bf215546Sopenharmony_ci{
1762bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1763bf215546Sopenharmony_ci   uint32_t auto_reset_cntl = 1;
1764bf215546Sopenharmony_ci
1765bf215546Sopenharmony_ci   if (d->primitive_topology == V_008958_DI_PT_LINESTRIP)
1766bf215546Sopenharmony_ci      auto_reset_cntl = 2;
1767bf215546Sopenharmony_ci
1768bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028A0C_PA_SC_LINE_STIPPLE,
1769bf215546Sopenharmony_ci                          S_028A0C_LINE_PATTERN(d->line_stipple.pattern) |
1770bf215546Sopenharmony_ci                             S_028A0C_REPEAT_COUNT(d->line_stipple.factor - 1) |
1771bf215546Sopenharmony_ci                             S_028A0C_AUTO_RESET_CNTL(auto_reset_cntl));
1772bf215546Sopenharmony_ci}
1773bf215546Sopenharmony_ci
1774bf215546Sopenharmony_ciuint32_t
1775bf215546Sopenharmony_ciradv_get_pa_su_sc_mode_cntl(const struct radv_cmd_buffer *cmd_buffer)
1776bf215546Sopenharmony_ci{
1777bf215546Sopenharmony_ci   unsigned pa_su_sc_mode_cntl = cmd_buffer->state.graphics_pipeline->pa_su_sc_mode_cntl;
1778bf215546Sopenharmony_ci   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1779bf215546Sopenharmony_ci
1780bf215546Sopenharmony_ci   pa_su_sc_mode_cntl &= C_028814_CULL_FRONT &
1781bf215546Sopenharmony_ci                         C_028814_CULL_BACK &
1782bf215546Sopenharmony_ci                         C_028814_FACE &
1783bf215546Sopenharmony_ci                         C_028814_POLY_OFFSET_FRONT_ENABLE &
1784bf215546Sopenharmony_ci                         C_028814_POLY_OFFSET_BACK_ENABLE &
1785bf215546Sopenharmony_ci                         C_028814_POLY_OFFSET_PARA_ENABLE;
1786bf215546Sopenharmony_ci
1787bf215546Sopenharmony_ci   pa_su_sc_mode_cntl |= S_028814_CULL_FRONT(!!(d->cull_mode & VK_CULL_MODE_FRONT_BIT)) |
1788bf215546Sopenharmony_ci                         S_028814_CULL_BACK(!!(d->cull_mode & VK_CULL_MODE_BACK_BIT)) |
1789bf215546Sopenharmony_ci                         S_028814_FACE(d->front_face) |
1790bf215546Sopenharmony_ci                         S_028814_POLY_OFFSET_FRONT_ENABLE(d->depth_bias_enable) |
1791bf215546Sopenharmony_ci                         S_028814_POLY_OFFSET_BACK_ENABLE(d->depth_bias_enable) |
1792bf215546Sopenharmony_ci                         S_028814_POLY_OFFSET_PARA_ENABLE(d->depth_bias_enable);
1793bf215546Sopenharmony_ci   return pa_su_sc_mode_cntl;
1794bf215546Sopenharmony_ci}
1795bf215546Sopenharmony_ci
1796bf215546Sopenharmony_cistatic void
1797bf215546Sopenharmony_ciradv_emit_culling(struct radv_cmd_buffer *cmd_buffer, uint64_t states)
1798bf215546Sopenharmony_ci{
1799bf215546Sopenharmony_ci   unsigned pa_su_sc_mode_cntl = radv_get_pa_su_sc_mode_cntl(cmd_buffer);
1800bf215546Sopenharmony_ci
1801bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028814_PA_SU_SC_MODE_CNTL, pa_su_sc_mode_cntl);
1802bf215546Sopenharmony_ci}
1803bf215546Sopenharmony_ci
1804bf215546Sopenharmony_cistatic void
1805bf215546Sopenharmony_ciradv_emit_primitive_topology(struct radv_cmd_buffer *cmd_buffer)
1806bf215546Sopenharmony_ci{
1807bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1808bf215546Sopenharmony_ci
1809bf215546Sopenharmony_ci   assert(!cmd_buffer->state.mesh_shading);
1810bf215546Sopenharmony_ci
1811bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
1812bf215546Sopenharmony_ci      radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cmd_buffer->cs,
1813bf215546Sopenharmony_ci                                 R_030908_VGT_PRIMITIVE_TYPE, 1, d->primitive_topology);
1814bf215546Sopenharmony_ci   } else {
1815bf215546Sopenharmony_ci      radeon_set_config_reg(cmd_buffer->cs, R_008958_VGT_PRIMITIVE_TYPE, d->primitive_topology);
1816bf215546Sopenharmony_ci   }
1817bf215546Sopenharmony_ci}
1818bf215546Sopenharmony_ci
1819bf215546Sopenharmony_cistatic void
1820bf215546Sopenharmony_ciradv_emit_depth_control(struct radv_cmd_buffer *cmd_buffer, uint64_t states)
1821bf215546Sopenharmony_ci{
1822bf215546Sopenharmony_ci   unsigned db_depth_control = cmd_buffer->state.graphics_pipeline->db_depth_control;
1823bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_ci   db_depth_control &= C_028800_Z_ENABLE &
1826bf215546Sopenharmony_ci                       C_028800_Z_WRITE_ENABLE &
1827bf215546Sopenharmony_ci                       C_028800_ZFUNC &
1828bf215546Sopenharmony_ci                       C_028800_DEPTH_BOUNDS_ENABLE &
1829bf215546Sopenharmony_ci                       C_028800_STENCIL_ENABLE &
1830bf215546Sopenharmony_ci                       C_028800_BACKFACE_ENABLE &
1831bf215546Sopenharmony_ci                       C_028800_STENCILFUNC &
1832bf215546Sopenharmony_ci                       C_028800_STENCILFUNC_BF;
1833bf215546Sopenharmony_ci
1834bf215546Sopenharmony_ci   db_depth_control |= S_028800_Z_ENABLE(d->depth_test_enable ? 1 : 0) |
1835bf215546Sopenharmony_ci                       S_028800_Z_WRITE_ENABLE(d->depth_write_enable ? 1 : 0) |
1836bf215546Sopenharmony_ci                       S_028800_ZFUNC(d->depth_compare_op) |
1837bf215546Sopenharmony_ci                       S_028800_DEPTH_BOUNDS_ENABLE(d->depth_bounds_test_enable ? 1 : 0) |
1838bf215546Sopenharmony_ci                       S_028800_STENCIL_ENABLE(d->stencil_test_enable ? 1 : 0) |
1839bf215546Sopenharmony_ci                       S_028800_BACKFACE_ENABLE(d->stencil_test_enable ? 1 : 0) |
1840bf215546Sopenharmony_ci                       S_028800_STENCILFUNC(d->stencil_op.front.compare_op) |
1841bf215546Sopenharmony_ci                       S_028800_STENCILFUNC_BF(d->stencil_op.back.compare_op);
1842bf215546Sopenharmony_ci
1843bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028800_DB_DEPTH_CONTROL, db_depth_control);
1844bf215546Sopenharmony_ci}
1845bf215546Sopenharmony_ci
1846bf215546Sopenharmony_cistatic void
1847bf215546Sopenharmony_ciradv_emit_stencil_control(struct radv_cmd_buffer *cmd_buffer)
1848bf215546Sopenharmony_ci{
1849bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1850bf215546Sopenharmony_ci
1851bf215546Sopenharmony_ci   radeon_set_context_reg(
1852bf215546Sopenharmony_ci      cmd_buffer->cs, R_02842C_DB_STENCIL_CONTROL,
1853bf215546Sopenharmony_ci      S_02842C_STENCILFAIL(si_translate_stencil_op(d->stencil_op.front.fail_op)) |
1854bf215546Sopenharmony_ci         S_02842C_STENCILZPASS(si_translate_stencil_op(d->stencil_op.front.pass_op)) |
1855bf215546Sopenharmony_ci         S_02842C_STENCILZFAIL(si_translate_stencil_op(d->stencil_op.front.depth_fail_op)) |
1856bf215546Sopenharmony_ci         S_02842C_STENCILFAIL_BF(si_translate_stencil_op(d->stencil_op.back.fail_op)) |
1857bf215546Sopenharmony_ci         S_02842C_STENCILZPASS_BF(si_translate_stencil_op(d->stencil_op.back.pass_op)) |
1858bf215546Sopenharmony_ci         S_02842C_STENCILZFAIL_BF(si_translate_stencil_op(d->stencil_op.back.depth_fail_op)));
1859bf215546Sopenharmony_ci}
1860bf215546Sopenharmony_ci
1861bf215546Sopenharmony_cistatic void
1862bf215546Sopenharmony_ciradv_emit_fragment_shading_rate(struct radv_cmd_buffer *cmd_buffer)
1863bf215546Sopenharmony_ci{
1864bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
1865bf215546Sopenharmony_ci   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
1866bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1867bf215546Sopenharmony_ci   uint32_t rate_x = MIN2(2, d->fragment_shading_rate.size.width) - 1;
1868bf215546Sopenharmony_ci   uint32_t rate_y = MIN2(2, d->fragment_shading_rate.size.height) - 1;
1869bf215546Sopenharmony_ci   uint32_t pa_cl_vrs_cntl = pipeline->vrs.pa_cl_vrs_cntl;
1870bf215546Sopenharmony_ci   uint32_t pipeline_comb_mode = d->fragment_shading_rate.combiner_ops[0];
1871bf215546Sopenharmony_ci   uint32_t htile_comb_mode = d->fragment_shading_rate.combiner_ops[1];
1872bf215546Sopenharmony_ci
1873bf215546Sopenharmony_ci   assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3);
1874bf215546Sopenharmony_ci
1875bf215546Sopenharmony_ci   if (subpass && !subpass->vrs_attachment) {
1876bf215546Sopenharmony_ci      /* When the current subpass has no VRS attachment, the VRS rates are expected to be 1x1, so we
1877bf215546Sopenharmony_ci       * can cheat by tweaking the different combiner modes.
1878bf215546Sopenharmony_ci       */
1879bf215546Sopenharmony_ci      switch (htile_comb_mode) {
1880bf215546Sopenharmony_ci      case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MIN_KHR:
1881bf215546Sopenharmony_ci         /* The result of min(A, 1x1) is always 1x1. */
1882bf215546Sopenharmony_ci         FALLTHROUGH;
1883bf215546Sopenharmony_ci      case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR:
1884bf215546Sopenharmony_ci         /* Force the per-draw VRS rate to 1x1. */
1885bf215546Sopenharmony_ci         rate_x = rate_y = 0;
1886bf215546Sopenharmony_ci
1887bf215546Sopenharmony_ci         /* As the result of min(A, 1x1) or replace(A, 1x1) are always 1x1, set the vertex rate
1888bf215546Sopenharmony_ci          * combiner mode as passthrough.
1889bf215546Sopenharmony_ci          */
1890bf215546Sopenharmony_ci         pipeline_comb_mode = V_028848_VRS_COMB_MODE_PASSTHRU;
1891bf215546Sopenharmony_ci         break;
1892bf215546Sopenharmony_ci      case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_MAX_KHR:
1893bf215546Sopenharmony_ci         /* The result of max(A, 1x1) is always A. */
1894bf215546Sopenharmony_ci         FALLTHROUGH;
1895bf215546Sopenharmony_ci      case VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR:
1896bf215546Sopenharmony_ci         /* Nothing to do here because the SAMPLE_ITER combiner mode should already be passthrough. */
1897bf215546Sopenharmony_ci         break;
1898bf215546Sopenharmony_ci      default:
1899bf215546Sopenharmony_ci         break;
1900bf215546Sopenharmony_ci      }
1901bf215546Sopenharmony_ci   }
1902bf215546Sopenharmony_ci
1903bf215546Sopenharmony_ci   /* Emit per-draw VRS rate which is the first combiner. */
1904bf215546Sopenharmony_ci   radeon_set_uconfig_reg(cmd_buffer->cs, R_03098C_GE_VRS_RATE,
1905bf215546Sopenharmony_ci                          S_03098C_RATE_X(rate_x) | S_03098C_RATE_Y(rate_y));
1906bf215546Sopenharmony_ci
1907bf215546Sopenharmony_ci   /* VERTEX_RATE_COMBINER_MODE controls the combiner mode between the
1908bf215546Sopenharmony_ci    * draw rate and the vertex rate.
1909bf215546Sopenharmony_ci    */
1910bf215546Sopenharmony_ci   if (cmd_buffer->state.mesh_shading) {
1911bf215546Sopenharmony_ci      pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU) |
1912bf215546Sopenharmony_ci                        S_028848_PRIMITIVE_RATE_COMBINER_MODE(pipeline_comb_mode);
1913bf215546Sopenharmony_ci   } else {
1914bf215546Sopenharmony_ci      pa_cl_vrs_cntl |= S_028848_VERTEX_RATE_COMBINER_MODE(pipeline_comb_mode) |
1915bf215546Sopenharmony_ci                        S_028848_PRIMITIVE_RATE_COMBINER_MODE(V_028848_VRS_COMB_MODE_PASSTHRU);
1916bf215546Sopenharmony_ci   }
1917bf215546Sopenharmony_ci
1918bf215546Sopenharmony_ci   /* HTILE_RATE_COMBINER_MODE controls the combiner mode between the primitive rate and the HTILE
1919bf215546Sopenharmony_ci    * rate.
1920bf215546Sopenharmony_ci    */
1921bf215546Sopenharmony_ci   pa_cl_vrs_cntl |= S_028848_HTILE_RATE_COMBINER_MODE(htile_comb_mode);
1922bf215546Sopenharmony_ci
1923bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028848_PA_CL_VRS_CNTL, pa_cl_vrs_cntl);
1924bf215546Sopenharmony_ci}
1925bf215546Sopenharmony_ci
1926bf215546Sopenharmony_cistatic void
1927bf215546Sopenharmony_ciradv_emit_primitive_restart_enable(struct radv_cmd_buffer *cmd_buffer)
1928bf215546Sopenharmony_ci{
1929bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1930bf215546Sopenharmony_ci
1931bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
1932bf215546Sopenharmony_ci      radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_GE_MULTI_PRIM_IB_RESET_EN,
1933bf215546Sopenharmony_ci                             d->primitive_restart_enable);
1934bf215546Sopenharmony_ci   } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
1935bf215546Sopenharmony_ci      radeon_set_uconfig_reg(cmd_buffer->cs, R_03092C_VGT_MULTI_PRIM_IB_RESET_EN,
1936bf215546Sopenharmony_ci                             d->primitive_restart_enable);
1937bf215546Sopenharmony_ci   } else {
1938bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028A94_VGT_MULTI_PRIM_IB_RESET_EN,
1939bf215546Sopenharmony_ci                             d->primitive_restart_enable);
1940bf215546Sopenharmony_ci   }
1941bf215546Sopenharmony_ci}
1942bf215546Sopenharmony_ci
1943bf215546Sopenharmony_cistatic void
1944bf215546Sopenharmony_ciradv_emit_rasterizer_discard_enable(struct radv_cmd_buffer *cmd_buffer)
1945bf215546Sopenharmony_ci{
1946bf215546Sopenharmony_ci   unsigned pa_cl_clip_cntl = cmd_buffer->state.graphics_pipeline->pa_cl_clip_cntl;
1947bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1948bf215546Sopenharmony_ci
1949bf215546Sopenharmony_ci   pa_cl_clip_cntl &= C_028810_DX_RASTERIZATION_KILL;
1950bf215546Sopenharmony_ci   pa_cl_clip_cntl |= S_028810_DX_RASTERIZATION_KILL(d->rasterizer_discard_enable);
1951bf215546Sopenharmony_ci
1952bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028810_PA_CL_CLIP_CNTL, pa_cl_clip_cntl);
1953bf215546Sopenharmony_ci}
1954bf215546Sopenharmony_ci
1955bf215546Sopenharmony_cistatic void
1956bf215546Sopenharmony_ciradv_emit_logic_op(struct radv_cmd_buffer *cmd_buffer)
1957bf215546Sopenharmony_ci{
1958bf215546Sopenharmony_ci   unsigned cb_color_control = cmd_buffer->state.graphics_pipeline->cb_color_control;
1959bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1960bf215546Sopenharmony_ci
1961bf215546Sopenharmony_ci   cb_color_control &= C_028808_ROP3;
1962bf215546Sopenharmony_ci   cb_color_control |= S_028808_ROP3(d->logic_op);
1963bf215546Sopenharmony_ci
1964bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028808_CB_COLOR_CONTROL, cb_color_control);
1965bf215546Sopenharmony_ci}
1966bf215546Sopenharmony_ci
1967bf215546Sopenharmony_cistatic void
1968bf215546Sopenharmony_ciradv_emit_color_write_enable(struct radv_cmd_buffer *cmd_buffer)
1969bf215546Sopenharmony_ci{
1970bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
1971bf215546Sopenharmony_ci   struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
1972bf215546Sopenharmony_ci
1973bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028238_CB_TARGET_MASK,
1974bf215546Sopenharmony_ci                          pipeline->cb_target_mask & d->color_write_enable);
1975bf215546Sopenharmony_ci}
1976bf215546Sopenharmony_ci
1977bf215546Sopenharmony_cistatic void
1978bf215546Sopenharmony_ciradv_emit_fb_color_state(struct radv_cmd_buffer *cmd_buffer, int index,
1979bf215546Sopenharmony_ci                         struct radv_color_buffer_info *cb, struct radv_image_view *iview,
1980bf215546Sopenharmony_ci                         VkImageLayout layout, bool in_render_loop)
1981bf215546Sopenharmony_ci{
1982bf215546Sopenharmony_ci   bool is_vi = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8;
1983bf215546Sopenharmony_ci   uint32_t cb_fdcc_control = cb->cb_dcc_control;
1984bf215546Sopenharmony_ci   uint32_t cb_color_info = cb->cb_color_info;
1985bf215546Sopenharmony_ci   struct radv_image *image = iview->image;
1986bf215546Sopenharmony_ci
1987bf215546Sopenharmony_ci   if (!radv_layout_dcc_compressed(
1988bf215546Sopenharmony_ci          cmd_buffer->device, image, iview->vk.base_mip_level, layout, in_render_loop,
1989bf215546Sopenharmony_ci          radv_image_queue_family_mask(image, cmd_buffer->qf,
1990bf215546Sopenharmony_ci                                       cmd_buffer->qf))) {
1991bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
1992bf215546Sopenharmony_ci         cb_fdcc_control &= C_028C78_FDCC_ENABLE;
1993bf215546Sopenharmony_ci      } else {
1994bf215546Sopenharmony_ci         cb_color_info &= C_028C70_DCC_ENABLE;
1995bf215546Sopenharmony_ci      }
1996bf215546Sopenharmony_ci   }
1997bf215546Sopenharmony_ci
1998bf215546Sopenharmony_ci   if (!radv_layout_fmask_compressed(
1999bf215546Sopenharmony_ci          cmd_buffer->device, image, layout,
2000bf215546Sopenharmony_ci          radv_image_queue_family_mask(image, cmd_buffer->qf,
2001bf215546Sopenharmony_ci                                       cmd_buffer->qf))) {
2002bf215546Sopenharmony_ci      cb_color_info &= C_028C70_COMPRESSION;
2003bf215546Sopenharmony_ci   }
2004bf215546Sopenharmony_ci
2005bf215546Sopenharmony_ci   if (radv_image_is_tc_compat_cmask(image) && (radv_is_fmask_decompress_pipeline(cmd_buffer) ||
2006bf215546Sopenharmony_ci                                                radv_is_dcc_decompress_pipeline(cmd_buffer))) {
2007bf215546Sopenharmony_ci      /* If this bit is set, the FMASK decompression operation
2008bf215546Sopenharmony_ci       * doesn't occur (DCC_COMPRESS also implies FMASK_DECOMPRESS).
2009bf215546Sopenharmony_ci       */
2010bf215546Sopenharmony_ci      cb_color_info &= C_028C70_FMASK_COMPRESS_1FRAG_ONLY;
2011bf215546Sopenharmony_ci   }
2012bf215546Sopenharmony_ci
2013bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
2014bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028C6C_CB_COLOR0_VIEW + index * 0x3c, 4);
2015bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_view);                      /* CB_COLOR0_VIEW */
2016bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_info);                      /* CB_COLOR0_INFO */
2017bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);                    /* CB_COLOR0_ATTRIB */
2018bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb_fdcc_control);                        /* CB_COLOR0_FDCC_CONTROL */
2019bf215546Sopenharmony_ci
2020bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, cb->cb_color_base);
2021bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4, cb->cb_color_base >> 32);
2022bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
2023bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4, cb->cb_dcc_base >> 32);
2024bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4, cb->cb_color_attrib2);
2025bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4, cb->cb_color_attrib3);
2026bf215546Sopenharmony_ci   } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
2027bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
2028bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_base);
2029bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
2030bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
2031bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_view);
2032bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb_color_info);
2033bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
2034bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
2035bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
2036bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
2037bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
2038bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
2039bf215546Sopenharmony_ci
2040bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, cb->cb_dcc_base);
2041bf215546Sopenharmony_ci
2042bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028E40_CB_COLOR0_BASE_EXT + index * 4,
2043bf215546Sopenharmony_ci                             cb->cb_color_base >> 32);
2044bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028E60_CB_COLOR0_CMASK_BASE_EXT + index * 4,
2045bf215546Sopenharmony_ci                             cb->cb_color_cmask >> 32);
2046bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028E80_CB_COLOR0_FMASK_BASE_EXT + index * 4,
2047bf215546Sopenharmony_ci                             cb->cb_color_fmask >> 32);
2048bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028EA0_CB_COLOR0_DCC_BASE_EXT + index * 4,
2049bf215546Sopenharmony_ci                             cb->cb_dcc_base >> 32);
2050bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028EC0_CB_COLOR0_ATTRIB2 + index * 4,
2051bf215546Sopenharmony_ci                             cb->cb_color_attrib2);
2052bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028EE0_CB_COLOR0_ATTRIB3 + index * 4,
2053bf215546Sopenharmony_ci                             cb->cb_color_attrib3);
2054bf215546Sopenharmony_ci   } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
2055bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
2056bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_base);
2057bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028C64_BASE_256B(cb->cb_color_base >> 32));
2058bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_attrib2);
2059bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_view);
2060bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb_color_info);
2061bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
2062bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
2063bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
2064bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028C80_BASE_256B(cb->cb_color_cmask >> 32));
2065bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
2066bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028C88_BASE_256B(cb->cb_color_fmask >> 32));
2067bf215546Sopenharmony_ci
2068bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c, 2);
2069bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_dcc_base);
2070bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028C98_BASE_256B(cb->cb_dcc_base >> 32));
2071bf215546Sopenharmony_ci
2072bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_0287A0_CB_MRT0_EPITCH + index * 4,
2073bf215546Sopenharmony_ci                             cb->cb_mrt_epitch);
2074bf215546Sopenharmony_ci   } else {
2075bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028C60_CB_COLOR0_BASE + index * 0x3c, 11);
2076bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_base);
2077bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_pitch);
2078bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_slice);
2079bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_view);
2080bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb_color_info);
2081bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_attrib);
2082bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_dcc_control);
2083bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_cmask);
2084bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_cmask_slice);
2085bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_fmask);
2086bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cb->cb_color_fmask_slice);
2087bf215546Sopenharmony_ci
2088bf215546Sopenharmony_ci      if (is_vi) { /* DCC BASE */
2089bf215546Sopenharmony_ci         radeon_set_context_reg(cmd_buffer->cs, R_028C94_CB_COLOR0_DCC_BASE + index * 0x3c,
2090bf215546Sopenharmony_ci                                cb->cb_dcc_base);
2091bf215546Sopenharmony_ci      }
2092bf215546Sopenharmony_ci   }
2093bf215546Sopenharmony_ci
2094bf215546Sopenharmony_ci   if (G_028C70_DCC_ENABLE(cb_color_info)) {
2095bf215546Sopenharmony_ci      /* Drawing with DCC enabled also compresses colorbuffers. */
2096bf215546Sopenharmony_ci      VkImageSubresourceRange range = {
2097bf215546Sopenharmony_ci         .aspectMask = iview->vk.aspects,
2098bf215546Sopenharmony_ci         .baseMipLevel = iview->vk.base_mip_level,
2099bf215546Sopenharmony_ci         .levelCount = iview->vk.level_count,
2100bf215546Sopenharmony_ci         .baseArrayLayer = iview->vk.base_array_layer,
2101bf215546Sopenharmony_ci         .layerCount = iview->vk.layer_count,
2102bf215546Sopenharmony_ci      };
2103bf215546Sopenharmony_ci
2104bf215546Sopenharmony_ci      radv_update_dcc_metadata(cmd_buffer, image, &range, true);
2105bf215546Sopenharmony_ci   }
2106bf215546Sopenharmony_ci}
2107bf215546Sopenharmony_ci
2108bf215546Sopenharmony_cistatic void
2109bf215546Sopenharmony_ciradv_update_zrange_precision(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
2110bf215546Sopenharmony_ci                             const struct radv_image_view *iview, VkImageLayout layout,
2111bf215546Sopenharmony_ci                             bool in_render_loop, bool requires_cond_exec)
2112bf215546Sopenharmony_ci{
2113bf215546Sopenharmony_ci   const struct radv_image *image = iview->image;
2114bf215546Sopenharmony_ci   uint32_t db_z_info = ds->db_z_info;
2115bf215546Sopenharmony_ci   uint32_t db_z_info_reg;
2116bf215546Sopenharmony_ci
2117bf215546Sopenharmony_ci   if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug ||
2118bf215546Sopenharmony_ci       !radv_image_is_tc_compat_htile(image))
2119bf215546Sopenharmony_ci      return;
2120bf215546Sopenharmony_ci
2121bf215546Sopenharmony_ci   if (!radv_layout_is_htile_compressed(
2122bf215546Sopenharmony_ci          cmd_buffer->device, image, layout, in_render_loop,
2123bf215546Sopenharmony_ci          radv_image_queue_family_mask(image, cmd_buffer->qf,
2124bf215546Sopenharmony_ci                                       cmd_buffer->qf))) {
2125bf215546Sopenharmony_ci      db_z_info &= C_028040_TILE_SURFACE_ENABLE;
2126bf215546Sopenharmony_ci   }
2127bf215546Sopenharmony_ci
2128bf215546Sopenharmony_ci   db_z_info &= C_028040_ZRANGE_PRECISION;
2129bf215546Sopenharmony_ci
2130bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
2131bf215546Sopenharmony_ci      db_z_info_reg = R_028038_DB_Z_INFO;
2132bf215546Sopenharmony_ci   } else {
2133bf215546Sopenharmony_ci      db_z_info_reg = R_028040_DB_Z_INFO;
2134bf215546Sopenharmony_ci   }
2135bf215546Sopenharmony_ci
2136bf215546Sopenharmony_ci   /* When we don't know the last fast clear value we need to emit a
2137bf215546Sopenharmony_ci    * conditional packet that will eventually skip the following
2138bf215546Sopenharmony_ci    * SET_CONTEXT_REG packet.
2139bf215546Sopenharmony_ci    */
2140bf215546Sopenharmony_ci   if (requires_cond_exec) {
2141bf215546Sopenharmony_ci      uint64_t va = radv_get_tc_compat_zrange_va(image, iview->vk.base_mip_level);
2142bf215546Sopenharmony_ci
2143bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, PKT3(PKT3_COND_EXEC, 3, 0));
2144bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, va);
2145bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, va >> 32);
2146bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 0);
2147bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, 3); /* SET_CONTEXT_REG size */
2148bf215546Sopenharmony_ci   }
2149bf215546Sopenharmony_ci
2150bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, db_z_info_reg, db_z_info);
2151bf215546Sopenharmony_ci}
2152bf215546Sopenharmony_ci
2153bf215546Sopenharmony_cistatic void
2154bf215546Sopenharmony_ciradv_emit_fb_ds_state(struct radv_cmd_buffer *cmd_buffer, struct radv_ds_buffer_info *ds,
2155bf215546Sopenharmony_ci                      struct radv_image_view *iview, VkImageLayout layout, bool in_render_loop)
2156bf215546Sopenharmony_ci{
2157bf215546Sopenharmony_ci   const struct radv_image *image = iview->image;
2158bf215546Sopenharmony_ci   uint32_t db_z_info = ds->db_z_info;
2159bf215546Sopenharmony_ci   uint32_t db_stencil_info = ds->db_stencil_info;
2160bf215546Sopenharmony_ci   uint32_t db_htile_surface = ds->db_htile_surface;
2161bf215546Sopenharmony_ci
2162bf215546Sopenharmony_ci   if (!radv_layout_is_htile_compressed(
2163bf215546Sopenharmony_ci          cmd_buffer->device, image, layout, in_render_loop,
2164bf215546Sopenharmony_ci          radv_image_queue_family_mask(image, cmd_buffer->qf,
2165bf215546Sopenharmony_ci                                       cmd_buffer->qf))) {
2166bf215546Sopenharmony_ci      db_z_info &= C_028040_TILE_SURFACE_ENABLE;
2167bf215546Sopenharmony_ci      db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
2168bf215546Sopenharmony_ci   }
2169bf215546Sopenharmony_ci
2170bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3 &&
2171bf215546Sopenharmony_ci       !cmd_buffer->state.subpass->vrs_attachment) {
2172bf215546Sopenharmony_ci      db_htile_surface &= C_028ABC_VRS_HTILE_ENCODING;
2173bf215546Sopenharmony_ci   }
2174bf215546Sopenharmony_ci
2175bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028008_DB_DEPTH_VIEW, ds->db_depth_view);
2176bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028ABC_DB_HTILE_SURFACE, db_htile_surface);
2177bf215546Sopenharmony_ci
2178bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
2179bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
2180bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_02801C_DB_DEPTH_SIZE_XY, ds->db_depth_size);
2181bf215546Sopenharmony_ci
2182bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
2183bf215546Sopenharmony_ci         radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 6);
2184bf215546Sopenharmony_ci      } else {
2185bf215546Sopenharmony_ci         radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 7);
2186bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, S_02803C_RESOURCE_LEVEL(1));
2187bf215546Sopenharmony_ci      }
2188bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, db_z_info);
2189bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, db_stencil_info);
2190bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
2191bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
2192bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_read_base);
2193bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);
2194bf215546Sopenharmony_ci
2195bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_READ_BASE_HI, 5);
2196bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
2197bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
2198bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_read_base >> 32);
2199bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base >> 32);
2200bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_htile_data_base >> 32);
2201bf215546Sopenharmony_ci   } else if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
2202bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, 3);
2203bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_htile_data_base);
2204bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028018_BASE_HI(ds->db_htile_data_base >> 32));
2205bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_depth_size);
2206bf215546Sopenharmony_ci
2207bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 10);
2208bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, db_z_info);          /* DB_Z_INFO */
2209bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, db_stencil_info);    /* DB_STENCIL_INFO */
2210bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_read_base); /* DB_Z_READ_BASE */
2211bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs,
2212bf215546Sopenharmony_ci                  S_028044_BASE_HI(ds->db_z_read_base >> 32)); /* DB_Z_READ_BASE_HI */
2213bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);   /* DB_STENCIL_READ_BASE */
2214bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs,
2215bf215546Sopenharmony_ci                  S_02804C_BASE_HI(ds->db_stencil_read_base >> 32)); /* DB_STENCIL_READ_BASE_HI */
2216bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_write_base);              /* DB_Z_WRITE_BASE */
2217bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs,
2218bf215546Sopenharmony_ci                  S_028054_BASE_HI(ds->db_z_write_base >> 32)); /* DB_Z_WRITE_BASE_HI */
2219bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base);   /* DB_STENCIL_WRITE_BASE */
2220bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs,
2221bf215546Sopenharmony_ci                  S_02805C_BASE_HI(ds->db_stencil_write_base >> 32)); /* DB_STENCIL_WRITE_BASE_HI */
2222bf215546Sopenharmony_ci
2223bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_028068_DB_Z_INFO2, 2);
2224bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_info2);
2225bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_info2);
2226bf215546Sopenharmony_ci   } else {
2227bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028014_DB_HTILE_DATA_BASE, ds->db_htile_data_base);
2228bf215546Sopenharmony_ci
2229bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cmd_buffer->cs, R_02803C_DB_DEPTH_INFO, 9);
2230bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_depth_info);         /* R_02803C_DB_DEPTH_INFO */
2231bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, db_z_info);                 /* R_028040_DB_Z_INFO */
2232bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, db_stencil_info);           /* R_028044_DB_STENCIL_INFO */
2233bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_read_base);        /* R_028048_DB_Z_READ_BASE */
2234bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_read_base);  /* R_02804C_DB_STENCIL_READ_BASE */
2235bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_z_write_base);       /* R_028050_DB_Z_WRITE_BASE */
2236bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_stencil_write_base); /* R_028054_DB_STENCIL_WRITE_BASE */
2237bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_depth_size);         /* R_028058_DB_DEPTH_SIZE */
2238bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, ds->db_depth_slice);        /* R_02805C_DB_DEPTH_SLICE */
2239bf215546Sopenharmony_ci   }
2240bf215546Sopenharmony_ci
2241bf215546Sopenharmony_ci   /* Update the ZRANGE_PRECISION value for the TC-compat bug. */
2242bf215546Sopenharmony_ci   radv_update_zrange_precision(cmd_buffer, ds, iview, layout, in_render_loop, true);
2243bf215546Sopenharmony_ci
2244bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028B78_PA_SU_POLY_OFFSET_DB_FMT_CNTL,
2245bf215546Sopenharmony_ci                          ds->pa_su_poly_offset_db_fmt_cntl);
2246bf215546Sopenharmony_ci}
2247bf215546Sopenharmony_ci
2248bf215546Sopenharmony_ci/**
2249bf215546Sopenharmony_ci * Update the fast clear depth/stencil values if the image is bound as a
2250bf215546Sopenharmony_ci * depth/stencil buffer.
2251bf215546Sopenharmony_ci */
2252bf215546Sopenharmony_cistatic void
2253bf215546Sopenharmony_ciradv_update_bound_fast_clear_ds(struct radv_cmd_buffer *cmd_buffer,
2254bf215546Sopenharmony_ci                                const struct radv_image_view *iview,
2255bf215546Sopenharmony_ci                                VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
2256bf215546Sopenharmony_ci{
2257bf215546Sopenharmony_ci   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
2258bf215546Sopenharmony_ci   const struct radv_image *image = iview->image;
2259bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2260bf215546Sopenharmony_ci   uint32_t att_idx;
2261bf215546Sopenharmony_ci
2262bf215546Sopenharmony_ci   if (!cmd_buffer->state.attachments || !subpass)
2263bf215546Sopenharmony_ci      return;
2264bf215546Sopenharmony_ci
2265bf215546Sopenharmony_ci   if (!subpass->depth_stencil_attachment)
2266bf215546Sopenharmony_ci      return;
2267bf215546Sopenharmony_ci
2268bf215546Sopenharmony_ci   att_idx = subpass->depth_stencil_attachment->attachment;
2269bf215546Sopenharmony_ci   if (cmd_buffer->state.attachments[att_idx].iview->image != image)
2270bf215546Sopenharmony_ci      return;
2271bf215546Sopenharmony_ci
2272bf215546Sopenharmony_ci   if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2273bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cs, R_028028_DB_STENCIL_CLEAR, 2);
2274bf215546Sopenharmony_ci      radeon_emit(cs, ds_clear_value.stencil);
2275bf215546Sopenharmony_ci      radeon_emit(cs, fui(ds_clear_value.depth));
2276bf215546Sopenharmony_ci   } else if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
2277bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_02802C_DB_DEPTH_CLEAR, fui(ds_clear_value.depth));
2278bf215546Sopenharmony_ci   } else {
2279bf215546Sopenharmony_ci      assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
2280bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028028_DB_STENCIL_CLEAR, ds_clear_value.stencil);
2281bf215546Sopenharmony_ci   }
2282bf215546Sopenharmony_ci
2283bf215546Sopenharmony_ci   /* Update the ZRANGE_PRECISION value for the TC-compat bug. This is
2284bf215546Sopenharmony_ci    * only needed when clearing Z to 0.0.
2285bf215546Sopenharmony_ci    */
2286bf215546Sopenharmony_ci   if ((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && ds_clear_value.depth == 0.0) {
2287bf215546Sopenharmony_ci      VkImageLayout layout = subpass->depth_stencil_attachment->layout;
2288bf215546Sopenharmony_ci      bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
2289bf215546Sopenharmony_ci
2290bf215546Sopenharmony_ci      radv_update_zrange_precision(cmd_buffer, &cmd_buffer->state.attachments[att_idx].ds, iview,
2291bf215546Sopenharmony_ci                                   layout, in_render_loop, false);
2292bf215546Sopenharmony_ci   }
2293bf215546Sopenharmony_ci
2294bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
2295bf215546Sopenharmony_ci}
2296bf215546Sopenharmony_ci
2297bf215546Sopenharmony_ci/**
2298bf215546Sopenharmony_ci * Set the clear depth/stencil values to the image's metadata.
2299bf215546Sopenharmony_ci */
2300bf215546Sopenharmony_cistatic void
2301bf215546Sopenharmony_ciradv_set_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2302bf215546Sopenharmony_ci                           const VkImageSubresourceRange *range,
2303bf215546Sopenharmony_ci                           VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
2304bf215546Sopenharmony_ci{
2305bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2306bf215546Sopenharmony_ci   uint32_t level_count = radv_get_levelCount(image, range);
2307bf215546Sopenharmony_ci
2308bf215546Sopenharmony_ci   if (aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
2309bf215546Sopenharmony_ci      uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel);
2310bf215546Sopenharmony_ci
2311bf215546Sopenharmony_ci      /* Use the fastest way when both aspects are used. */
2312bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + 2 * level_count, cmd_buffer->state.predicating));
2313bf215546Sopenharmony_ci      radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
2314bf215546Sopenharmony_ci      radeon_emit(cs, va);
2315bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
2316bf215546Sopenharmony_ci
2317bf215546Sopenharmony_ci      for (uint32_t l = 0; l < level_count; l++) {
2318bf215546Sopenharmony_ci         radeon_emit(cs, ds_clear_value.stencil);
2319bf215546Sopenharmony_ci         radeon_emit(cs, fui(ds_clear_value.depth));
2320bf215546Sopenharmony_ci      }
2321bf215546Sopenharmony_ci   } else {
2322bf215546Sopenharmony_ci      /* Otherwise we need one WRITE_DATA packet per level. */
2323bf215546Sopenharmony_ci      for (uint32_t l = 0; l < level_count; l++) {
2324bf215546Sopenharmony_ci         uint64_t va = radv_get_ds_clear_value_va(image, range->baseMipLevel + l);
2325bf215546Sopenharmony_ci         unsigned value;
2326bf215546Sopenharmony_ci
2327bf215546Sopenharmony_ci         if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT) {
2328bf215546Sopenharmony_ci            value = fui(ds_clear_value.depth);
2329bf215546Sopenharmony_ci            va += 4;
2330bf215546Sopenharmony_ci         } else {
2331bf215546Sopenharmony_ci            assert(aspects == VK_IMAGE_ASPECT_STENCIL_BIT);
2332bf215546Sopenharmony_ci            value = ds_clear_value.stencil;
2333bf215546Sopenharmony_ci         }
2334bf215546Sopenharmony_ci
2335bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, cmd_buffer->state.predicating));
2336bf215546Sopenharmony_ci         radeon_emit(cs,
2337bf215546Sopenharmony_ci                     S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
2338bf215546Sopenharmony_ci         radeon_emit(cs, va);
2339bf215546Sopenharmony_ci         radeon_emit(cs, va >> 32);
2340bf215546Sopenharmony_ci         radeon_emit(cs, value);
2341bf215546Sopenharmony_ci      }
2342bf215546Sopenharmony_ci   }
2343bf215546Sopenharmony_ci}
2344bf215546Sopenharmony_ci
2345bf215546Sopenharmony_ci/**
2346bf215546Sopenharmony_ci * Update the TC-compat metadata value for this image.
2347bf215546Sopenharmony_ci */
2348bf215546Sopenharmony_cistatic void
2349bf215546Sopenharmony_ciradv_set_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2350bf215546Sopenharmony_ci                                   const VkImageSubresourceRange *range, uint32_t value)
2351bf215546Sopenharmony_ci{
2352bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2353bf215546Sopenharmony_ci
2354bf215546Sopenharmony_ci   if (!cmd_buffer->device->physical_device->rad_info.has_tc_compat_zrange_bug)
2355bf215546Sopenharmony_ci      return;
2356bf215546Sopenharmony_ci
2357bf215546Sopenharmony_ci   uint64_t va = radv_get_tc_compat_zrange_va(image, range->baseMipLevel);
2358bf215546Sopenharmony_ci   uint32_t level_count = radv_get_levelCount(image, range);
2359bf215546Sopenharmony_ci
2360bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + level_count, cmd_buffer->state.predicating));
2361bf215546Sopenharmony_ci   radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
2362bf215546Sopenharmony_ci   radeon_emit(cs, va);
2363bf215546Sopenharmony_ci   radeon_emit(cs, va >> 32);
2364bf215546Sopenharmony_ci
2365bf215546Sopenharmony_ci   for (uint32_t l = 0; l < level_count; l++)
2366bf215546Sopenharmony_ci      radeon_emit(cs, value);
2367bf215546Sopenharmony_ci}
2368bf215546Sopenharmony_ci
2369bf215546Sopenharmony_cistatic void
2370bf215546Sopenharmony_ciradv_update_tc_compat_zrange_metadata(struct radv_cmd_buffer *cmd_buffer,
2371bf215546Sopenharmony_ci                                      const struct radv_image_view *iview,
2372bf215546Sopenharmony_ci                                      VkClearDepthStencilValue ds_clear_value)
2373bf215546Sopenharmony_ci{
2374bf215546Sopenharmony_ci   VkImageSubresourceRange range = {
2375bf215546Sopenharmony_ci      .aspectMask = iview->vk.aspects,
2376bf215546Sopenharmony_ci      .baseMipLevel = iview->vk.base_mip_level,
2377bf215546Sopenharmony_ci      .levelCount = iview->vk.level_count,
2378bf215546Sopenharmony_ci      .baseArrayLayer = iview->vk.base_array_layer,
2379bf215546Sopenharmony_ci      .layerCount = iview->vk.layer_count,
2380bf215546Sopenharmony_ci   };
2381bf215546Sopenharmony_ci   uint32_t cond_val;
2382bf215546Sopenharmony_ci
2383bf215546Sopenharmony_ci   /* Conditionally set DB_Z_INFO.ZRANGE_PRECISION to 0 when the last
2384bf215546Sopenharmony_ci    * depth clear value is 0.0f.
2385bf215546Sopenharmony_ci    */
2386bf215546Sopenharmony_ci   cond_val = ds_clear_value.depth == 0.0f ? UINT_MAX : 0;
2387bf215546Sopenharmony_ci
2388bf215546Sopenharmony_ci   radv_set_tc_compat_zrange_metadata(cmd_buffer, iview->image, &range, cond_val);
2389bf215546Sopenharmony_ci}
2390bf215546Sopenharmony_ci
2391bf215546Sopenharmony_ci/**
2392bf215546Sopenharmony_ci * Update the clear depth/stencil values for this image.
2393bf215546Sopenharmony_ci */
2394bf215546Sopenharmony_civoid
2395bf215546Sopenharmony_ciradv_update_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
2396bf215546Sopenharmony_ci                              const struct radv_image_view *iview,
2397bf215546Sopenharmony_ci                              VkClearDepthStencilValue ds_clear_value, VkImageAspectFlags aspects)
2398bf215546Sopenharmony_ci{
2399bf215546Sopenharmony_ci   VkImageSubresourceRange range = {
2400bf215546Sopenharmony_ci      .aspectMask = iview->vk.aspects,
2401bf215546Sopenharmony_ci      .baseMipLevel = iview->vk.base_mip_level,
2402bf215546Sopenharmony_ci      .levelCount = iview->vk.level_count,
2403bf215546Sopenharmony_ci      .baseArrayLayer = iview->vk.base_array_layer,
2404bf215546Sopenharmony_ci      .layerCount = iview->vk.layer_count,
2405bf215546Sopenharmony_ci   };
2406bf215546Sopenharmony_ci   struct radv_image *image = iview->image;
2407bf215546Sopenharmony_ci
2408bf215546Sopenharmony_ci   assert(radv_htile_enabled(image, range.baseMipLevel));
2409bf215546Sopenharmony_ci
2410bf215546Sopenharmony_ci   radv_set_ds_clear_metadata(cmd_buffer, iview->image, &range, ds_clear_value, aspects);
2411bf215546Sopenharmony_ci
2412bf215546Sopenharmony_ci   if (radv_image_is_tc_compat_htile(image) && (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)) {
2413bf215546Sopenharmony_ci      radv_update_tc_compat_zrange_metadata(cmd_buffer, iview, ds_clear_value);
2414bf215546Sopenharmony_ci   }
2415bf215546Sopenharmony_ci
2416bf215546Sopenharmony_ci   radv_update_bound_fast_clear_ds(cmd_buffer, iview, ds_clear_value, aspects);
2417bf215546Sopenharmony_ci}
2418bf215546Sopenharmony_ci
2419bf215546Sopenharmony_ci/**
2420bf215546Sopenharmony_ci * Load the clear depth/stencil values from the image's metadata.
2421bf215546Sopenharmony_ci */
2422bf215546Sopenharmony_cistatic void
2423bf215546Sopenharmony_ciradv_load_ds_clear_metadata(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview)
2424bf215546Sopenharmony_ci{
2425bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2426bf215546Sopenharmony_ci   const struct radv_image *image = iview->image;
2427bf215546Sopenharmony_ci   VkImageAspectFlags aspects = vk_format_aspects(image->vk.format);
2428bf215546Sopenharmony_ci   uint64_t va = radv_get_ds_clear_value_va(image, iview->vk.base_mip_level);
2429bf215546Sopenharmony_ci   unsigned reg_offset = 0, reg_count = 0;
2430bf215546Sopenharmony_ci
2431bf215546Sopenharmony_ci   assert(radv_image_has_htile(image));
2432bf215546Sopenharmony_ci
2433bf215546Sopenharmony_ci   if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {
2434bf215546Sopenharmony_ci      ++reg_count;
2435bf215546Sopenharmony_ci   } else {
2436bf215546Sopenharmony_ci      ++reg_offset;
2437bf215546Sopenharmony_ci      va += 4;
2438bf215546Sopenharmony_ci   }
2439bf215546Sopenharmony_ci   if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)
2440bf215546Sopenharmony_ci      ++reg_count;
2441bf215546Sopenharmony_ci
2442bf215546Sopenharmony_ci   uint32_t reg = R_028028_DB_STENCIL_CLEAR + 4 * reg_offset;
2443bf215546Sopenharmony_ci
2444bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
2445bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
2446bf215546Sopenharmony_ci      radeon_emit(cs, va);
2447bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
2448bf215546Sopenharmony_ci      radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
2449bf215546Sopenharmony_ci      radeon_emit(cs, reg_count);
2450bf215546Sopenharmony_ci   } else {
2451bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
2452bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
2453bf215546Sopenharmony_ci                         (reg_count == 2 ? COPY_DATA_COUNT_SEL : 0));
2454bf215546Sopenharmony_ci      radeon_emit(cs, va);
2455bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
2456bf215546Sopenharmony_ci      radeon_emit(cs, reg >> 2);
2457bf215546Sopenharmony_ci      radeon_emit(cs, 0);
2458bf215546Sopenharmony_ci
2459bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
2460bf215546Sopenharmony_ci      radeon_emit(cs, 0);
2461bf215546Sopenharmony_ci   }
2462bf215546Sopenharmony_ci}
2463bf215546Sopenharmony_ci
2464bf215546Sopenharmony_ci/*
2465bf215546Sopenharmony_ci * With DCC some colors don't require CMASK elimination before being
2466bf215546Sopenharmony_ci * used as a texture. This sets a predicate value to determine if the
2467bf215546Sopenharmony_ci * cmask eliminate is required.
2468bf215546Sopenharmony_ci */
2469bf215546Sopenharmony_civoid
2470bf215546Sopenharmony_ciradv_update_fce_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2471bf215546Sopenharmony_ci                         const VkImageSubresourceRange *range, bool value)
2472bf215546Sopenharmony_ci{
2473bf215546Sopenharmony_ci   if (!image->fce_pred_offset)
2474bf215546Sopenharmony_ci      return;
2475bf215546Sopenharmony_ci
2476bf215546Sopenharmony_ci   uint64_t pred_val = value;
2477bf215546Sopenharmony_ci   uint64_t va = radv_image_get_fce_pred_va(image, range->baseMipLevel);
2478bf215546Sopenharmony_ci   uint32_t level_count = radv_get_levelCount(image, range);
2479bf215546Sopenharmony_ci   uint32_t count = 2 * level_count;
2480bf215546Sopenharmony_ci
2481bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
2482bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs,
2483bf215546Sopenharmony_ci               S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
2484bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, va);
2485bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, va >> 32);
2486bf215546Sopenharmony_ci
2487bf215546Sopenharmony_ci   for (uint32_t l = 0; l < level_count; l++) {
2488bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, pred_val);
2489bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, pred_val >> 32);
2490bf215546Sopenharmony_ci   }
2491bf215546Sopenharmony_ci}
2492bf215546Sopenharmony_ci
2493bf215546Sopenharmony_ci/**
2494bf215546Sopenharmony_ci * Update the DCC predicate to reflect the compression state.
2495bf215546Sopenharmony_ci */
2496bf215546Sopenharmony_civoid
2497bf215546Sopenharmony_ciradv_update_dcc_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2498bf215546Sopenharmony_ci                         const VkImageSubresourceRange *range, bool value)
2499bf215546Sopenharmony_ci{
2500bf215546Sopenharmony_ci   if (image->dcc_pred_offset == 0)
2501bf215546Sopenharmony_ci      return;
2502bf215546Sopenharmony_ci
2503bf215546Sopenharmony_ci   uint64_t pred_val = value;
2504bf215546Sopenharmony_ci   uint64_t va = radv_image_get_dcc_pred_va(image, range->baseMipLevel);
2505bf215546Sopenharmony_ci   uint32_t level_count = radv_get_levelCount(image, range);
2506bf215546Sopenharmony_ci   uint32_t count = 2 * level_count;
2507bf215546Sopenharmony_ci
2508bf215546Sopenharmony_ci   assert(radv_dcc_enabled(image, range->baseMipLevel));
2509bf215546Sopenharmony_ci
2510bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + count, 0));
2511bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs,
2512bf215546Sopenharmony_ci               S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
2513bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, va);
2514bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, va >> 32);
2515bf215546Sopenharmony_ci
2516bf215546Sopenharmony_ci   for (uint32_t l = 0; l < level_count; l++) {
2517bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, pred_val);
2518bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, pred_val >> 32);
2519bf215546Sopenharmony_ci   }
2520bf215546Sopenharmony_ci}
2521bf215546Sopenharmony_ci
2522bf215546Sopenharmony_ci/**
2523bf215546Sopenharmony_ci * Update the fast clear color values if the image is bound as a color buffer.
2524bf215546Sopenharmony_ci */
2525bf215546Sopenharmony_cistatic void
2526bf215546Sopenharmony_ciradv_update_bound_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2527bf215546Sopenharmony_ci                                   int cb_idx, uint32_t color_values[2])
2528bf215546Sopenharmony_ci{
2529bf215546Sopenharmony_ci   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
2530bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2531bf215546Sopenharmony_ci   uint32_t att_idx;
2532bf215546Sopenharmony_ci
2533bf215546Sopenharmony_ci   if (!cmd_buffer->state.attachments || !subpass)
2534bf215546Sopenharmony_ci      return;
2535bf215546Sopenharmony_ci
2536bf215546Sopenharmony_ci   att_idx = subpass->color_attachments[cb_idx].attachment;
2537bf215546Sopenharmony_ci   if (att_idx == VK_ATTACHMENT_UNUSED)
2538bf215546Sopenharmony_ci      return;
2539bf215546Sopenharmony_ci
2540bf215546Sopenharmony_ci   if (cmd_buffer->state.attachments[att_idx].iview->image != image)
2541bf215546Sopenharmony_ci      return;
2542bf215546Sopenharmony_ci
2543bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cs, R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c, 2);
2544bf215546Sopenharmony_ci   radeon_emit(cs, color_values[0]);
2545bf215546Sopenharmony_ci   radeon_emit(cs, color_values[1]);
2546bf215546Sopenharmony_ci
2547bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
2548bf215546Sopenharmony_ci}
2549bf215546Sopenharmony_ci
2550bf215546Sopenharmony_ci/**
2551bf215546Sopenharmony_ci * Set the clear color values to the image's metadata.
2552bf215546Sopenharmony_ci */
2553bf215546Sopenharmony_cistatic void
2554bf215546Sopenharmony_ciradv_set_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
2555bf215546Sopenharmony_ci                              const VkImageSubresourceRange *range, uint32_t color_values[2])
2556bf215546Sopenharmony_ci{
2557bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2558bf215546Sopenharmony_ci   uint32_t level_count = radv_get_levelCount(image, range);
2559bf215546Sopenharmony_ci   uint32_t count = 2 * level_count;
2560bf215546Sopenharmony_ci
2561bf215546Sopenharmony_ci   assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel));
2562bf215546Sopenharmony_ci
2563bf215546Sopenharmony_ci   if (radv_image_has_clear_value(image)) {
2564bf215546Sopenharmony_ci      uint64_t va = radv_image_get_fast_clear_va(image, range->baseMipLevel);
2565bf215546Sopenharmony_ci
2566bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 2 + count, cmd_buffer->state.predicating));
2567bf215546Sopenharmony_ci      radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
2568bf215546Sopenharmony_ci      radeon_emit(cs, va);
2569bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
2570bf215546Sopenharmony_ci
2571bf215546Sopenharmony_ci      for (uint32_t l = 0; l < level_count; l++) {
2572bf215546Sopenharmony_ci         radeon_emit(cs, color_values[0]);
2573bf215546Sopenharmony_ci         radeon_emit(cs, color_values[1]);
2574bf215546Sopenharmony_ci      }
2575bf215546Sopenharmony_ci   } else {
2576bf215546Sopenharmony_ci      /* Some default value we can set in the update. */
2577bf215546Sopenharmony_ci      assert(color_values[0] == 0 && color_values[1] == 0);
2578bf215546Sopenharmony_ci   }
2579bf215546Sopenharmony_ci}
2580bf215546Sopenharmony_ci
2581bf215546Sopenharmony_ci/**
2582bf215546Sopenharmony_ci * Update the clear color values for this image.
2583bf215546Sopenharmony_ci */
2584bf215546Sopenharmony_civoid
2585bf215546Sopenharmony_ciradv_update_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer,
2586bf215546Sopenharmony_ci                                 const struct radv_image_view *iview, int cb_idx,
2587bf215546Sopenharmony_ci                                 uint32_t color_values[2])
2588bf215546Sopenharmony_ci{
2589bf215546Sopenharmony_ci   struct radv_image *image = iview->image;
2590bf215546Sopenharmony_ci   VkImageSubresourceRange range = {
2591bf215546Sopenharmony_ci      .aspectMask = iview->vk.aspects,
2592bf215546Sopenharmony_ci      .baseMipLevel = iview->vk.base_mip_level,
2593bf215546Sopenharmony_ci      .levelCount = iview->vk.level_count,
2594bf215546Sopenharmony_ci      .baseArrayLayer = iview->vk.base_array_layer,
2595bf215546Sopenharmony_ci      .layerCount = iview->vk.layer_count,
2596bf215546Sopenharmony_ci   };
2597bf215546Sopenharmony_ci
2598bf215546Sopenharmony_ci   assert(radv_image_has_cmask(image) || radv_dcc_enabled(image, iview->vk.base_mip_level));
2599bf215546Sopenharmony_ci
2600bf215546Sopenharmony_ci   /* Do not need to update the clear value for images that are fast cleared with the comp-to-single
2601bf215546Sopenharmony_ci    * mode because the hardware gets the value from the image directly.
2602bf215546Sopenharmony_ci    */
2603bf215546Sopenharmony_ci   if (iview->image->support_comp_to_single)
2604bf215546Sopenharmony_ci      return;
2605bf215546Sopenharmony_ci
2606bf215546Sopenharmony_ci   radv_set_color_clear_metadata(cmd_buffer, image, &range, color_values);
2607bf215546Sopenharmony_ci
2608bf215546Sopenharmony_ci   radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values);
2609bf215546Sopenharmony_ci}
2610bf215546Sopenharmony_ci
2611bf215546Sopenharmony_ci/**
2612bf215546Sopenharmony_ci * Load the clear color values from the image's metadata.
2613bf215546Sopenharmony_ci */
2614bf215546Sopenharmony_cistatic void
2615bf215546Sopenharmony_ciradv_load_color_clear_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *iview,
2616bf215546Sopenharmony_ci                               int cb_idx)
2617bf215546Sopenharmony_ci{
2618bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2619bf215546Sopenharmony_ci   struct radv_image *image = iview->image;
2620bf215546Sopenharmony_ci
2621bf215546Sopenharmony_ci   if (!radv_image_has_cmask(image) && !radv_dcc_enabled(image, iview->vk.base_mip_level))
2622bf215546Sopenharmony_ci      return;
2623bf215546Sopenharmony_ci
2624bf215546Sopenharmony_ci   if (iview->image->support_comp_to_single)
2625bf215546Sopenharmony_ci      return;
2626bf215546Sopenharmony_ci
2627bf215546Sopenharmony_ci   if (!radv_image_has_clear_value(image)) {
2628bf215546Sopenharmony_ci      uint32_t color_values[2] = {0, 0};
2629bf215546Sopenharmony_ci      radv_update_bound_fast_clear_color(cmd_buffer, image, cb_idx, color_values);
2630bf215546Sopenharmony_ci      return;
2631bf215546Sopenharmony_ci   }
2632bf215546Sopenharmony_ci
2633bf215546Sopenharmony_ci   uint64_t va = radv_image_get_fast_clear_va(image, iview->vk.base_mip_level);
2634bf215546Sopenharmony_ci   uint32_t reg = R_028C8C_CB_COLOR0_CLEAR_WORD0 + cb_idx * 0x3c;
2635bf215546Sopenharmony_ci
2636bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.has_load_ctx_reg_pkt) {
2637bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, cmd_buffer->state.predicating));
2638bf215546Sopenharmony_ci      radeon_emit(cs, va);
2639bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
2640bf215546Sopenharmony_ci      radeon_emit(cs, (reg - SI_CONTEXT_REG_OFFSET) >> 2);
2641bf215546Sopenharmony_ci      radeon_emit(cs, 2);
2642bf215546Sopenharmony_ci   } else {
2643bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
2644bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
2645bf215546Sopenharmony_ci                         COPY_DATA_COUNT_SEL);
2646bf215546Sopenharmony_ci      radeon_emit(cs, va);
2647bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
2648bf215546Sopenharmony_ci      radeon_emit(cs, reg >> 2);
2649bf215546Sopenharmony_ci      radeon_emit(cs, 0);
2650bf215546Sopenharmony_ci
2651bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
2652bf215546Sopenharmony_ci      radeon_emit(cs, 0);
2653bf215546Sopenharmony_ci   }
2654bf215546Sopenharmony_ci}
2655bf215546Sopenharmony_ci
2656bf215546Sopenharmony_ci/* GFX9+ metadata cache flushing workaround. metadata cache coherency is
2657bf215546Sopenharmony_ci * broken if the CB caches data of multiple mips of the same image at the
2658bf215546Sopenharmony_ci * same time.
2659bf215546Sopenharmony_ci *
2660bf215546Sopenharmony_ci * Insert some flushes to avoid this.
2661bf215546Sopenharmony_ci */
2662bf215546Sopenharmony_cistatic void
2663bf215546Sopenharmony_ciradv_emit_fb_mip_change_flush(struct radv_cmd_buffer *cmd_buffer)
2664bf215546Sopenharmony_ci{
2665bf215546Sopenharmony_ci   struct vk_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
2666bf215546Sopenharmony_ci   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
2667bf215546Sopenharmony_ci   bool color_mip_changed = false;
2668bf215546Sopenharmony_ci
2669bf215546Sopenharmony_ci   /* Entire workaround is not applicable before GFX9 */
2670bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9)
2671bf215546Sopenharmony_ci      return;
2672bf215546Sopenharmony_ci
2673bf215546Sopenharmony_ci   if (!framebuffer)
2674bf215546Sopenharmony_ci      return;
2675bf215546Sopenharmony_ci
2676bf215546Sopenharmony_ci   for (int i = 0; i < subpass->color_count; ++i) {
2677bf215546Sopenharmony_ci      int idx = subpass->color_attachments[i].attachment;
2678bf215546Sopenharmony_ci      if (idx == VK_ATTACHMENT_UNUSED)
2679bf215546Sopenharmony_ci         continue;
2680bf215546Sopenharmony_ci
2681bf215546Sopenharmony_ci      struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
2682bf215546Sopenharmony_ci
2683bf215546Sopenharmony_ci      if ((radv_image_has_CB_metadata(iview->image) ||
2684bf215546Sopenharmony_ci           radv_dcc_enabled(iview->image, iview->vk.base_mip_level) ||
2685bf215546Sopenharmony_ci           radv_dcc_enabled(iview->image, cmd_buffer->state.cb_mip[i])) &&
2686bf215546Sopenharmony_ci          cmd_buffer->state.cb_mip[i] != iview->vk.base_mip_level)
2687bf215546Sopenharmony_ci         color_mip_changed = true;
2688bf215546Sopenharmony_ci
2689bf215546Sopenharmony_ci      cmd_buffer->state.cb_mip[i] = iview->vk.base_mip_level;
2690bf215546Sopenharmony_ci   }
2691bf215546Sopenharmony_ci
2692bf215546Sopenharmony_ci   if (color_mip_changed) {
2693bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |=
2694bf215546Sopenharmony_ci         RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
2695bf215546Sopenharmony_ci   }
2696bf215546Sopenharmony_ci}
2697bf215546Sopenharmony_ci
2698bf215546Sopenharmony_ci/* This function does the flushes for mip changes if the levels are not zero for
2699bf215546Sopenharmony_ci * all render targets. This way we can assume at the start of the next cmd_buffer
2700bf215546Sopenharmony_ci * that rendering to mip 0 doesn't need any flushes. As that is the most common
2701bf215546Sopenharmony_ci * case that saves some flushes. */
2702bf215546Sopenharmony_cistatic void
2703bf215546Sopenharmony_ciradv_emit_mip_change_flush_default(struct radv_cmd_buffer *cmd_buffer)
2704bf215546Sopenharmony_ci{
2705bf215546Sopenharmony_ci   /* Entire workaround is not applicable before GFX9 */
2706bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9)
2707bf215546Sopenharmony_ci      return;
2708bf215546Sopenharmony_ci
2709bf215546Sopenharmony_ci   bool need_color_mip_flush = false;
2710bf215546Sopenharmony_ci   for (unsigned i = 0; i < 8; ++i) {
2711bf215546Sopenharmony_ci      if (cmd_buffer->state.cb_mip[i]) {
2712bf215546Sopenharmony_ci         need_color_mip_flush = true;
2713bf215546Sopenharmony_ci         break;
2714bf215546Sopenharmony_ci      }
2715bf215546Sopenharmony_ci   }
2716bf215546Sopenharmony_ci
2717bf215546Sopenharmony_ci   if (need_color_mip_flush) {
2718bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |=
2719bf215546Sopenharmony_ci         RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
2720bf215546Sopenharmony_ci   }
2721bf215546Sopenharmony_ci
2722bf215546Sopenharmony_ci   memset(cmd_buffer->state.cb_mip, 0, sizeof(cmd_buffer->state.cb_mip));
2723bf215546Sopenharmony_ci}
2724bf215546Sopenharmony_ci
2725bf215546Sopenharmony_cistatic struct radv_image *
2726bf215546Sopenharmony_ciradv_cmd_buffer_get_vrs_image(struct radv_cmd_buffer *cmd_buffer)
2727bf215546Sopenharmony_ci{
2728bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
2729bf215546Sopenharmony_ci
2730bf215546Sopenharmony_ci   if (!device->vrs.image) {
2731bf215546Sopenharmony_ci      VkResult result;
2732bf215546Sopenharmony_ci
2733bf215546Sopenharmony_ci      /* The global VRS state is initialized on-demand to avoid wasting VRAM. */
2734bf215546Sopenharmony_ci      result = radv_device_init_vrs_state(device);
2735bf215546Sopenharmony_ci      if (result != VK_SUCCESS) {
2736bf215546Sopenharmony_ci         cmd_buffer->record_result = result;
2737bf215546Sopenharmony_ci         return NULL;
2738bf215546Sopenharmony_ci      }
2739bf215546Sopenharmony_ci   }
2740bf215546Sopenharmony_ci
2741bf215546Sopenharmony_ci   return device->vrs.image;
2742bf215546Sopenharmony_ci}
2743bf215546Sopenharmony_ci
2744bf215546Sopenharmony_cistatic void
2745bf215546Sopenharmony_ciradv_emit_framebuffer_state(struct radv_cmd_buffer *cmd_buffer)
2746bf215546Sopenharmony_ci{
2747bf215546Sopenharmony_ci   int i;
2748bf215546Sopenharmony_ci   struct vk_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
2749bf215546Sopenharmony_ci   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
2750bf215546Sopenharmony_ci   bool disable_constant_encode_ac01 = false;
2751bf215546Sopenharmony_ci   unsigned color_invalid = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11
2752bf215546Sopenharmony_ci                            ? G_028C70_FORMAT_GFX11(V_028C70_COLOR_INVALID)
2753bf215546Sopenharmony_ci                            : G_028C70_FORMAT_GFX6(V_028C70_COLOR_INVALID);
2754bf215546Sopenharmony_ci
2755bf215546Sopenharmony_ci   for (i = 0; i < subpass->color_count; ++i) {
2756bf215546Sopenharmony_ci      if (subpass->color_attachments[i].attachment == VK_ATTACHMENT_UNUSED) {
2757bf215546Sopenharmony_ci         radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, color_invalid);
2758bf215546Sopenharmony_ci         continue;
2759bf215546Sopenharmony_ci      }
2760bf215546Sopenharmony_ci
2761bf215546Sopenharmony_ci      int idx = subpass->color_attachments[i].attachment;
2762bf215546Sopenharmony_ci      struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
2763bf215546Sopenharmony_ci      VkImageLayout layout = subpass->color_attachments[i].layout;
2764bf215546Sopenharmony_ci      bool in_render_loop = subpass->color_attachments[i].in_render_loop;
2765bf215546Sopenharmony_ci
2766bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, iview->image->bindings[0].bo);
2767bf215546Sopenharmony_ci
2768bf215546Sopenharmony_ci      assert(iview->vk.aspects & (VK_IMAGE_ASPECT_COLOR_BIT | VK_IMAGE_ASPECT_PLANE_0_BIT |
2769bf215546Sopenharmony_ci                                   VK_IMAGE_ASPECT_PLANE_1_BIT | VK_IMAGE_ASPECT_PLANE_2_BIT));
2770bf215546Sopenharmony_ci
2771bf215546Sopenharmony_ci      if (iview->image->disjoint && iview->vk.aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
2772bf215546Sopenharmony_ci         for (uint32_t plane_id = 0; plane_id < iview->image->plane_count; plane_id++) {
2773bf215546Sopenharmony_ci            radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
2774bf215546Sopenharmony_ci                  iview->image->bindings[plane_id].bo);
2775bf215546Sopenharmony_ci         }
2776bf215546Sopenharmony_ci      } else {
2777bf215546Sopenharmony_ci         uint32_t plane_id = iview->image->disjoint ? iview->plane_id : 0;
2778bf215546Sopenharmony_ci         radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
2779bf215546Sopenharmony_ci               iview->image->bindings[plane_id].bo);
2780bf215546Sopenharmony_ci      }
2781bf215546Sopenharmony_ci
2782bf215546Sopenharmony_ci      radv_emit_fb_color_state(cmd_buffer, i, &cmd_buffer->state.attachments[idx].cb, iview, layout,
2783bf215546Sopenharmony_ci                               in_render_loop);
2784bf215546Sopenharmony_ci
2785bf215546Sopenharmony_ci      radv_load_color_clear_metadata(cmd_buffer, iview, i);
2786bf215546Sopenharmony_ci
2787bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9 &&
2788bf215546Sopenharmony_ci          iview->image->dcc_sign_reinterpret) {
2789bf215546Sopenharmony_ci         /* Disable constant encoding with the clear value of "1" with different DCC signedness
2790bf215546Sopenharmony_ci          * because the hardware will fill "1" instead of the clear value.
2791bf215546Sopenharmony_ci          */
2792bf215546Sopenharmony_ci         disable_constant_encode_ac01 = true;
2793bf215546Sopenharmony_ci      }
2794bf215546Sopenharmony_ci   }
2795bf215546Sopenharmony_ci   for (; i < cmd_buffer->state.last_subpass_color_count; i++) {
2796bf215546Sopenharmony_ci      radeon_set_context_reg(cmd_buffer->cs, R_028C70_CB_COLOR0_INFO + i * 0x3C, color_invalid);
2797bf215546Sopenharmony_ci   }
2798bf215546Sopenharmony_ci   cmd_buffer->state.last_subpass_color_count = subpass->color_count;
2799bf215546Sopenharmony_ci
2800bf215546Sopenharmony_ci   if (subpass->depth_stencil_attachment) {
2801bf215546Sopenharmony_ci      int idx = subpass->depth_stencil_attachment->attachment;
2802bf215546Sopenharmony_ci      VkImageLayout layout = subpass->depth_stencil_attachment->layout;
2803bf215546Sopenharmony_ci      bool in_render_loop = subpass->depth_stencil_attachment->in_render_loop;
2804bf215546Sopenharmony_ci      struct radv_image_view *iview = cmd_buffer->state.attachments[idx].iview;
2805bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs,
2806bf215546Sopenharmony_ci                         cmd_buffer->state.attachments[idx].iview->image->bindings[0].bo);
2807bf215546Sopenharmony_ci
2808bf215546Sopenharmony_ci      radv_emit_fb_ds_state(cmd_buffer, &cmd_buffer->state.attachments[idx].ds, iview, layout,
2809bf215546Sopenharmony_ci                            in_render_loop);
2810bf215546Sopenharmony_ci
2811bf215546Sopenharmony_ci      if (radv_layout_is_htile_compressed(
2812bf215546Sopenharmony_ci             cmd_buffer->device, iview->image, layout, in_render_loop,
2813bf215546Sopenharmony_ci             radv_image_queue_family_mask(iview->image, cmd_buffer->qf,
2814bf215546Sopenharmony_ci                                          cmd_buffer->qf))) {
2815bf215546Sopenharmony_ci         /* Only load the depth/stencil fast clear values when
2816bf215546Sopenharmony_ci          * compressed rendering is enabled.
2817bf215546Sopenharmony_ci          */
2818bf215546Sopenharmony_ci         radv_load_ds_clear_metadata(cmd_buffer, iview);
2819bf215546Sopenharmony_ci      }
2820bf215546Sopenharmony_ci   } else if (subpass->vrs_attachment && radv_cmd_buffer_get_vrs_image(cmd_buffer)) {
2821bf215546Sopenharmony_ci      /* When a subpass uses a VRS attachment without binding a depth/stencil attachment, we have to
2822bf215546Sopenharmony_ci       * bind our internal depth buffer that contains the VRS data as part of HTILE.
2823bf215546Sopenharmony_ci       */
2824bf215546Sopenharmony_ci      VkImageLayout layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
2825bf215546Sopenharmony_ci      struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer;
2826bf215546Sopenharmony_ci      struct radv_image *image = cmd_buffer->device->vrs.image;
2827bf215546Sopenharmony_ci      struct radv_ds_buffer_info ds;
2828bf215546Sopenharmony_ci      struct radv_image_view iview;
2829bf215546Sopenharmony_ci
2830bf215546Sopenharmony_ci      radv_image_view_init(&iview, cmd_buffer->device,
2831bf215546Sopenharmony_ci                           &(VkImageViewCreateInfo){
2832bf215546Sopenharmony_ci                              .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2833bf215546Sopenharmony_ci                              .image = radv_image_to_handle(image),
2834bf215546Sopenharmony_ci                              .viewType = radv_meta_get_view_type(image),
2835bf215546Sopenharmony_ci                              .format = image->vk.format,
2836bf215546Sopenharmony_ci                              .subresourceRange =
2837bf215546Sopenharmony_ci                                 {
2838bf215546Sopenharmony_ci                                    .aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,
2839bf215546Sopenharmony_ci                                    .baseMipLevel = 0,
2840bf215546Sopenharmony_ci                                    .levelCount = 1,
2841bf215546Sopenharmony_ci                                    .baseArrayLayer = 0,
2842bf215546Sopenharmony_ci                                    .layerCount = 1,
2843bf215546Sopenharmony_ci                                 },
2844bf215546Sopenharmony_ci                           },
2845bf215546Sopenharmony_ci                           0, NULL);
2846bf215546Sopenharmony_ci
2847bf215546Sopenharmony_ci      radv_initialise_vrs_surface(image, htile_buffer, &ds);
2848bf215546Sopenharmony_ci
2849bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, htile_buffer->bo);
2850bf215546Sopenharmony_ci
2851bf215546Sopenharmony_ci      radv_emit_fb_ds_state(cmd_buffer, &ds, &iview, layout, false);
2852bf215546Sopenharmony_ci
2853bf215546Sopenharmony_ci      radv_image_view_finish(&iview);
2854bf215546Sopenharmony_ci   } else {
2855bf215546Sopenharmony_ci      unsigned num_samples = 0;
2856bf215546Sopenharmony_ci
2857bf215546Sopenharmony_ci      /* On GFX11, DB_Z_INFO.NUM_SAMPLES should always match the framebuffer samples. It affects
2858bf215546Sopenharmony_ci       * VRS and occlusion queries if depth and stencil are not bound.
2859bf215546Sopenharmony_ci       */
2860bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX11)
2861bf215546Sopenharmony_ci         num_samples = util_logbase2(subpass->max_sample_count);
2862bf215546Sopenharmony_ci
2863bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9)
2864bf215546Sopenharmony_ci         radeon_set_context_reg_seq(cmd_buffer->cs, R_028038_DB_Z_INFO, 2);
2865bf215546Sopenharmony_ci      else
2866bf215546Sopenharmony_ci         radeon_set_context_reg_seq(cmd_buffer->cs, R_028040_DB_Z_INFO, 2);
2867bf215546Sopenharmony_ci
2868bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028040_FORMAT(V_028040_Z_INVALID) |       /* DB_Z_INFO */
2869bf215546Sopenharmony_ci                                  S_028040_NUM_SAMPLES(num_samples));
2870bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, S_028044_FORMAT(V_028044_STENCIL_INVALID)); /* DB_STENCIL_INFO */
2871bf215546Sopenharmony_ci   }
2872bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028208_PA_SC_WINDOW_SCISSOR_BR,
2873bf215546Sopenharmony_ci                          S_028208_BR_X(framebuffer->width) | S_028208_BR_Y(framebuffer->height));
2874bf215546Sopenharmony_ci
2875bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX8) {
2876bf215546Sopenharmony_ci      bool disable_constant_encode =
2877bf215546Sopenharmony_ci         cmd_buffer->device->physical_device->rad_info.has_dcc_constant_encode;
2878bf215546Sopenharmony_ci      enum amd_gfx_level gfx_level = cmd_buffer->device->physical_device->rad_info.gfx_level;
2879bf215546Sopenharmony_ci      uint8_t watermark = gfx_level >= GFX10 ? 6 : 4;
2880bf215546Sopenharmony_ci
2881bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
2882bf215546Sopenharmony_ci         radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_FDCC_CONTROL,
2883bf215546Sopenharmony_ci                                S_028424_SAMPLE_MASK_TRACKER_WATERMARK(watermark));
2884bf215546Sopenharmony_ci      } else {
2885bf215546Sopenharmony_ci         radeon_set_context_reg(cmd_buffer->cs, R_028424_CB_DCC_CONTROL,
2886bf215546Sopenharmony_ci                                S_028424_OVERWRITE_COMBINER_MRT_SHARING_DISABLE(gfx_level <= GFX9) |
2887bf215546Sopenharmony_ci                                S_028424_OVERWRITE_COMBINER_WATERMARK(watermark) |
2888bf215546Sopenharmony_ci                                S_028424_DISABLE_CONSTANT_ENCODE_AC01(disable_constant_encode_ac01) |
2889bf215546Sopenharmony_ci                                S_028424_DISABLE_CONSTANT_ENCODE_REG(disable_constant_encode));
2890bf215546Sopenharmony_ci      }
2891bf215546Sopenharmony_ci   }
2892bf215546Sopenharmony_ci
2893bf215546Sopenharmony_ci   cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_FRAMEBUFFER;
2894bf215546Sopenharmony_ci}
2895bf215546Sopenharmony_ci
2896bf215546Sopenharmony_cistatic void
2897bf215546Sopenharmony_ciradv_emit_index_buffer(struct radv_cmd_buffer *cmd_buffer, bool indirect)
2898bf215546Sopenharmony_ci{
2899bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
2900bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
2901bf215546Sopenharmony_ci
2902bf215546Sopenharmony_ci   /* With indirect generated commands the index buffer bind may be part of the
2903bf215546Sopenharmony_ci    * indirect command buffer, in which case the app may not have bound any yet. */
2904bf215546Sopenharmony_ci   if (state->index_type < 0)
2905bf215546Sopenharmony_ci      return;
2906bf215546Sopenharmony_ci
2907bf215546Sopenharmony_ci   /* For the direct indexed draws we use DRAW_INDEX_2, which includes
2908bf215546Sopenharmony_ci    * the index_va and max_index_count already. */
2909bf215546Sopenharmony_ci   if (!indirect)
2910bf215546Sopenharmony_ci      return;
2911bf215546Sopenharmony_ci
2912bf215546Sopenharmony_ci   if (state->max_index_count ||
2913bf215546Sopenharmony_ci       !cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug) {
2914bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_INDEX_BASE, 1, 0));
2915bf215546Sopenharmony_ci      radeon_emit(cs, state->index_va);
2916bf215546Sopenharmony_ci      radeon_emit(cs, state->index_va >> 32);
2917bf215546Sopenharmony_ci
2918bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_INDEX_BUFFER_SIZE, 0, 0));
2919bf215546Sopenharmony_ci      radeon_emit(cs, state->max_index_count);
2920bf215546Sopenharmony_ci   }
2921bf215546Sopenharmony_ci
2922bf215546Sopenharmony_ci   cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_INDEX_BUFFER;
2923bf215546Sopenharmony_ci}
2924bf215546Sopenharmony_ci
2925bf215546Sopenharmony_civoid
2926bf215546Sopenharmony_ciradv_set_db_count_control(struct radv_cmd_buffer *cmd_buffer, bool enable_occlusion_queries)
2927bf215546Sopenharmony_ci{
2928bf215546Sopenharmony_ci   bool has_perfect_queries = cmd_buffer->state.perfect_occlusion_queries_enabled;
2929bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
2930bf215546Sopenharmony_ci   uint32_t pa_sc_mode_cntl_1 = pipeline ? pipeline->ms.pa_sc_mode_cntl_1 : 0;
2931bf215546Sopenharmony_ci   uint32_t db_count_control;
2932bf215546Sopenharmony_ci
2933bf215546Sopenharmony_ci   if (!enable_occlusion_queries) {
2934bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
2935bf215546Sopenharmony_ci         if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
2936bf215546Sopenharmony_ci             pipeline->disable_out_of_order_rast_for_occlusion && has_perfect_queries) {
2937bf215546Sopenharmony_ci            /* Re-enable out-of-order rasterization if the
2938bf215546Sopenharmony_ci             * bound pipeline supports it and if it's has
2939bf215546Sopenharmony_ci             * been disabled before starting any perfect
2940bf215546Sopenharmony_ci             * occlusion queries.
2941bf215546Sopenharmony_ci             */
2942bf215546Sopenharmony_ci            radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1);
2943bf215546Sopenharmony_ci         }
2944bf215546Sopenharmony_ci      }
2945bf215546Sopenharmony_ci      db_count_control = S_028004_ZPASS_INCREMENT_DISABLE(1);
2946bf215546Sopenharmony_ci   } else {
2947bf215546Sopenharmony_ci      const struct radv_subpass *subpass = cmd_buffer->state.subpass;
2948bf215546Sopenharmony_ci      uint32_t sample_rate = subpass ? util_logbase2(subpass->max_sample_count) : 0;
2949bf215546Sopenharmony_ci      bool gfx10_perfect =
2950bf215546Sopenharmony_ci         cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10 && has_perfect_queries;
2951bf215546Sopenharmony_ci
2952bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
2953bf215546Sopenharmony_ci         /* Always enable PERFECT_ZPASS_COUNTS due to issues with partially
2954bf215546Sopenharmony_ci          * covered tiles, discards, and early depth testing. For more details,
2955bf215546Sopenharmony_ci          * see https://gitlab.freedesktop.org/mesa/mesa/-/issues/3218 */
2956bf215546Sopenharmony_ci         db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) |
2957bf215546Sopenharmony_ci                            S_028004_DISABLE_CONSERVATIVE_ZPASS_COUNTS(gfx10_perfect) |
2958bf215546Sopenharmony_ci                            S_028004_SAMPLE_RATE(sample_rate) | S_028004_ZPASS_ENABLE(1) |
2959bf215546Sopenharmony_ci                            S_028004_SLICE_EVEN_ENABLE(1) | S_028004_SLICE_ODD_ENABLE(1);
2960bf215546Sopenharmony_ci
2961bf215546Sopenharmony_ci         if (G_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE(pa_sc_mode_cntl_1) &&
2962bf215546Sopenharmony_ci             pipeline->disable_out_of_order_rast_for_occlusion && has_perfect_queries) {
2963bf215546Sopenharmony_ci            /* If the bound pipeline has enabled
2964bf215546Sopenharmony_ci             * out-of-order rasterization, we should
2965bf215546Sopenharmony_ci             * disable it before starting any perfect
2966bf215546Sopenharmony_ci             * occlusion queries.
2967bf215546Sopenharmony_ci             */
2968bf215546Sopenharmony_ci            pa_sc_mode_cntl_1 &= C_028A4C_OUT_OF_ORDER_PRIMITIVE_ENABLE;
2969bf215546Sopenharmony_ci
2970bf215546Sopenharmony_ci            radeon_set_context_reg(cmd_buffer->cs, R_028A4C_PA_SC_MODE_CNTL_1, pa_sc_mode_cntl_1);
2971bf215546Sopenharmony_ci         }
2972bf215546Sopenharmony_ci      } else {
2973bf215546Sopenharmony_ci         db_count_control = S_028004_PERFECT_ZPASS_COUNTS(1) | S_028004_SAMPLE_RATE(sample_rate);
2974bf215546Sopenharmony_ci      }
2975bf215546Sopenharmony_ci   }
2976bf215546Sopenharmony_ci
2977bf215546Sopenharmony_ci   radeon_set_context_reg(cmd_buffer->cs, R_028004_DB_COUNT_CONTROL, db_count_control);
2978bf215546Sopenharmony_ci
2979bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
2980bf215546Sopenharmony_ci}
2981bf215546Sopenharmony_ci
/**
 * Compute the index of a pre-compiled instance-rate VS prolog.
 *
 * instance_rate_vs_prologs is a flattened array of array of arrays of different sizes, or a
 * single array sorted in ascending order using:
 * - total number of attributes
 * - number of instanced attributes
 * - index of first instanced attribute
 *
 * \param num_attributes        total number of attributes, must be in [1, 16]
 * \param instance_rate_inputs  bitmask of instanced attributes; must be non-zero and contain
 *                              at most num_attributes bits. Only the bit count and the lowest
 *                              set bit are used, so the result identifies a contiguous run of
 *                              instanced attributes (the caller in lookup_vs_prolog() checks
 *                              contiguity before calling).
 * \return index into the flattened array
 *
 * Example: num_attributes = 2 gives indices 1, 2, 3 for the masks 0b01, 0b10, 0b11.
 */
unsigned
radv_instance_rate_prolog_index(unsigned num_attributes, uint32_t instance_rate_inputs)
{
   /* From total number of attributes to offset: entry n-1 is the number of slots used by all
    * smaller totals, a total of k attributes using k*(k+1)/2 slots.
    */
   static const uint16_t total_to_offset[16] = {0,   1,   4,   10,  20,  35,  56,  84,
                                                120, 165, 220, 286, 364, 455, 560, 680};

   /* From number of instanced attributes to offset. This would require a different LUT depending on
    * the total number of attributes, but we can exploit a pattern to use just the LUT for 16 total
    * attributes: every count skipped holds (16 - num_attributes) fewer entries than in the
    * 16-attribute table.
    */
   static const uint8_t count_to_offset_total16[16] = {0,   16,  31,  45,  58,  70,  81,  91,
                                                       100, 108, 115, 121, 126, 130, 133, 135};

   /* Both tables are indexed with "value - 1": reject inputs that would read out of bounds
    * (or make ffs() return 0) instead of silently returning a garbage index.
    */
   assert(num_attributes >= 1 && num_attributes <= 16);
   assert(instance_rate_inputs != 0);

   unsigned count = util_bitcount(instance_rate_inputs);
   assert(count <= num_attributes);

   unsigned start_index = total_to_offset[num_attributes - 1];
   unsigned offset_from_start_index =
      count_to_offset_total16[count - 1] - ((16 - num_attributes) * (count - 1));

   /* ffs() is 1-based, hence the -1 to get the index of the first instanced attribute. */
   unsigned first = ffs(instance_rate_inputs) - 1;
   return start_index + offset_from_start_index + first;
}
3010bf215546Sopenharmony_ci
/* First 32-bit word of a VS prolog cache key.
 *
 * lookup_vs_prolog() builds a variable-length key: this header followed by optional 32-bit
 * words. Each single-bit flag below records that the matching word(s) were appended, in the
 * order instance_rate_inputs, nontrivial_divisors, zero_divisors, misaligned_mask (+ packed
 * formats, + post_shuffle), alpha_adjust_lo, alpha_adjust_hi.
 *
 * The bit-fields add up to exactly 32 bits so that the header can be read and written as the
 * single word "v" (lookup_vs_prolog() statically asserts sizeof == 4).
 */
union vs_prolog_key_header {
   struct {
      /* Size of the whole key in bytes, header included; used as hash/compare length. */
      uint32_t key_size : 8;
      /* Number of vertex attributes handled by the prolog. */
      uint32_t num_attributes : 6;
      /* Copies of the corresponding radv_vs_prolog_key fields. */
      uint32_t as_ls : 1;
      uint32_t is_ngg : 1;
      uint32_t wave32 : 1;
      uint32_t next_stage : 3;
      /* Presence flags for the optional key words that follow the header. */
      uint32_t instance_rate_inputs : 1;
      uint32_t alpha_adjust_lo : 1;
      uint32_t alpha_adjust_hi : 1;
      uint32_t misaligned_mask : 1;
      uint32_t post_shuffle : 1;
      uint32_t nontrivial_divisors : 1;
      uint32_t zero_divisors : 1;
      /* We need this to ensure the padding is zero. It's useful even if it's unused. */
      uint32_t padding0 : 5;
   };
   /* Raw view of the header, stored as key word 0. */
   uint32_t v;
};
3031bf215546Sopenharmony_ci
3032bf215546Sopenharmony_ciuint32_t
3033bf215546Sopenharmony_ciradv_hash_vs_prolog(const void *key_)
3034bf215546Sopenharmony_ci{
3035bf215546Sopenharmony_ci   const uint32_t *key = key_;
3036bf215546Sopenharmony_ci   union vs_prolog_key_header header;
3037bf215546Sopenharmony_ci   header.v = key[0];
3038bf215546Sopenharmony_ci   return _mesa_hash_data(key, header.key_size);
3039bf215546Sopenharmony_ci}
3040bf215546Sopenharmony_ci
3041bf215546Sopenharmony_cibool
3042bf215546Sopenharmony_ciradv_cmp_vs_prolog(const void *a_, const void *b_)
3043bf215546Sopenharmony_ci{
3044bf215546Sopenharmony_ci   const uint32_t *a = a_;
3045bf215546Sopenharmony_ci   const uint32_t *b = b_;
3046bf215546Sopenharmony_ci   if (a[0] != b[0])
3047bf215546Sopenharmony_ci      return false;
3048bf215546Sopenharmony_ci
3049bf215546Sopenharmony_ci   union vs_prolog_key_header header;
3050bf215546Sopenharmony_ci   header.v = a[0];
3051bf215546Sopenharmony_ci   return memcmp(a, b, header.key_size) == 0;
3052bf215546Sopenharmony_ci}
3053bf215546Sopenharmony_ci
3054bf215546Sopenharmony_cistatic struct radv_shader_part *
3055bf215546Sopenharmony_cilookup_vs_prolog(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shader,
3056bf215546Sopenharmony_ci                 uint32_t *nontrivial_divisors)
3057bf215546Sopenharmony_ci{
3058bf215546Sopenharmony_ci   STATIC_ASSERT(sizeof(union vs_prolog_key_header) == 4);
3059bf215546Sopenharmony_ci   assert(vs_shader->info.vs.dynamic_inputs);
3060bf215546Sopenharmony_ci
3061bf215546Sopenharmony_ci   const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input;
3062bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
3063bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
3064bf215546Sopenharmony_ci
3065bf215546Sopenharmony_ci   unsigned num_attributes = pipeline->last_vertex_attrib_bit;
3066bf215546Sopenharmony_ci   uint32_t attribute_mask = BITFIELD_MASK(num_attributes);
3067bf215546Sopenharmony_ci
3068bf215546Sopenharmony_ci   uint32_t instance_rate_inputs = state->instance_rate_inputs & attribute_mask;
3069bf215546Sopenharmony_ci   uint32_t zero_divisors = state->zero_divisors & attribute_mask;
3070bf215546Sopenharmony_ci   *nontrivial_divisors = state->nontrivial_divisors & attribute_mask;
3071bf215546Sopenharmony_ci   uint32_t misaligned_mask = cmd_buffer->state.vbo_misaligned_mask;
3072bf215546Sopenharmony_ci   if (cmd_buffer->state.vbo_misaligned_mask_invalid) {
3073bf215546Sopenharmony_ci      assert(device->physical_device->rad_info.gfx_level == GFX6 ||
3074bf215546Sopenharmony_ci             device->physical_device->rad_info.gfx_level >= GFX10);
3075bf215546Sopenharmony_ci
3076bf215546Sopenharmony_ci      u_foreach_bit (index, cmd_buffer->state.vbo_misaligned_mask_invalid & attribute_mask) {
3077bf215546Sopenharmony_ci         uint8_t binding = state->bindings[index];
3078bf215546Sopenharmony_ci         if (!(cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(binding)))
3079bf215546Sopenharmony_ci            continue;
3080bf215546Sopenharmony_ci         uint8_t req = state->format_align_req_minus_1[index];
3081bf215546Sopenharmony_ci         struct radv_vertex_binding *vb = &cmd_buffer->vertex_bindings[binding];
3082bf215546Sopenharmony_ci         VkDeviceSize offset = vb->offset + state->offsets[index];
3083bf215546Sopenharmony_ci         if ((offset & req) || (vb->stride & req))
3084bf215546Sopenharmony_ci            misaligned_mask |= BITFIELD_BIT(index);
3085bf215546Sopenharmony_ci      }
3086bf215546Sopenharmony_ci      cmd_buffer->state.vbo_misaligned_mask = misaligned_mask;
3087bf215546Sopenharmony_ci      cmd_buffer->state.vbo_misaligned_mask_invalid &= ~attribute_mask;
3088bf215546Sopenharmony_ci   }
3089bf215546Sopenharmony_ci
3090bf215546Sopenharmony_ci   /* try to use a pre-compiled prolog first */
3091bf215546Sopenharmony_ci   struct radv_shader_part *prolog = NULL;
3092bf215546Sopenharmony_ci   if (pipeline->can_use_simple_input &&
3093bf215546Sopenharmony_ci       (!vs_shader->info.vs.as_ls || !instance_rate_inputs) &&
3094bf215546Sopenharmony_ci       !misaligned_mask && !state->alpha_adjust_lo && !state->alpha_adjust_hi) {
3095bf215546Sopenharmony_ci      if (!instance_rate_inputs) {
3096bf215546Sopenharmony_ci         prolog = device->simple_vs_prologs[num_attributes - 1];
3097bf215546Sopenharmony_ci      } else if (num_attributes <= 16 && !*nontrivial_divisors && !zero_divisors &&
3098bf215546Sopenharmony_ci                 util_bitcount(instance_rate_inputs) ==
3099bf215546Sopenharmony_ci                    (util_last_bit(instance_rate_inputs) - ffs(instance_rate_inputs) + 1)) {
3100bf215546Sopenharmony_ci         unsigned index = radv_instance_rate_prolog_index(num_attributes, instance_rate_inputs);
3101bf215546Sopenharmony_ci         prolog = device->instance_rate_vs_prologs[index];
3102bf215546Sopenharmony_ci      }
3103bf215546Sopenharmony_ci   }
3104bf215546Sopenharmony_ci   if (prolog)
3105bf215546Sopenharmony_ci      return prolog;
3106bf215546Sopenharmony_ci
3107bf215546Sopenharmony_ci   /* if we couldn't use a pre-compiled prolog, find one in the cache or create one */
3108bf215546Sopenharmony_ci   uint32_t key_words[17];
3109bf215546Sopenharmony_ci   unsigned key_size = 1;
3110bf215546Sopenharmony_ci
3111bf215546Sopenharmony_ci   struct radv_vs_prolog_key key;
3112bf215546Sopenharmony_ci   key.state = state;
3113bf215546Sopenharmony_ci   key.num_attributes = num_attributes;
3114bf215546Sopenharmony_ci   key.misaligned_mask = misaligned_mask;
3115bf215546Sopenharmony_ci   /* The instance ID input VGPR is placed differently when as_ls=true. */
3116bf215546Sopenharmony_ci   key.as_ls = vs_shader->info.vs.as_ls && instance_rate_inputs;
3117bf215546Sopenharmony_ci   key.is_ngg = vs_shader->info.is_ngg;
3118bf215546Sopenharmony_ci   key.wave32 = vs_shader->info.wave_size == 32;
3119bf215546Sopenharmony_ci   key.next_stage = pipeline->next_vertex_stage;
3120bf215546Sopenharmony_ci
3121bf215546Sopenharmony_ci   union vs_prolog_key_header header;
3122bf215546Sopenharmony_ci   header.v = 0;
3123bf215546Sopenharmony_ci   header.num_attributes = num_attributes;
3124bf215546Sopenharmony_ci   header.as_ls = key.as_ls;
3125bf215546Sopenharmony_ci   header.is_ngg = key.is_ngg;
3126bf215546Sopenharmony_ci   header.wave32 = key.wave32;
3127bf215546Sopenharmony_ci   header.next_stage = key.next_stage;
3128bf215546Sopenharmony_ci
3129bf215546Sopenharmony_ci   if (instance_rate_inputs & ~*nontrivial_divisors) {
3130bf215546Sopenharmony_ci      header.instance_rate_inputs = true;
3131bf215546Sopenharmony_ci      key_words[key_size++] = instance_rate_inputs;
3132bf215546Sopenharmony_ci   }
3133bf215546Sopenharmony_ci   if (*nontrivial_divisors) {
3134bf215546Sopenharmony_ci      header.nontrivial_divisors = true;
3135bf215546Sopenharmony_ci      key_words[key_size++] = *nontrivial_divisors;
3136bf215546Sopenharmony_ci   }
3137bf215546Sopenharmony_ci   if (zero_divisors) {
3138bf215546Sopenharmony_ci      header.zero_divisors = true;
3139bf215546Sopenharmony_ci      key_words[key_size++] = zero_divisors;
3140bf215546Sopenharmony_ci   }
3141bf215546Sopenharmony_ci   if (misaligned_mask) {
3142bf215546Sopenharmony_ci      header.misaligned_mask = true;
3143bf215546Sopenharmony_ci      key_words[key_size++] = misaligned_mask;
3144bf215546Sopenharmony_ci
3145bf215546Sopenharmony_ci      uint8_t *formats = (uint8_t *)&key_words[key_size];
3146bf215546Sopenharmony_ci      unsigned num_formats = 0;
3147bf215546Sopenharmony_ci      u_foreach_bit(index, misaligned_mask) formats[num_formats++] = state->formats[index];
3148bf215546Sopenharmony_ci      while (num_formats & 0x3)
3149bf215546Sopenharmony_ci         formats[num_formats++] = 0;
3150bf215546Sopenharmony_ci      key_size += num_formats / 4u;
3151bf215546Sopenharmony_ci
3152bf215546Sopenharmony_ci      if (state->post_shuffle & attribute_mask) {
3153bf215546Sopenharmony_ci         header.post_shuffle = true;
3154bf215546Sopenharmony_ci         key_words[key_size++] = state->post_shuffle & attribute_mask;
3155bf215546Sopenharmony_ci      }
3156bf215546Sopenharmony_ci   }
3157bf215546Sopenharmony_ci   if (state->alpha_adjust_lo & attribute_mask) {
3158bf215546Sopenharmony_ci      header.alpha_adjust_lo = true;
3159bf215546Sopenharmony_ci      key_words[key_size++] = state->alpha_adjust_lo & attribute_mask;
3160bf215546Sopenharmony_ci   }
3161bf215546Sopenharmony_ci   if (state->alpha_adjust_hi & attribute_mask) {
3162bf215546Sopenharmony_ci      header.alpha_adjust_hi = true;
3163bf215546Sopenharmony_ci      key_words[key_size++] = state->alpha_adjust_hi & attribute_mask;
3164bf215546Sopenharmony_ci   }
3165bf215546Sopenharmony_ci
3166bf215546Sopenharmony_ci   header.key_size = key_size * sizeof(key_words[0]);
3167bf215546Sopenharmony_ci   key_words[0] = header.v;
3168bf215546Sopenharmony_ci
3169bf215546Sopenharmony_ci   uint32_t hash = radv_hash_vs_prolog(key_words);
3170bf215546Sopenharmony_ci
3171bf215546Sopenharmony_ci   if (cmd_buffer->state.emitted_vs_prolog &&
3172bf215546Sopenharmony_ci       cmd_buffer->state.emitted_vs_prolog_key_hash == hash &&
3173bf215546Sopenharmony_ci       radv_cmp_vs_prolog(key_words, cmd_buffer->state.emitted_vs_prolog_key))
3174bf215546Sopenharmony_ci      return cmd_buffer->state.emitted_vs_prolog;
3175bf215546Sopenharmony_ci
3176bf215546Sopenharmony_ci   u_rwlock_rdlock(&device->vs_prologs_lock);
3177bf215546Sopenharmony_ci   struct hash_entry *prolog_entry =
3178bf215546Sopenharmony_ci      _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words);
3179bf215546Sopenharmony_ci   u_rwlock_rdunlock(&device->vs_prologs_lock);
3180bf215546Sopenharmony_ci
3181bf215546Sopenharmony_ci   if (!prolog_entry) {
3182bf215546Sopenharmony_ci      u_rwlock_wrlock(&device->vs_prologs_lock);
3183bf215546Sopenharmony_ci      prolog_entry = _mesa_hash_table_search_pre_hashed(device->vs_prologs, hash, key_words);
3184bf215546Sopenharmony_ci      if (prolog_entry) {
3185bf215546Sopenharmony_ci         u_rwlock_wrunlock(&device->vs_prologs_lock);
3186bf215546Sopenharmony_ci         return prolog_entry->data;
3187bf215546Sopenharmony_ci      }
3188bf215546Sopenharmony_ci
3189bf215546Sopenharmony_ci      prolog = radv_create_vs_prolog(device, &key);
3190bf215546Sopenharmony_ci      uint32_t *key2 = malloc(key_size * 4);
3191bf215546Sopenharmony_ci      if (!prolog || !key2) {
3192bf215546Sopenharmony_ci         radv_shader_part_destroy(device, prolog);
3193bf215546Sopenharmony_ci         free(key2);
3194bf215546Sopenharmony_ci         u_rwlock_wrunlock(&device->vs_prologs_lock);
3195bf215546Sopenharmony_ci         return NULL;
3196bf215546Sopenharmony_ci      }
3197bf215546Sopenharmony_ci      memcpy(key2, key_words, key_size * 4);
3198bf215546Sopenharmony_ci      _mesa_hash_table_insert_pre_hashed(device->vs_prologs, hash, key2, prolog);
3199bf215546Sopenharmony_ci
3200bf215546Sopenharmony_ci      u_rwlock_wrunlock(&device->vs_prologs_lock);
3201bf215546Sopenharmony_ci      return prolog;
3202bf215546Sopenharmony_ci   }
3203bf215546Sopenharmony_ci
3204bf215546Sopenharmony_ci   return prolog_entry->data;
3205bf215546Sopenharmony_ci}
3206bf215546Sopenharmony_ci
3207bf215546Sopenharmony_cistatic void
3208bf215546Sopenharmony_ciemit_prolog_regs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shader,
3209bf215546Sopenharmony_ci                 struct radv_shader_part *prolog, bool pipeline_is_dirty)
3210bf215546Sopenharmony_ci{
3211bf215546Sopenharmony_ci   /* no need to re-emit anything in this case */
3212bf215546Sopenharmony_ci   if (cmd_buffer->state.emitted_vs_prolog == prolog && !pipeline_is_dirty)
3213bf215546Sopenharmony_ci      return;
3214bf215546Sopenharmony_ci
3215bf215546Sopenharmony_ci   enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
3216bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
3217bf215546Sopenharmony_ci   uint64_t prolog_va = radv_buffer_get_va(prolog->bo) + prolog->alloc->offset;
3218bf215546Sopenharmony_ci
3219bf215546Sopenharmony_ci   assert(cmd_buffer->state.emitted_graphics_pipeline == cmd_buffer->state.graphics_pipeline);
3220bf215546Sopenharmony_ci
3221bf215546Sopenharmony_ci   uint32_t rsrc1 = vs_shader->config.rsrc1;
3222bf215546Sopenharmony_ci   if (chip < GFX10 && G_00B228_SGPRS(prolog->rsrc1) > G_00B228_SGPRS(vs_shader->config.rsrc1))
3223bf215546Sopenharmony_ci      rsrc1 = (rsrc1 & C_00B228_SGPRS) | (prolog->rsrc1 & ~C_00B228_SGPRS);
3224bf215546Sopenharmony_ci
3225bf215546Sopenharmony_ci   /* The main shader must not use less VGPRs than the prolog, otherwise shared vgprs might not
3226bf215546Sopenharmony_ci    * work.
3227bf215546Sopenharmony_ci    */
3228bf215546Sopenharmony_ci   assert(G_00B848_VGPRS(vs_shader->config.rsrc1) >= G_00B848_VGPRS(prolog->rsrc1));
3229bf215546Sopenharmony_ci
3230bf215546Sopenharmony_ci   unsigned pgm_lo_reg = R_00B120_SPI_SHADER_PGM_LO_VS;
3231bf215546Sopenharmony_ci   unsigned rsrc1_reg = R_00B128_SPI_SHADER_PGM_RSRC1_VS;
3232bf215546Sopenharmony_ci   if (vs_shader->info.is_ngg || pipeline->base.shaders[MESA_SHADER_GEOMETRY] == vs_shader) {
3233bf215546Sopenharmony_ci      pgm_lo_reg = chip >= GFX10 ? R_00B320_SPI_SHADER_PGM_LO_ES : R_00B210_SPI_SHADER_PGM_LO_ES;
3234bf215546Sopenharmony_ci      rsrc1_reg = R_00B228_SPI_SHADER_PGM_RSRC1_GS;
3235bf215546Sopenharmony_ci   } else if (pipeline->base.shaders[MESA_SHADER_TESS_CTRL] == vs_shader) {
3236bf215546Sopenharmony_ci      pgm_lo_reg = chip >= GFX10 ? R_00B520_SPI_SHADER_PGM_LO_LS : R_00B410_SPI_SHADER_PGM_LO_LS;
3237bf215546Sopenharmony_ci      rsrc1_reg = R_00B428_SPI_SHADER_PGM_RSRC1_HS;
3238bf215546Sopenharmony_ci   } else if (vs_shader->info.vs.as_ls) {
3239bf215546Sopenharmony_ci      pgm_lo_reg = R_00B520_SPI_SHADER_PGM_LO_LS;
3240bf215546Sopenharmony_ci      rsrc1_reg = R_00B528_SPI_SHADER_PGM_RSRC1_LS;
3241bf215546Sopenharmony_ci   } else if (vs_shader->info.vs.as_es) {
3242bf215546Sopenharmony_ci      pgm_lo_reg = R_00B320_SPI_SHADER_PGM_LO_ES;
3243bf215546Sopenharmony_ci      rsrc1_reg = R_00B328_SPI_SHADER_PGM_RSRC1_ES;
3244bf215546Sopenharmony_ci   }
3245bf215546Sopenharmony_ci
3246bf215546Sopenharmony_ci   radeon_set_sh_reg(cmd_buffer->cs, pgm_lo_reg, prolog_va >> 8);
3247bf215546Sopenharmony_ci
3248bf215546Sopenharmony_ci   if (chip < GFX10)
3249bf215546Sopenharmony_ci      radeon_set_sh_reg(cmd_buffer->cs, rsrc1_reg, rsrc1);
3250bf215546Sopenharmony_ci   else
3251bf215546Sopenharmony_ci      assert(rsrc1 == vs_shader->config.rsrc1);
3252bf215546Sopenharmony_ci
3253bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, prolog->bo);
3254bf215546Sopenharmony_ci}
3255bf215546Sopenharmony_ci
3256bf215546Sopenharmony_cistatic void
3257bf215546Sopenharmony_ciemit_prolog_inputs(struct radv_cmd_buffer *cmd_buffer, struct radv_shader *vs_shader,
3258bf215546Sopenharmony_ci                   uint32_t nontrivial_divisors, bool pipeline_is_dirty)
3259bf215546Sopenharmony_ci{
3260bf215546Sopenharmony_ci   /* no need to re-emit anything in this case */
3261bf215546Sopenharmony_ci   if (!nontrivial_divisors && !pipeline_is_dirty && cmd_buffer->state.emitted_vs_prolog &&
3262bf215546Sopenharmony_ci       !cmd_buffer->state.emitted_vs_prolog->nontrivial_divisors)
3263bf215546Sopenharmony_ci      return;
3264bf215546Sopenharmony_ci
3265bf215546Sopenharmony_ci   const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input;
3266bf215546Sopenharmony_ci   uint64_t input_va = radv_shader_get_va(vs_shader);
3267bf215546Sopenharmony_ci
3268bf215546Sopenharmony_ci   if (nontrivial_divisors) {
3269bf215546Sopenharmony_ci      unsigned inputs_offset;
3270bf215546Sopenharmony_ci      uint32_t *inputs;
3271bf215546Sopenharmony_ci      unsigned size = 8 + util_bitcount(nontrivial_divisors) * 8;
3272bf215546Sopenharmony_ci      if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &inputs_offset, (void **)&inputs))
3273bf215546Sopenharmony_ci         return;
3274bf215546Sopenharmony_ci
3275bf215546Sopenharmony_ci      *(inputs++) = input_va;
3276bf215546Sopenharmony_ci      *(inputs++) = input_va >> 32;
3277bf215546Sopenharmony_ci
3278bf215546Sopenharmony_ci      u_foreach_bit(index, nontrivial_divisors)
3279bf215546Sopenharmony_ci      {
3280bf215546Sopenharmony_ci         uint32_t div = state->divisors[index];
3281bf215546Sopenharmony_ci         if (div == 0) {
3282bf215546Sopenharmony_ci            *(inputs++) = 0;
3283bf215546Sopenharmony_ci            *(inputs++) = 1;
3284bf215546Sopenharmony_ci         } else if (util_is_power_of_two_or_zero(div)) {
3285bf215546Sopenharmony_ci            *(inputs++) = util_logbase2(div) | (1 << 8);
3286bf215546Sopenharmony_ci            *(inputs++) = 0xffffffffu;
3287bf215546Sopenharmony_ci         } else {
3288bf215546Sopenharmony_ci            struct util_fast_udiv_info info = util_compute_fast_udiv_info(div, 32, 32);
3289bf215546Sopenharmony_ci            *(inputs++) = info.pre_shift | (info.increment << 8) | (info.post_shift << 16);
3290bf215546Sopenharmony_ci            *(inputs++) = info.multiplier;
3291bf215546Sopenharmony_ci         }
3292bf215546Sopenharmony_ci      }
3293bf215546Sopenharmony_ci
3294bf215546Sopenharmony_ci      input_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + inputs_offset;
3295bf215546Sopenharmony_ci   }
3296bf215546Sopenharmony_ci
3297bf215546Sopenharmony_ci   struct radv_userdata_info *loc =
3298bf215546Sopenharmony_ci      &vs_shader->info.user_sgprs_locs.shader_data[AC_UD_VS_PROLOG_INPUTS];
3299bf215546Sopenharmony_ci   uint32_t base_reg = cmd_buffer->state.graphics_pipeline->base.user_data_0[MESA_SHADER_VERTEX];
3300bf215546Sopenharmony_ci   assert(loc->sgpr_idx != -1);
3301bf215546Sopenharmony_ci   assert(loc->num_sgprs == 2);
3302bf215546Sopenharmony_ci   radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
3303bf215546Sopenharmony_ci                            input_va, true);
3304bf215546Sopenharmony_ci}
3305bf215546Sopenharmony_ci
3306bf215546Sopenharmony_cistatic void
3307bf215546Sopenharmony_ciradv_emit_vertex_input(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
3308bf215546Sopenharmony_ci{
3309bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
3310bf215546Sopenharmony_ci   struct radv_shader *vs_shader = radv_get_shader(&pipeline->base, MESA_SHADER_VERTEX);
3311bf215546Sopenharmony_ci
3312bf215546Sopenharmony_ci   assert(!cmd_buffer->state.mesh_shading);
3313bf215546Sopenharmony_ci
3314bf215546Sopenharmony_ci   if (!vs_shader->info.vs.has_prolog)
3315bf215546Sopenharmony_ci      return;
3316bf215546Sopenharmony_ci
3317bf215546Sopenharmony_ci   uint32_t nontrivial_divisors;
3318bf215546Sopenharmony_ci   struct radv_shader_part *prolog =
3319bf215546Sopenharmony_ci      lookup_vs_prolog(cmd_buffer, vs_shader, &nontrivial_divisors);
3320bf215546Sopenharmony_ci   if (!prolog) {
3321bf215546Sopenharmony_ci      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
3322bf215546Sopenharmony_ci      return;
3323bf215546Sopenharmony_ci   }
3324bf215546Sopenharmony_ci   emit_prolog_regs(cmd_buffer, vs_shader, prolog, pipeline_is_dirty);
3325bf215546Sopenharmony_ci   emit_prolog_inputs(cmd_buffer, vs_shader, nontrivial_divisors, pipeline_is_dirty);
3326bf215546Sopenharmony_ci
3327bf215546Sopenharmony_ci   cmd_buffer->state.emitted_vs_prolog = prolog;
3328bf215546Sopenharmony_ci
3329bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->trace_bo))
3330bf215546Sopenharmony_ci      radv_save_vs_prolog(cmd_buffer, prolog);
3331bf215546Sopenharmony_ci}
3332bf215546Sopenharmony_ci
3333bf215546Sopenharmony_cistatic void
3334bf215546Sopenharmony_ciradv_cmd_buffer_flush_dynamic_state(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
3335bf215546Sopenharmony_ci{
3336bf215546Sopenharmony_ci   uint64_t states =
3337bf215546Sopenharmony_ci      cmd_buffer->state.dirty & cmd_buffer->state.emitted_graphics_pipeline->needed_dynamic_state;
3338bf215546Sopenharmony_ci
3339bf215546Sopenharmony_ci   if (states & (RADV_CMD_DIRTY_DYNAMIC_VIEWPORT))
3340bf215546Sopenharmony_ci      radv_emit_viewport(cmd_buffer);
3341bf215546Sopenharmony_ci
3342bf215546Sopenharmony_ci   if (states & (RADV_CMD_DIRTY_DYNAMIC_SCISSOR | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT) &&
3343bf215546Sopenharmony_ci       !cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
3344bf215546Sopenharmony_ci      radv_emit_scissor(cmd_buffer);
3345bf215546Sopenharmony_ci
3346bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH)
3347bf215546Sopenharmony_ci      radv_emit_line_width(cmd_buffer);
3348bf215546Sopenharmony_ci
3349bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
3350bf215546Sopenharmony_ci      radv_emit_blend_constants(cmd_buffer);
3351bf215546Sopenharmony_ci
3352bf215546Sopenharmony_ci   if (states &
3353bf215546Sopenharmony_ci       (RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK |
3354bf215546Sopenharmony_ci        RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK))
3355bf215546Sopenharmony_ci      radv_emit_stencil(cmd_buffer);
3356bf215546Sopenharmony_ci
3357bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS)
3358bf215546Sopenharmony_ci      radv_emit_depth_bounds(cmd_buffer);
3359bf215546Sopenharmony_ci
3360bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS)
3361bf215546Sopenharmony_ci      radv_emit_depth_bias(cmd_buffer);
3362bf215546Sopenharmony_ci
3363bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE)
3364bf215546Sopenharmony_ci      radv_emit_discard_rectangle(cmd_buffer);
3365bf215546Sopenharmony_ci
3366bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS)
3367bf215546Sopenharmony_ci      radv_emit_sample_locations(cmd_buffer);
3368bf215546Sopenharmony_ci
3369bf215546Sopenharmony_ci   if (states & (RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE))
3370bf215546Sopenharmony_ci      radv_emit_line_stipple(cmd_buffer);
3371bf215546Sopenharmony_ci
3372bf215546Sopenharmony_ci   if (states & (RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
3373bf215546Sopenharmony_ci                 RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE))
3374bf215546Sopenharmony_ci      radv_emit_culling(cmd_buffer, states);
3375bf215546Sopenharmony_ci
3376bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY)
3377bf215546Sopenharmony_ci      radv_emit_primitive_topology(cmd_buffer);
3378bf215546Sopenharmony_ci
3379bf215546Sopenharmony_ci   if (states &
3380bf215546Sopenharmony_ci       (RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE |
3381bf215546Sopenharmony_ci        RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP | RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE |
3382bf215546Sopenharmony_ci        RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE | RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP))
3383bf215546Sopenharmony_ci      radv_emit_depth_control(cmd_buffer, states);
3384bf215546Sopenharmony_ci
3385bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP)
3386bf215546Sopenharmony_ci      radv_emit_stencil_control(cmd_buffer);
3387bf215546Sopenharmony_ci
3388bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE)
3389bf215546Sopenharmony_ci      radv_emit_fragment_shading_rate(cmd_buffer);
3390bf215546Sopenharmony_ci
3391bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE)
3392bf215546Sopenharmony_ci      radv_emit_primitive_restart_enable(cmd_buffer);
3393bf215546Sopenharmony_ci
3394bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE)
3395bf215546Sopenharmony_ci      radv_emit_rasterizer_discard_enable(cmd_buffer);
3396bf215546Sopenharmony_ci
3397bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP)
3398bf215546Sopenharmony_ci      radv_emit_logic_op(cmd_buffer);
3399bf215546Sopenharmony_ci
3400bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE)
3401bf215546Sopenharmony_ci      radv_emit_color_write_enable(cmd_buffer);
3402bf215546Sopenharmony_ci
3403bf215546Sopenharmony_ci   if (states & RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT)
3404bf215546Sopenharmony_ci      radv_emit_vertex_input(cmd_buffer, pipeline_is_dirty);
3405bf215546Sopenharmony_ci
3406bf215546Sopenharmony_ci   cmd_buffer->state.dirty &= ~states;
3407bf215546Sopenharmony_ci}
3408bf215546Sopenharmony_ci
3409bf215546Sopenharmony_cistatic void
3410bf215546Sopenharmony_ciradv_flush_push_descriptors(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
3411bf215546Sopenharmony_ci{
3412bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
3413bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
3414bf215546Sopenharmony_ci   struct radv_descriptor_set *set = (struct radv_descriptor_set *)&descriptors_state->push_set.set;
3415bf215546Sopenharmony_ci   unsigned bo_offset;
3416bf215546Sopenharmony_ci
3417bf215546Sopenharmony_ci   if (!radv_cmd_buffer_upload_data(cmd_buffer, set->header.size, set->header.mapped_ptr,
3418bf215546Sopenharmony_ci                                    &bo_offset))
3419bf215546Sopenharmony_ci      return;
3420bf215546Sopenharmony_ci
3421bf215546Sopenharmony_ci   set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
3422bf215546Sopenharmony_ci   set->header.va += bo_offset;
3423bf215546Sopenharmony_ci}
3424bf215546Sopenharmony_ci
3425bf215546Sopenharmony_cistatic void
3426bf215546Sopenharmony_ciradv_flush_indirect_descriptor_sets(struct radv_cmd_buffer *cmd_buffer,
3427bf215546Sopenharmony_ci                                    struct radv_pipeline *pipeline, VkPipelineBindPoint bind_point)
3428bf215546Sopenharmony_ci{
3429bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
3430bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
3431bf215546Sopenharmony_ci   uint32_t size = MAX_SETS * 4;
3432bf215546Sopenharmony_ci   uint32_t offset;
3433bf215546Sopenharmony_ci   void *ptr;
3434bf215546Sopenharmony_ci
3435bf215546Sopenharmony_ci   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, size, &offset, &ptr))
3436bf215546Sopenharmony_ci      return;
3437bf215546Sopenharmony_ci
3438bf215546Sopenharmony_ci   for (unsigned i = 0; i < MAX_SETS; i++) {
3439bf215546Sopenharmony_ci      uint32_t *uptr = ((uint32_t *)ptr) + i;
3440bf215546Sopenharmony_ci      uint64_t set_va = 0;
3441bf215546Sopenharmony_ci      struct radv_descriptor_set *set = descriptors_state->sets[i];
3442bf215546Sopenharmony_ci      if (descriptors_state->valid & (1u << i))
3443bf215546Sopenharmony_ci         set_va = set->header.va;
3444bf215546Sopenharmony_ci      uptr[0] = set_va & 0xffffffff;
3445bf215546Sopenharmony_ci   }
3446bf215546Sopenharmony_ci
3447bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
3448bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
3449bf215546Sopenharmony_ci   uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
3450bf215546Sopenharmony_ci   va += offset;
3451bf215546Sopenharmony_ci
3452bf215546Sopenharmony_ci   if (bind_point == VK_PIPELINE_BIND_POINT_GRAPHICS) {
3453bf215546Sopenharmony_ci      struct radv_graphics_pipeline *graphics_pipeline = radv_pipeline_to_graphics(pipeline);
3454bf215546Sopenharmony_ci
3455bf215546Sopenharmony_ci      if (pipeline->shaders[MESA_SHADER_VERTEX])
3456bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_VERTEX,
3457bf215546Sopenharmony_ci                                    AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3458bf215546Sopenharmony_ci
3459bf215546Sopenharmony_ci      if (pipeline->shaders[MESA_SHADER_FRAGMENT])
3460bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_FRAGMENT,
3461bf215546Sopenharmony_ci                                    AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3462bf215546Sopenharmony_ci
3463bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_MESH))
3464bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_MESH,
3465bf215546Sopenharmony_ci                                    AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3466bf215546Sopenharmony_ci
3467bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TASK))
3468bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cmd_buffer->ace_internal.cs, pipeline, MESA_SHADER_TASK,
3469bf215546Sopenharmony_ci                                    AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3470bf215546Sopenharmony_ci
3471bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_GEOMETRY))
3472bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_GEOMETRY,
3473bf215546Sopenharmony_ci                                    AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3474bf215546Sopenharmony_ci
3475bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TESS_CTRL))
3476bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_TESS_CTRL,
3477bf215546Sopenharmony_ci                                    AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3478bf215546Sopenharmony_ci
3479bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TESS_CTRL))
3480bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_TESS_EVAL,
3481bf215546Sopenharmony_ci                                    AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3482bf215546Sopenharmony_ci   } else {
3483bf215546Sopenharmony_ci      radv_emit_userdata_address(device, cs, pipeline, MESA_SHADER_COMPUTE,
3484bf215546Sopenharmony_ci                                 AC_UD_INDIRECT_DESCRIPTOR_SETS, va);
3485bf215546Sopenharmony_ci   }
3486bf215546Sopenharmony_ci}
3487bf215546Sopenharmony_ci
3488bf215546Sopenharmony_cistatic void
3489bf215546Sopenharmony_ciradv_flush_descriptors(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages,
3490bf215546Sopenharmony_ci                       struct radv_pipeline *pipeline, VkPipelineBindPoint bind_point)
3491bf215546Sopenharmony_ci{
3492bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
3493bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
3494bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
3495bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
3496bf215546Sopenharmony_ci   bool flush_indirect_descriptors;
3497bf215546Sopenharmony_ci
3498bf215546Sopenharmony_ci   if (!descriptors_state->dirty)
3499bf215546Sopenharmony_ci      return;
3500bf215546Sopenharmony_ci
3501bf215546Sopenharmony_ci   if (descriptors_state->push_dirty)
3502bf215546Sopenharmony_ci      radv_flush_push_descriptors(cmd_buffer, bind_point);
3503bf215546Sopenharmony_ci
3504bf215546Sopenharmony_ci   flush_indirect_descriptors = pipeline->need_indirect_descriptor_sets;
3505bf215546Sopenharmony_ci
3506bf215546Sopenharmony_ci   if (flush_indirect_descriptors)
3507bf215546Sopenharmony_ci      radv_flush_indirect_descriptor_sets(cmd_buffer, pipeline, bind_point);
3508bf215546Sopenharmony_ci
3509bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max =
3510bf215546Sopenharmony_ci      radeon_check_space(device->ws, cs, MAX_SETS * MESA_VULKAN_SHADER_STAGES * 4);
3511bf215546Sopenharmony_ci
3512bf215546Sopenharmony_ci   if (stages & VK_SHADER_STAGE_COMPUTE_BIT) {
3513bf215546Sopenharmony_ci      radv_emit_descriptor_pointers(device, cs, pipeline, descriptors_state, MESA_SHADER_COMPUTE);
3514bf215546Sopenharmony_ci   } else {
3515bf215546Sopenharmony_ci      radv_foreach_stage(stage, stages & ~VK_SHADER_STAGE_TASK_BIT_NV)
3516bf215546Sopenharmony_ci      {
3517bf215546Sopenharmony_ci         if (!cmd_buffer->state.graphics_pipeline->base.shaders[stage])
3518bf215546Sopenharmony_ci            continue;
3519bf215546Sopenharmony_ci
3520bf215546Sopenharmony_ci         radv_emit_descriptor_pointers(device, cs, pipeline, descriptors_state, stage);
3521bf215546Sopenharmony_ci      }
3522bf215546Sopenharmony_ci
3523bf215546Sopenharmony_ci      if (stages & VK_SHADER_STAGE_TASK_BIT_NV) {
3524bf215546Sopenharmony_ci         radv_emit_descriptor_pointers(device, cmd_buffer->ace_internal.cs, pipeline,
3525bf215546Sopenharmony_ci                                       descriptors_state, MESA_SHADER_TASK);
3526bf215546Sopenharmony_ci      }
3527bf215546Sopenharmony_ci   }
3528bf215546Sopenharmony_ci
3529bf215546Sopenharmony_ci   descriptors_state->dirty = 0;
3530bf215546Sopenharmony_ci   descriptors_state->push_dirty = false;
3531bf215546Sopenharmony_ci
3532bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
3533bf215546Sopenharmony_ci
3534bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->trace_bo))
3535bf215546Sopenharmony_ci      radv_save_descriptors(cmd_buffer, bind_point);
3536bf215546Sopenharmony_ci}
3537bf215546Sopenharmony_ci
3538bf215546Sopenharmony_cistatic bool
3539bf215546Sopenharmony_ciradv_shader_loads_push_constants(struct radv_pipeline *pipeline, gl_shader_stage stage)
3540bf215546Sopenharmony_ci{
3541bf215546Sopenharmony_ci   struct radv_userdata_info *loc =
3542bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, stage, AC_UD_PUSH_CONSTANTS);
3543bf215546Sopenharmony_ci   return loc->sgpr_idx != -1;
3544bf215546Sopenharmony_ci}
3545bf215546Sopenharmony_ci
3546bf215546Sopenharmony_cistatic void
3547bf215546Sopenharmony_ciradv_emit_all_inline_push_consts(struct radv_device *device, struct radeon_cmdbuf *cs,
3548bf215546Sopenharmony_ci                                 struct radv_pipeline *pipeline, gl_shader_stage stage,
3549bf215546Sopenharmony_ci                                 uint32_t *values, bool *need_push_constants)
3550bf215546Sopenharmony_ci{
3551bf215546Sopenharmony_ci   const struct radv_shader *shader = radv_get_shader(pipeline, stage);
3552bf215546Sopenharmony_ci   if (!shader)
3553bf215546Sopenharmony_ci      return;
3554bf215546Sopenharmony_ci
3555bf215546Sopenharmony_ci   *need_push_constants |= radv_shader_loads_push_constants(pipeline, stage);
3556bf215546Sopenharmony_ci
3557bf215546Sopenharmony_ci   const uint64_t mask = shader->info.inline_push_constant_mask;
3558bf215546Sopenharmony_ci   if (!mask)
3559bf215546Sopenharmony_ci      return;
3560bf215546Sopenharmony_ci
3561bf215546Sopenharmony_ci   const uint8_t base = ffs(mask) - 1;
3562bf215546Sopenharmony_ci   if (mask == u_bit_consecutive64(base, util_last_bit64(mask) - base)) {
3563bf215546Sopenharmony_ci      /* consecutive inline push constants */
3564bf215546Sopenharmony_ci      radv_emit_inline_push_consts(device, cs, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS,
3565bf215546Sopenharmony_ci                                   values + base);
3566bf215546Sopenharmony_ci   } else {
3567bf215546Sopenharmony_ci      /* sparse inline push constants */
3568bf215546Sopenharmony_ci      uint32_t consts[AC_MAX_INLINE_PUSH_CONSTS];
3569bf215546Sopenharmony_ci      unsigned num_consts = 0;
3570bf215546Sopenharmony_ci      u_foreach_bit64 (idx, mask)
3571bf215546Sopenharmony_ci         consts[num_consts++] = values[idx];
3572bf215546Sopenharmony_ci      radv_emit_inline_push_consts(device, cs, pipeline, stage, AC_UD_INLINE_PUSH_CONSTANTS,
3573bf215546Sopenharmony_ci                                   consts);
3574bf215546Sopenharmony_ci   }
3575bf215546Sopenharmony_ci}
3576bf215546Sopenharmony_ci
3577bf215546Sopenharmony_cistatic void
3578bf215546Sopenharmony_ciradv_flush_constants(struct radv_cmd_buffer *cmd_buffer, VkShaderStageFlags stages,
3579bf215546Sopenharmony_ci                     struct radv_pipeline *pipeline, VkPipelineBindPoint bind_point)
3580bf215546Sopenharmony_ci{
3581bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
3582bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
3583bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
3584bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
3585bf215546Sopenharmony_ci   struct radv_shader *shader, *prev_shader;
3586bf215546Sopenharmony_ci   bool need_push_constants = false;
3587bf215546Sopenharmony_ci   unsigned offset;
3588bf215546Sopenharmony_ci   void *ptr;
3589bf215546Sopenharmony_ci   uint64_t va;
3590bf215546Sopenharmony_ci   uint32_t internal_stages;
3591bf215546Sopenharmony_ci   uint32_t dirty_stages = 0;
3592bf215546Sopenharmony_ci
3593bf215546Sopenharmony_ci   stages &= cmd_buffer->push_constant_stages;
3594bf215546Sopenharmony_ci   if (!stages || (!pipeline->push_constant_size && !pipeline->dynamic_offset_count))
3595bf215546Sopenharmony_ci      return;
3596bf215546Sopenharmony_ci
3597bf215546Sopenharmony_ci   internal_stages = stages;
3598bf215546Sopenharmony_ci   switch (bind_point) {
3599bf215546Sopenharmony_ci   case VK_PIPELINE_BIND_POINT_GRAPHICS:
3600bf215546Sopenharmony_ci      break;
3601bf215546Sopenharmony_ci   case VK_PIPELINE_BIND_POINT_COMPUTE:
3602bf215546Sopenharmony_ci      dirty_stages = RADV_RT_STAGE_BITS;
3603bf215546Sopenharmony_ci      break;
3604bf215546Sopenharmony_ci   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR:
3605bf215546Sopenharmony_ci      internal_stages = VK_SHADER_STAGE_COMPUTE_BIT;
3606bf215546Sopenharmony_ci      dirty_stages = VK_SHADER_STAGE_COMPUTE_BIT;
3607bf215546Sopenharmony_ci      break;
3608bf215546Sopenharmony_ci   default:
3609bf215546Sopenharmony_ci      unreachable("Unhandled bind point");
3610bf215546Sopenharmony_ci   }
3611bf215546Sopenharmony_ci
3612bf215546Sopenharmony_ci   radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_NV)
3613bf215546Sopenharmony_ci   {
3614bf215546Sopenharmony_ci      radv_emit_all_inline_push_consts(
3615bf215546Sopenharmony_ci         device, cs, pipeline, stage, (uint32_t *)cmd_buffer->push_constants, &need_push_constants);
3616bf215546Sopenharmony_ci   }
3617bf215546Sopenharmony_ci
3618bf215546Sopenharmony_ci   if (internal_stages & VK_SHADER_STAGE_TASK_BIT_NV) {
3619bf215546Sopenharmony_ci      radv_emit_all_inline_push_consts(device, cmd_buffer->ace_internal.cs, pipeline,
3620bf215546Sopenharmony_ci                                       MESA_SHADER_TASK, (uint32_t *)cmd_buffer->push_constants,
3621bf215546Sopenharmony_ci                                       &need_push_constants);
3622bf215546Sopenharmony_ci   }
3623bf215546Sopenharmony_ci
3624bf215546Sopenharmony_ci   if (need_push_constants) {
3625bf215546Sopenharmony_ci      if (!radv_cmd_buffer_upload_alloc(
3626bf215546Sopenharmony_ci             cmd_buffer, pipeline->push_constant_size + 16 * pipeline->dynamic_offset_count, &offset,
3627bf215546Sopenharmony_ci             &ptr))
3628bf215546Sopenharmony_ci         return;
3629bf215546Sopenharmony_ci
3630bf215546Sopenharmony_ci      memcpy(ptr, cmd_buffer->push_constants, pipeline->push_constant_size);
3631bf215546Sopenharmony_ci      memcpy((char *)ptr + pipeline->push_constant_size, descriptors_state->dynamic_buffers,
3632bf215546Sopenharmony_ci             16 * pipeline->dynamic_offset_count);
3633bf215546Sopenharmony_ci
3634bf215546Sopenharmony_ci      va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
3635bf215546Sopenharmony_ci      va += offset;
3636bf215546Sopenharmony_ci
3637bf215546Sopenharmony_ci      ASSERTED unsigned cdw_max =
3638bf215546Sopenharmony_ci         radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, MESA_VULKAN_SHADER_STAGES * 4);
3639bf215546Sopenharmony_ci
3640bf215546Sopenharmony_ci      prev_shader = NULL;
3641bf215546Sopenharmony_ci      radv_foreach_stage(stage, internal_stages & ~VK_SHADER_STAGE_TASK_BIT_NV)
3642bf215546Sopenharmony_ci      {
3643bf215546Sopenharmony_ci         shader = radv_get_shader(pipeline, stage);
3644bf215546Sopenharmony_ci
3645bf215546Sopenharmony_ci         /* Avoid redundantly emitting the address for merged stages. */
3646bf215546Sopenharmony_ci         if (shader && shader != prev_shader) {
3647bf215546Sopenharmony_ci            radv_emit_userdata_address(device, cs, pipeline, stage, AC_UD_PUSH_CONSTANTS, va);
3648bf215546Sopenharmony_ci
3649bf215546Sopenharmony_ci            prev_shader = shader;
3650bf215546Sopenharmony_ci         }
3651bf215546Sopenharmony_ci      }
3652bf215546Sopenharmony_ci
3653bf215546Sopenharmony_ci      if (internal_stages & VK_SHADER_STAGE_TASK_BIT_NV) {
3654bf215546Sopenharmony_ci         radv_emit_userdata_address(device, cmd_buffer->ace_internal.cs, pipeline, MESA_SHADER_TASK,
3655bf215546Sopenharmony_ci                                    AC_UD_PUSH_CONSTANTS, va);
3656bf215546Sopenharmony_ci      }
3657bf215546Sopenharmony_ci
3658bf215546Sopenharmony_ci      assert(cmd_buffer->cs->cdw <= cdw_max);
3659bf215546Sopenharmony_ci   }
3660bf215546Sopenharmony_ci
3661bf215546Sopenharmony_ci   cmd_buffer->push_constant_stages &= ~stages;
3662bf215546Sopenharmony_ci   cmd_buffer->push_constant_stages |= dirty_stages;
3663bf215546Sopenharmony_ci}
3664bf215546Sopenharmony_ci
3665bf215546Sopenharmony_cienum radv_dst_sel {
3666bf215546Sopenharmony_ci   DST_SEL_0001 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_0) |
3667bf215546Sopenharmony_ci                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1),
3668bf215546Sopenharmony_ci   DST_SEL_X001 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_0) |
3669bf215546Sopenharmony_ci                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1),
3670bf215546Sopenharmony_ci   DST_SEL_XY01 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3671bf215546Sopenharmony_ci                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_0) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1),
3672bf215546Sopenharmony_ci   DST_SEL_XYZ1 = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3673bf215546Sopenharmony_ci                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_1),
3674bf215546Sopenharmony_ci   DST_SEL_XYZW = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3675bf215546Sopenharmony_ci                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W),
3676bf215546Sopenharmony_ci   DST_SEL_ZYXW = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3677bf215546Sopenharmony_ci                  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W),
3678bf215546Sopenharmony_ci};
3679bf215546Sopenharmony_ci
3680bf215546Sopenharmony_cistatic const uint32_t data_format_dst_sel[] = {
3681bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_INVALID] = DST_SEL_0001,
3682bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_8] = DST_SEL_X001,
3683bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_16] = DST_SEL_X001,
3684bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_8_8] = DST_SEL_XY01,
3685bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_32] = DST_SEL_X001,
3686bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_16_16] = DST_SEL_XY01,
3687bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_10_11_11] = DST_SEL_XYZ1,
3688bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_11_11_10] = DST_SEL_XYZ1,
3689bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_10_10_10_2] = DST_SEL_XYZW,
3690bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_2_10_10_10] = DST_SEL_XYZW,
3691bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_8_8_8_8] = DST_SEL_XYZW,
3692bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_32_32] = DST_SEL_XY01,
3693bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_16_16_16_16] = DST_SEL_XYZW,
3694bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_32_32_32] = DST_SEL_XYZ1,
3695bf215546Sopenharmony_ci   [V_008F0C_BUF_DATA_FORMAT_32_32_32_32] = DST_SEL_XYZW,
3696bf215546Sopenharmony_ci};
3697bf215546Sopenharmony_ci
3698bf215546Sopenharmony_civoid
3699bf215546Sopenharmony_ciradv_write_vertex_descriptors(const struct radv_cmd_buffer *cmd_buffer,
3700bf215546Sopenharmony_ci                              const struct radv_graphics_pipeline *pipeline,
3701bf215546Sopenharmony_ci                              bool full_null_descriptors, void *vb_ptr)
3702bf215546Sopenharmony_ci{
3703bf215546Sopenharmony_ci   struct radv_shader *vs_shader = radv_get_shader(&pipeline->base, MESA_SHADER_VERTEX);
3704bf215546Sopenharmony_ci   enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
3705bf215546Sopenharmony_ci   unsigned desc_index = 0;
3706bf215546Sopenharmony_ci   uint32_t mask = pipeline->vb_desc_usage_mask;
3707bf215546Sopenharmony_ci   uint64_t va;
3708bf215546Sopenharmony_ci   const struct radv_vs_input_state *vs_state =
3709bf215546Sopenharmony_ci      vs_shader->info.vs.dynamic_inputs ? &cmd_buffer->state.dynamic_vs_input : NULL;
3710bf215546Sopenharmony_ci   assert(!vs_state || pipeline->use_per_attribute_vb_descs);
3711bf215546Sopenharmony_ci
3712bf215546Sopenharmony_ci   while (mask) {
3713bf215546Sopenharmony_ci      unsigned i = u_bit_scan(&mask);
3714bf215546Sopenharmony_ci      uint32_t *desc = &((uint32_t *)vb_ptr)[desc_index++ * 4];
3715bf215546Sopenharmony_ci      uint32_t offset, rsrc_word3;
3716bf215546Sopenharmony_ci      unsigned binding =
3717bf215546Sopenharmony_ci         vs_state ? cmd_buffer->state.dynamic_vs_input.bindings[i]
3718bf215546Sopenharmony_ci                  : (pipeline->use_per_attribute_vb_descs ? pipeline->attrib_bindings[i] : i);
3719bf215546Sopenharmony_ci      struct radv_buffer *buffer = cmd_buffer->vertex_binding_buffers[binding];
3720bf215546Sopenharmony_ci      unsigned num_records;
3721bf215546Sopenharmony_ci      unsigned stride;
3722bf215546Sopenharmony_ci
3723bf215546Sopenharmony_ci      if (vs_state) {
3724bf215546Sopenharmony_ci         unsigned format = vs_state->formats[i];
3725bf215546Sopenharmony_ci         unsigned dfmt = format & 0xf;
3726bf215546Sopenharmony_ci         unsigned nfmt = (format >> 4) & 0x7;
3727bf215546Sopenharmony_ci
3728bf215546Sopenharmony_ci         rsrc_word3 = vs_state->post_shuffle & (1u << i) ? DST_SEL_ZYXW : data_format_dst_sel[dfmt];
3729bf215546Sopenharmony_ci
3730bf215546Sopenharmony_ci         if (chip >= GFX10)
3731bf215546Sopenharmony_ci            rsrc_word3 |= S_008F0C_FORMAT(ac_get_tbuffer_format(chip, dfmt, nfmt));
3732bf215546Sopenharmony_ci         else
3733bf215546Sopenharmony_ci            rsrc_word3 |= S_008F0C_NUM_FORMAT(nfmt) | S_008F0C_DATA_FORMAT(dfmt);
3734bf215546Sopenharmony_ci      } else {
3735bf215546Sopenharmony_ci         if (chip >= GFX10)
3736bf215546Sopenharmony_ci            rsrc_word3 = DST_SEL_XYZW | S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_UINT);
3737bf215546Sopenharmony_ci         else
3738bf215546Sopenharmony_ci            rsrc_word3 = DST_SEL_XYZW | S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_UINT) |
3739bf215546Sopenharmony_ci                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3740bf215546Sopenharmony_ci      }
3741bf215546Sopenharmony_ci
3742bf215546Sopenharmony_ci      if (pipeline->uses_dynamic_stride) {
3743bf215546Sopenharmony_ci         stride = cmd_buffer->vertex_bindings[binding].stride;
3744bf215546Sopenharmony_ci      } else {
3745bf215546Sopenharmony_ci         stride = pipeline->binding_stride[binding];
3746bf215546Sopenharmony_ci      }
3747bf215546Sopenharmony_ci
3748bf215546Sopenharmony_ci      if (!buffer) {
3749bf215546Sopenharmony_ci         if (full_null_descriptors) {
3750bf215546Sopenharmony_ci            /* Put all the info in for the DGC generation shader in case the VBO gets overridden. */
3751bf215546Sopenharmony_ci            desc[0] = 0;
3752bf215546Sopenharmony_ci            desc[1] = S_008F04_STRIDE(stride);
3753bf215546Sopenharmony_ci            desc[2] = 0;
3754bf215546Sopenharmony_ci            desc[3] = rsrc_word3;
3755bf215546Sopenharmony_ci         } else if (vs_state) {
3756bf215546Sopenharmony_ci            /* Stride needs to be non-zero on GFX9, or else bounds checking is disabled. We need
3757bf215546Sopenharmony_ci             * to include the format/word3 so that the alpha channel is 1 for formats without an
3758bf215546Sopenharmony_ci             * alpha channel.
3759bf215546Sopenharmony_ci             */
3760bf215546Sopenharmony_ci            desc[0] = 0;
3761bf215546Sopenharmony_ci            desc[1] = S_008F04_STRIDE(16);
3762bf215546Sopenharmony_ci            desc[2] = 0;
3763bf215546Sopenharmony_ci            desc[3] = rsrc_word3;
3764bf215546Sopenharmony_ci         } else {
3765bf215546Sopenharmony_ci            memset(desc, 0, 4 * 4);
3766bf215546Sopenharmony_ci         }
3767bf215546Sopenharmony_ci
3768bf215546Sopenharmony_ci         continue;
3769bf215546Sopenharmony_ci      }
3770bf215546Sopenharmony_ci
3771bf215546Sopenharmony_ci      va = radv_buffer_get_va(buffer->bo);
3772bf215546Sopenharmony_ci
3773bf215546Sopenharmony_ci      offset = cmd_buffer->vertex_bindings[binding].offset;
3774bf215546Sopenharmony_ci      va += offset + buffer->offset;
3775bf215546Sopenharmony_ci      if (vs_state)
3776bf215546Sopenharmony_ci         va += vs_state->offsets[i];
3777bf215546Sopenharmony_ci
3778bf215546Sopenharmony_ci      if (cmd_buffer->vertex_bindings[binding].size) {
3779bf215546Sopenharmony_ci         num_records = cmd_buffer->vertex_bindings[binding].size;
3780bf215546Sopenharmony_ci      } else {
3781bf215546Sopenharmony_ci         num_records = vk_buffer_range(&buffer->vk, offset, VK_WHOLE_SIZE);
3782bf215546Sopenharmony_ci      }
3783bf215546Sopenharmony_ci
3784bf215546Sopenharmony_ci      if (pipeline->use_per_attribute_vb_descs) {
3785bf215546Sopenharmony_ci         uint32_t attrib_end =
3786bf215546Sopenharmony_ci            vs_state ? vs_state->offsets[i] + vs_state->format_sizes[i] : pipeline->attrib_ends[i];
3787bf215546Sopenharmony_ci
3788bf215546Sopenharmony_ci         if (num_records < attrib_end) {
3789bf215546Sopenharmony_ci            num_records = 0; /* not enough space for one vertex */
3790bf215546Sopenharmony_ci         } else if (stride == 0) {
3791bf215546Sopenharmony_ci            num_records = 1; /* only one vertex */
3792bf215546Sopenharmony_ci         } else {
3793bf215546Sopenharmony_ci            num_records = (num_records - attrib_end) / stride + 1;
3794bf215546Sopenharmony_ci            /* If attrib_offset>stride, then the compiler will increase the vertex index by
3795bf215546Sopenharmony_ci             * attrib_offset/stride and decrease the offset by attrib_offset%stride. This is
3796bf215546Sopenharmony_ci             * only allowed with static strides.
3797bf215546Sopenharmony_ci             */
3798bf215546Sopenharmony_ci            num_records += pipeline->attrib_index_offset[i];
3799bf215546Sopenharmony_ci         }
3800bf215546Sopenharmony_ci
3801bf215546Sopenharmony_ci         /* GFX10 uses OOB_SELECT_RAW if stride==0, so convert num_records from elements into
3802bf215546Sopenharmony_ci          * into bytes in that case. GFX8 always uses bytes.
3803bf215546Sopenharmony_ci          */
3804bf215546Sopenharmony_ci         if (num_records && (chip == GFX8 || (chip != GFX9 && !stride))) {
3805bf215546Sopenharmony_ci            num_records = (num_records - 1) * stride + attrib_end;
3806bf215546Sopenharmony_ci         } else if (!num_records) {
3807bf215546Sopenharmony_ci            /* On GFX9, it seems bounds checking is disabled if both
3808bf215546Sopenharmony_ci             * num_records and stride are zero. This doesn't seem necessary on GFX8, GFX10 and
3809bf215546Sopenharmony_ci             * GFX10.3 but it doesn't hurt.
3810bf215546Sopenharmony_ci             */
3811bf215546Sopenharmony_ci            if (full_null_descriptors) {
3812bf215546Sopenharmony_ci               /* Put all the info in for the DGC generation shader in case the VBO gets overridden.
3813bf215546Sopenharmony_ci                */
3814bf215546Sopenharmony_ci               desc[0] = 0;
3815bf215546Sopenharmony_ci               desc[1] = S_008F04_STRIDE(stride);
3816bf215546Sopenharmony_ci               desc[2] = 0;
3817bf215546Sopenharmony_ci               desc[3] = rsrc_word3;
3818bf215546Sopenharmony_ci            } else if (vs_state) {
3819bf215546Sopenharmony_ci               desc[0] = 0;
3820bf215546Sopenharmony_ci               desc[1] = S_008F04_STRIDE(16);
3821bf215546Sopenharmony_ci               desc[2] = 0;
3822bf215546Sopenharmony_ci               desc[3] = rsrc_word3;
3823bf215546Sopenharmony_ci            } else {
3824bf215546Sopenharmony_ci               memset(desc, 0, 16);
3825bf215546Sopenharmony_ci            }
3826bf215546Sopenharmony_ci
3827bf215546Sopenharmony_ci            continue;
3828bf215546Sopenharmony_ci         }
3829bf215546Sopenharmony_ci      } else {
3830bf215546Sopenharmony_ci         if (chip != GFX8 && stride)
3831bf215546Sopenharmony_ci            num_records = DIV_ROUND_UP(num_records, stride);
3832bf215546Sopenharmony_ci      }
3833bf215546Sopenharmony_ci
3834bf215546Sopenharmony_ci      if (chip >= GFX10) {
3835bf215546Sopenharmony_ci         /* OOB_SELECT chooses the out-of-bounds check:
3836bf215546Sopenharmony_ci          * - 1: index >= NUM_RECORDS (Structured)
3837bf215546Sopenharmony_ci          * - 3: offset >= NUM_RECORDS (Raw)
3838bf215546Sopenharmony_ci          */
3839bf215546Sopenharmony_ci         int oob_select = stride ? V_008F0C_OOB_SELECT_STRUCTURED : V_008F0C_OOB_SELECT_RAW;
3840bf215546Sopenharmony_ci         rsrc_word3 |= S_008F0C_OOB_SELECT(oob_select) | S_008F0C_RESOURCE_LEVEL(chip < GFX11);
3841bf215546Sopenharmony_ci      }
3842bf215546Sopenharmony_ci
3843bf215546Sopenharmony_ci      desc[0] = va;
3844bf215546Sopenharmony_ci      desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);
3845bf215546Sopenharmony_ci      desc[2] = num_records;
3846bf215546Sopenharmony_ci      desc[3] = rsrc_word3;
3847bf215546Sopenharmony_ci   }
3848bf215546Sopenharmony_ci}
3849bf215546Sopenharmony_ci
3850bf215546Sopenharmony_cistatic void
3851bf215546Sopenharmony_ciradv_flush_vertex_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
3852bf215546Sopenharmony_ci{
3853bf215546Sopenharmony_ci   if ((pipeline_is_dirty || (cmd_buffer->state.dirty & RADV_CMD_DIRTY_VERTEX_BUFFER)) &&
3854bf215546Sopenharmony_ci       cmd_buffer->state.graphics_pipeline->vb_desc_usage_mask) {
3855bf215546Sopenharmony_ci      /* Mesh shaders don't have vertex descriptors. */
3856bf215546Sopenharmony_ci      assert(!cmd_buffer->state.mesh_shading);
3857bf215546Sopenharmony_ci
3858bf215546Sopenharmony_ci      struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
3859bf215546Sopenharmony_ci      unsigned vb_offset;
3860bf215546Sopenharmony_ci      void *vb_ptr;
3861bf215546Sopenharmony_ci      uint64_t va;
3862bf215546Sopenharmony_ci
3863bf215546Sopenharmony_ci      /* allocate some descriptor state for vertex buffers */
3864bf215546Sopenharmony_ci      if (!radv_cmd_buffer_upload_alloc(cmd_buffer, pipeline->vb_desc_alloc_size, &vb_offset,
3865bf215546Sopenharmony_ci                                        &vb_ptr))
3866bf215546Sopenharmony_ci         return;
3867bf215546Sopenharmony_ci
3868bf215546Sopenharmony_ci      radv_write_vertex_descriptors(cmd_buffer, pipeline, false, vb_ptr);
3869bf215546Sopenharmony_ci
3870bf215546Sopenharmony_ci      va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
3871bf215546Sopenharmony_ci      va += vb_offset;
3872bf215546Sopenharmony_ci
3873bf215546Sopenharmony_ci      radv_emit_userdata_address(cmd_buffer->device, cmd_buffer->cs, &pipeline->base,
3874bf215546Sopenharmony_ci                                 MESA_SHADER_VERTEX, AC_UD_VS_VERTEX_BUFFERS, va);
3875bf215546Sopenharmony_ci
3876bf215546Sopenharmony_ci      cmd_buffer->state.vb_va = va;
3877bf215546Sopenharmony_ci      cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_VBO_DESCRIPTORS;
3878bf215546Sopenharmony_ci
3879bf215546Sopenharmony_ci      if (unlikely(cmd_buffer->device->trace_bo))
3880bf215546Sopenharmony_ci         radv_save_vertex_descriptors(cmd_buffer, (uintptr_t)vb_ptr);
3881bf215546Sopenharmony_ci   }
3882bf215546Sopenharmony_ci   cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_VERTEX_BUFFER;
3883bf215546Sopenharmony_ci}
3884bf215546Sopenharmony_ci
3885bf215546Sopenharmony_cistatic void
3886bf215546Sopenharmony_ciradv_emit_streamout_buffers(struct radv_cmd_buffer *cmd_buffer, uint64_t va)
3887bf215546Sopenharmony_ci{
3888bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
3889bf215546Sopenharmony_ci   struct radv_userdata_info *loc;
3890bf215546Sopenharmony_ci   uint32_t base_reg;
3891bf215546Sopenharmony_ci
3892bf215546Sopenharmony_ci   for (unsigned stage = 0; stage < MESA_VULKAN_SHADER_STAGES; ++stage) {
3893bf215546Sopenharmony_ci      if (!radv_get_shader(&pipeline->base, stage))
3894bf215546Sopenharmony_ci         continue;
3895bf215546Sopenharmony_ci
3896bf215546Sopenharmony_ci      loc = radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_STREAMOUT_BUFFERS);
3897bf215546Sopenharmony_ci      if (loc->sgpr_idx == -1)
3898bf215546Sopenharmony_ci         continue;
3899bf215546Sopenharmony_ci
3900bf215546Sopenharmony_ci      base_reg = pipeline->base.user_data_0[stage];
3901bf215546Sopenharmony_ci
3902bf215546Sopenharmony_ci      radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, va,
3903bf215546Sopenharmony_ci                               false);
3904bf215546Sopenharmony_ci   }
3905bf215546Sopenharmony_ci
3906bf215546Sopenharmony_ci   if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) {
3907bf215546Sopenharmony_ci      loc = &pipeline->base.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_STREAMOUT_BUFFERS];
3908bf215546Sopenharmony_ci      if (loc->sgpr_idx != -1) {
3909bf215546Sopenharmony_ci         base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
3910bf215546Sopenharmony_ci
3911bf215546Sopenharmony_ci         radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, base_reg + loc->sgpr_idx * 4,
3912bf215546Sopenharmony_ci                                  va, false);
3913bf215546Sopenharmony_ci      }
3914bf215546Sopenharmony_ci   }
3915bf215546Sopenharmony_ci}
3916bf215546Sopenharmony_ci
3917bf215546Sopenharmony_cistatic void
3918bf215546Sopenharmony_ciradv_flush_streamout_descriptors(struct radv_cmd_buffer *cmd_buffer)
3919bf215546Sopenharmony_ci{
3920bf215546Sopenharmony_ci   if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_STREAMOUT_BUFFER) {
3921bf215546Sopenharmony_ci      struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
3922bf215546Sopenharmony_ci      struct radv_streamout_state *so = &cmd_buffer->state.streamout;
3923bf215546Sopenharmony_ci      unsigned so_offset;
3924bf215546Sopenharmony_ci      void *so_ptr;
3925bf215546Sopenharmony_ci      uint64_t va;
3926bf215546Sopenharmony_ci
3927bf215546Sopenharmony_ci      /* Allocate some descriptor state for streamout buffers. */
3928bf215546Sopenharmony_ci      if (!radv_cmd_buffer_upload_alloc(cmd_buffer, MAX_SO_BUFFERS * 16, &so_offset, &so_ptr))
3929bf215546Sopenharmony_ci         return;
3930bf215546Sopenharmony_ci
3931bf215546Sopenharmony_ci      for (uint32_t i = 0; i < MAX_SO_BUFFERS; i++) {
3932bf215546Sopenharmony_ci         struct radv_buffer *buffer = sb[i].buffer;
3933bf215546Sopenharmony_ci         uint32_t *desc = &((uint32_t *)so_ptr)[i * 4];
3934bf215546Sopenharmony_ci
3935bf215546Sopenharmony_ci         if (!(so->enabled_mask & (1 << i)))
3936bf215546Sopenharmony_ci            continue;
3937bf215546Sopenharmony_ci
3938bf215546Sopenharmony_ci         va = radv_buffer_get_va(buffer->bo) + buffer->offset;
3939bf215546Sopenharmony_ci
3940bf215546Sopenharmony_ci         va += sb[i].offset;
3941bf215546Sopenharmony_ci
3942bf215546Sopenharmony_ci         /* Set the descriptor.
3943bf215546Sopenharmony_ci          *
3944bf215546Sopenharmony_ci          * On GFX8, the format must be non-INVALID, otherwise
3945bf215546Sopenharmony_ci          * the buffer will be considered not bound and store
3946bf215546Sopenharmony_ci          * instructions will be no-ops.
3947bf215546Sopenharmony_ci          */
3948bf215546Sopenharmony_ci         uint32_t size = 0xffffffff;
3949bf215546Sopenharmony_ci
3950bf215546Sopenharmony_ci         /* Compute the correct buffer size for NGG streamout
3951bf215546Sopenharmony_ci          * because it's used to determine the max emit per
3952bf215546Sopenharmony_ci          * buffer.
3953bf215546Sopenharmony_ci          */
3954bf215546Sopenharmony_ci         if (cmd_buffer->device->physical_device->use_ngg_streamout)
3955bf215546Sopenharmony_ci            size = buffer->vk.size - sb[i].offset;
3956bf215546Sopenharmony_ci
3957bf215546Sopenharmony_ci         uint32_t rsrc_word3 =
3958bf215546Sopenharmony_ci            S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
3959bf215546Sopenharmony_ci            S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
3960bf215546Sopenharmony_ci
3961bf215546Sopenharmony_ci         if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
3962bf215546Sopenharmony_ci            rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
3963bf215546Sopenharmony_ci                          S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
3964bf215546Sopenharmony_ci         } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
3965bf215546Sopenharmony_ci            rsrc_word3 |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
3966bf215546Sopenharmony_ci                          S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
3967bf215546Sopenharmony_ci         } else {
3968bf215546Sopenharmony_ci            rsrc_word3 |= S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
3969bf215546Sopenharmony_ci         }
3970bf215546Sopenharmony_ci
3971bf215546Sopenharmony_ci         desc[0] = va;
3972bf215546Sopenharmony_ci         desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
3973bf215546Sopenharmony_ci         desc[2] = size;
3974bf215546Sopenharmony_ci         desc[3] = rsrc_word3;
3975bf215546Sopenharmony_ci      }
3976bf215546Sopenharmony_ci
3977bf215546Sopenharmony_ci      va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
3978bf215546Sopenharmony_ci      va += so_offset;
3979bf215546Sopenharmony_ci
3980bf215546Sopenharmony_ci      radv_emit_streamout_buffers(cmd_buffer, va);
3981bf215546Sopenharmony_ci   }
3982bf215546Sopenharmony_ci
3983bf215546Sopenharmony_ci   cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_STREAMOUT_BUFFER;
3984bf215546Sopenharmony_ci}
3985bf215546Sopenharmony_ci
3986bf215546Sopenharmony_cistatic void
3987bf215546Sopenharmony_ciradv_flush_ngg_query_state(struct radv_cmd_buffer *cmd_buffer)
3988bf215546Sopenharmony_ci{
3989bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
3990bf215546Sopenharmony_ci   const unsigned stage = pipeline->last_vgt_api_stage;
3991bf215546Sopenharmony_ci   struct radv_userdata_info *loc;
3992bf215546Sopenharmony_ci   uint32_t ngg_query_state = 0;
3993bf215546Sopenharmony_ci   uint32_t base_reg;
3994bf215546Sopenharmony_ci
3995bf215546Sopenharmony_ci   loc = radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_NGG_QUERY_STATE);
3996bf215546Sopenharmony_ci   if (loc->sgpr_idx == -1)
3997bf215546Sopenharmony_ci      return;
3998bf215546Sopenharmony_ci
3999bf215546Sopenharmony_ci   assert(pipeline->is_ngg);
4000bf215546Sopenharmony_ci
4001bf215546Sopenharmony_ci   /* By default NGG queries are disabled but they are enabled if the command buffer has active GDS
4002bf215546Sopenharmony_ci    * queries or if it's a secondary command buffer that inherits the number of generated
4003bf215546Sopenharmony_ci    * primitives.
4004bf215546Sopenharmony_ci    */
4005bf215546Sopenharmony_ci   if (cmd_buffer->state.active_pipeline_gds_queries ||
4006bf215546Sopenharmony_ci       (cmd_buffer->state.inherited_pipeline_statistics &
4007bf215546Sopenharmony_ci        VK_QUERY_PIPELINE_STATISTIC_GEOMETRY_SHADER_PRIMITIVES_BIT))
4008bf215546Sopenharmony_ci      ngg_query_state = 1;
4009bf215546Sopenharmony_ci
4010bf215546Sopenharmony_ci   base_reg = pipeline->base.user_data_0[stage];
4011bf215546Sopenharmony_ci   assert(loc->sgpr_idx != -1);
4012bf215546Sopenharmony_ci
4013bf215546Sopenharmony_ci   radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, ngg_query_state);
4014bf215546Sopenharmony_ci}
4015bf215546Sopenharmony_ci
4016bf215546Sopenharmony_cistatic void
4017bf215546Sopenharmony_ciradv_flush_force_vrs_state(struct radv_cmd_buffer *cmd_buffer)
4018bf215546Sopenharmony_ci{
4019bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
4020bf215546Sopenharmony_ci   enum amd_gfx_level gfx_level = pipeline->base.device->physical_device->rad_info.gfx_level;
4021bf215546Sopenharmony_ci   const unsigned stage = pipeline->last_vgt_api_stage;
4022bf215546Sopenharmony_ci   struct radv_userdata_info *loc;
4023bf215546Sopenharmony_ci   uint32_t vrs_rates = 0;
4024bf215546Sopenharmony_ci   uint32_t base_reg;
4025bf215546Sopenharmony_ci
4026bf215546Sopenharmony_ci   if (!pipeline->force_vrs_per_vertex) {
4027bf215546Sopenharmony_ci      /* Un-set the SGPR index so we know to re-emit it later. */
4028bf215546Sopenharmony_ci      cmd_buffer->state.last_vrs_rates_sgpr_idx = -1;
4029bf215546Sopenharmony_ci      return;
4030bf215546Sopenharmony_ci   }
4031bf215546Sopenharmony_ci
4032bf215546Sopenharmony_ci   loc = radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_FORCE_VRS_RATES);
4033bf215546Sopenharmony_ci   assert(loc->sgpr_idx != -1);
4034bf215546Sopenharmony_ci
4035bf215546Sopenharmony_ci   base_reg = pipeline->base.user_data_0[stage];
4036bf215546Sopenharmony_ci
4037bf215546Sopenharmony_ci   switch (cmd_buffer->device->force_vrs) {
4038bf215546Sopenharmony_ci   case RADV_FORCE_VRS_2x2:
4039bf215546Sopenharmony_ci      vrs_rates = gfx_level >= GFX11 ? V_0283D0_VRS_SHADING_RATE_2X2 : (1u << 2) | (1u << 4);
4040bf215546Sopenharmony_ci      break;
4041bf215546Sopenharmony_ci   case RADV_FORCE_VRS_2x1:
4042bf215546Sopenharmony_ci      vrs_rates = gfx_level >= GFX11 ? V_0283D0_VRS_SHADING_RATE_2X1 : (1u << 2) | (0u << 4);
4043bf215546Sopenharmony_ci      break;
4044bf215546Sopenharmony_ci   case RADV_FORCE_VRS_1x2:
4045bf215546Sopenharmony_ci      vrs_rates = gfx_level >= GFX11 ? V_0283D0_VRS_SHADING_RATE_1X2 : (0u << 2) | (1u << 4);
4046bf215546Sopenharmony_ci      break;
4047bf215546Sopenharmony_ci   default:
4048bf215546Sopenharmony_ci      break;
4049bf215546Sopenharmony_ci   }
4050bf215546Sopenharmony_ci
4051bf215546Sopenharmony_ci   if (cmd_buffer->state.last_vrs_rates != vrs_rates ||
4052bf215546Sopenharmony_ci       cmd_buffer->state.last_vrs_rates_sgpr_idx != loc->sgpr_idx) {
4053bf215546Sopenharmony_ci      radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, vrs_rates);
4054bf215546Sopenharmony_ci   }
4055bf215546Sopenharmony_ci
4056bf215546Sopenharmony_ci   cmd_buffer->state.last_vrs_rates = vrs_rates;
4057bf215546Sopenharmony_ci   cmd_buffer->state.last_vrs_rates_sgpr_idx = loc->sgpr_idx;
4058bf215546Sopenharmony_ci}
4059bf215546Sopenharmony_ci
4060bf215546Sopenharmony_cistatic void
4061bf215546Sopenharmony_ciradv_upload_graphics_shader_descriptors(struct radv_cmd_buffer *cmd_buffer, bool pipeline_is_dirty)
4062bf215546Sopenharmony_ci{
4063bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
4064bf215546Sopenharmony_ci
4065bf215546Sopenharmony_ci   radv_flush_vertex_descriptors(cmd_buffer, pipeline_is_dirty);
4066bf215546Sopenharmony_ci   radv_flush_streamout_descriptors(cmd_buffer);
4067bf215546Sopenharmony_ci
4068bf215546Sopenharmony_ci   VkShaderStageFlags stages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_MESH_BIT_NV;
4069bf215546Sopenharmony_ci   radv_flush_descriptors(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS);
4070bf215546Sopenharmony_ci   radv_flush_constants(cmd_buffer, stages, &pipeline->base, VK_PIPELINE_BIND_POINT_GRAPHICS);
4071bf215546Sopenharmony_ci   radv_flush_ngg_query_state(cmd_buffer);
4072bf215546Sopenharmony_ci   radv_flush_force_vrs_state(cmd_buffer);
4073bf215546Sopenharmony_ci}
4074bf215546Sopenharmony_ci
/* Parameters describing a single draw call. */
struct radv_draw_info {
   /* Number of vertices. */
   uint32_t count;

   /* First instance id. */
   uint32_t first_instance;

   /* Number of instances. */
   uint32_t instance_count;

   /* Whether it's an indexed draw. */
   bool indexed;

   /* Indirect draw parameters resource, the offset into it, and a stride. */
   struct radv_buffer *indirect;
   uint64_t indirect_offset;
   uint32_t stride;

   /* Draw count parameters resource and the offset into it. */
   struct radv_buffer *count_buffer;
   uint64_t count_buffer_offset;

   /* Stream output parameters resource and the offset into it. */
   struct radv_buffer *strmout_buffer;
   uint64_t strmout_buffer_offset;
};
4115bf215546Sopenharmony_ci
4116bf215546Sopenharmony_cistatic uint32_t
4117bf215546Sopenharmony_ciradv_get_primitive_reset_index(struct radv_cmd_buffer *cmd_buffer)
4118bf215546Sopenharmony_ci{
4119bf215546Sopenharmony_ci   uint32_t index_type = G_028A7C_INDEX_TYPE(cmd_buffer->state.index_type);
4120bf215546Sopenharmony_ci   switch (index_type) {
4121bf215546Sopenharmony_ci   case V_028A7C_VGT_INDEX_8:
4122bf215546Sopenharmony_ci      return 0xffu;
4123bf215546Sopenharmony_ci   case V_028A7C_VGT_INDEX_16:
4124bf215546Sopenharmony_ci      return 0xffffu;
4125bf215546Sopenharmony_ci   case V_028A7C_VGT_INDEX_32:
4126bf215546Sopenharmony_ci      return 0xffffffffu;
4127bf215546Sopenharmony_ci   default:
4128bf215546Sopenharmony_ci      unreachable("invalid index type");
4129bf215546Sopenharmony_ci   }
4130bf215546Sopenharmony_ci}
4131bf215546Sopenharmony_ci
4132bf215546Sopenharmony_cistatic void
4133bf215546Sopenharmony_cisi_emit_ia_multi_vgt_param(struct radv_cmd_buffer *cmd_buffer, bool instanced_draw,
4134bf215546Sopenharmony_ci                           bool indirect_draw, bool count_from_stream_output,
4135bf215546Sopenharmony_ci                           uint32_t draw_vertex_count)
4136bf215546Sopenharmony_ci{
4137bf215546Sopenharmony_ci   struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
4138bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
4139bf215546Sopenharmony_ci   unsigned topology = state->dynamic.primitive_topology;
4140bf215546Sopenharmony_ci   bool prim_restart_enable = state->dynamic.primitive_restart_enable;
4141bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
4142bf215546Sopenharmony_ci   unsigned ia_multi_vgt_param;
4143bf215546Sopenharmony_ci
4144bf215546Sopenharmony_ci   ia_multi_vgt_param =
4145bf215546Sopenharmony_ci      si_get_ia_multi_vgt_param(cmd_buffer, instanced_draw, indirect_draw, count_from_stream_output,
4146bf215546Sopenharmony_ci                                draw_vertex_count, topology, prim_restart_enable);
4147bf215546Sopenharmony_ci
4148bf215546Sopenharmony_ci   if (state->last_ia_multi_vgt_param != ia_multi_vgt_param) {
4149bf215546Sopenharmony_ci      if (info->gfx_level == GFX9) {
4150bf215546Sopenharmony_ci         radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
4151bf215546Sopenharmony_ci                                    R_030960_IA_MULTI_VGT_PARAM, 4, ia_multi_vgt_param);
4152bf215546Sopenharmony_ci      } else if (info->gfx_level >= GFX7) {
4153bf215546Sopenharmony_ci         radeon_set_context_reg_idx(cs, R_028AA8_IA_MULTI_VGT_PARAM, 1, ia_multi_vgt_param);
4154bf215546Sopenharmony_ci      } else {
4155bf215546Sopenharmony_ci         radeon_set_context_reg(cs, R_028AA8_IA_MULTI_VGT_PARAM, ia_multi_vgt_param);
4156bf215546Sopenharmony_ci      }
4157bf215546Sopenharmony_ci      state->last_ia_multi_vgt_param = ia_multi_vgt_param;
4158bf215546Sopenharmony_ci   }
4159bf215546Sopenharmony_ci}
4160bf215546Sopenharmony_ci
4161bf215546Sopenharmony_cistatic void
4162bf215546Sopenharmony_ciradv_emit_draw_registers(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
4163bf215546Sopenharmony_ci{
4164bf215546Sopenharmony_ci   struct radeon_info *info = &cmd_buffer->device->physical_device->rad_info;
4165bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
4166bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
4167bf215546Sopenharmony_ci   uint32_t topology = state->dynamic.primitive_topology;
4168bf215546Sopenharmony_ci   bool disable_instance_packing = false;
4169bf215546Sopenharmony_ci
4170bf215546Sopenharmony_ci   /* Draw state. */
4171bf215546Sopenharmony_ci   if (info->gfx_level < GFX10) {
4172bf215546Sopenharmony_ci      si_emit_ia_multi_vgt_param(cmd_buffer, draw_info->instance_count > 1, draw_info->indirect,
4173bf215546Sopenharmony_ci                                 !!draw_info->strmout_buffer,
4174bf215546Sopenharmony_ci                                 draw_info->indirect ? 0 : draw_info->count);
4175bf215546Sopenharmony_ci   }
4176bf215546Sopenharmony_ci
4177bf215546Sopenharmony_ci   if (state->dynamic.primitive_restart_enable) {
4178bf215546Sopenharmony_ci      uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
4179bf215546Sopenharmony_ci
4180bf215546Sopenharmony_ci      if (primitive_reset_index != state->last_primitive_reset_index) {
4181bf215546Sopenharmony_ci         radeon_set_context_reg(cs, R_02840C_VGT_MULTI_PRIM_IB_RESET_INDX, primitive_reset_index);
4182bf215546Sopenharmony_ci         state->last_primitive_reset_index = primitive_reset_index;
4183bf215546Sopenharmony_ci      }
4184bf215546Sopenharmony_ci   }
4185bf215546Sopenharmony_ci
4186bf215546Sopenharmony_ci   if (draw_info->strmout_buffer) {
4187bf215546Sopenharmony_ci      uint64_t va = radv_buffer_get_va(draw_info->strmout_buffer->bo);
4188bf215546Sopenharmony_ci
4189bf215546Sopenharmony_ci      va += draw_info->strmout_buffer->offset + draw_info->strmout_buffer_offset;
4190bf215546Sopenharmony_ci
4191bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE, draw_info->stride);
4192bf215546Sopenharmony_ci
4193bf215546Sopenharmony_ci      if (info->gfx_level >= GFX10) {
4194bf215546Sopenharmony_ci         /* Emitting a COPY_DATA packet should be enough because RADV doesn't support preemption
4195bf215546Sopenharmony_ci          * (shadow memory) but for unknown reasons, it can lead to GPU hangs on GFX10+.
4196bf215546Sopenharmony_ci          */
4197bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
4198bf215546Sopenharmony_ci         radeon_emit(cs, 0);
4199bf215546Sopenharmony_ci
4200bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_LOAD_CONTEXT_REG_INDEX, 3, 0));
4201bf215546Sopenharmony_ci         radeon_emit(cs, va);
4202bf215546Sopenharmony_ci         radeon_emit(cs, va >> 32);
4203bf215546Sopenharmony_ci         radeon_emit(cs, (R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE - SI_CONTEXT_REG_OFFSET) >> 2);
4204bf215546Sopenharmony_ci         radeon_emit(cs, 1); /* 1 DWORD */
4205bf215546Sopenharmony_ci      } else {
4206bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
4207bf215546Sopenharmony_ci         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_REG) |
4208bf215546Sopenharmony_ci                         COPY_DATA_WR_CONFIRM);
4209bf215546Sopenharmony_ci         radeon_emit(cs, va);
4210bf215546Sopenharmony_ci         radeon_emit(cs, va >> 32);
4211bf215546Sopenharmony_ci         radeon_emit(cs, R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2);
4212bf215546Sopenharmony_ci         radeon_emit(cs, 0); /* unused */
4213bf215546Sopenharmony_ci      }
4214bf215546Sopenharmony_ci
4215bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cs, draw_info->strmout_buffer->bo);
4216bf215546Sopenharmony_ci   }
4217bf215546Sopenharmony_ci
4218bf215546Sopenharmony_ci   /* RDNA2 is affected by a hardware bug when instance packing is enabled for adjacent primitive
4219bf215546Sopenharmony_ci    * topologies and instance_count > 1, pipeline stats generated by GE are incorrect. It needs to
4220bf215546Sopenharmony_ci    * be applied for indexed and non-indexed draws.
4221bf215546Sopenharmony_ci    */
4222bf215546Sopenharmony_ci   if (info->gfx_level == GFX10_3 && state->active_pipeline_queries > 0 &&
4223bf215546Sopenharmony_ci       (draw_info->instance_count > 1 || draw_info->indirect) &&
4224bf215546Sopenharmony_ci       (topology == V_008958_DI_PT_LINELIST_ADJ || topology == V_008958_DI_PT_LINESTRIP_ADJ ||
4225bf215546Sopenharmony_ci        topology == V_008958_DI_PT_TRILIST_ADJ || topology == V_008958_DI_PT_TRISTRIP_ADJ)) {
4226bf215546Sopenharmony_ci      disable_instance_packing = true;
4227bf215546Sopenharmony_ci   }
4228bf215546Sopenharmony_ci
4229bf215546Sopenharmony_ci   if ((draw_info->indexed && state->index_type != state->last_index_type) ||
4230bf215546Sopenharmony_ci       (info->gfx_level == GFX10_3 &&
4231bf215546Sopenharmony_ci        (state->last_index_type == -1 ||
4232bf215546Sopenharmony_ci         disable_instance_packing != G_028A7C_DISABLE_INSTANCE_PACKING(state->last_index_type)))) {
4233bf215546Sopenharmony_ci      uint32_t index_type = state->index_type | S_028A7C_DISABLE_INSTANCE_PACKING(disable_instance_packing);
4234bf215546Sopenharmony_ci
4235bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
4236bf215546Sopenharmony_ci         radeon_set_uconfig_reg_idx(cmd_buffer->device->physical_device, cs,
4237bf215546Sopenharmony_ci                                    R_03090C_VGT_INDEX_TYPE, 2, index_type);
4238bf215546Sopenharmony_ci      } else {
4239bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_INDEX_TYPE, 0, 0));
4240bf215546Sopenharmony_ci         radeon_emit(cs, index_type);
4241bf215546Sopenharmony_ci      }
4242bf215546Sopenharmony_ci
4243bf215546Sopenharmony_ci      state->last_index_type = index_type;
4244bf215546Sopenharmony_ci   }
4245bf215546Sopenharmony_ci}
4246bf215546Sopenharmony_ci
4247bf215546Sopenharmony_cistatic void
4248bf215546Sopenharmony_ciradv_stage_flush(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 src_stage_mask)
4249bf215546Sopenharmony_ci{
4250bf215546Sopenharmony_ci   /* For simplicity, if the barrier wants to wait for the task shader,
4251bf215546Sopenharmony_ci    * just make it wait for the mesh shader too.
4252bf215546Sopenharmony_ci    */
4253bf215546Sopenharmony_ci   if (src_stage_mask & VK_PIPELINE_STAGE_2_TASK_SHADER_BIT_NV)
4254bf215546Sopenharmony_ci      src_stage_mask |= VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV;
4255bf215546Sopenharmony_ci
4256bf215546Sopenharmony_ci   if (src_stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT |
4257bf215546Sopenharmony_ci                         VK_PIPELINE_STAGE_2_RESOLVE_BIT |
4258bf215546Sopenharmony_ci                         VK_PIPELINE_STAGE_2_BLIT_BIT |
4259bf215546Sopenharmony_ci                         VK_PIPELINE_STAGE_2_CLEAR_BIT)) {
4260bf215546Sopenharmony_ci      /* Be conservative for now. */
4261bf215546Sopenharmony_ci      src_stage_mask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT;
4262bf215546Sopenharmony_ci   }
4263bf215546Sopenharmony_ci
4264bf215546Sopenharmony_ci   if (src_stage_mask &
4265bf215546Sopenharmony_ci       (VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT | VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT |
4266bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_BUILD_BIT_KHR |
4267bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_ACCELERATION_STRUCTURE_COPY_BIT_KHR |
4268bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_RAY_TRACING_SHADER_BIT_KHR | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
4269bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
4270bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
4271bf215546Sopenharmony_ci   }
4272bf215546Sopenharmony_ci
4273bf215546Sopenharmony_ci   if (src_stage_mask &
4274bf215546Sopenharmony_ci       (VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT | VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT |
4275bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_LATE_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_COLOR_ATTACHMENT_OUTPUT_BIT |
4276bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
4277bf215546Sopenharmony_ci        VK_PIPELINE_STAGE_2_ALL_GRAPHICS_BIT | VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT)) {
4278bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
4279bf215546Sopenharmony_ci   } else if (src_stage_mask &
4280bf215546Sopenharmony_ci              (VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT |
4281bf215546Sopenharmony_ci               VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
4282bf215546Sopenharmony_ci               VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
4283bf215546Sopenharmony_ci               VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT |
4284bf215546Sopenharmony_ci               VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
4285bf215546Sopenharmony_ci               VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV |
4286bf215546Sopenharmony_ci               VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
4287bf215546Sopenharmony_ci               VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT)) {
4288bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
4289bf215546Sopenharmony_ci   }
4290bf215546Sopenharmony_ci}
4291bf215546Sopenharmony_ci
4292bf215546Sopenharmony_cistatic bool
4293bf215546Sopenharmony_cican_skip_buffer_l2_flushes(struct radv_device *device)
4294bf215546Sopenharmony_ci{
4295bf215546Sopenharmony_ci   return device->physical_device->rad_info.gfx_level == GFX9 ||
4296bf215546Sopenharmony_ci          (device->physical_device->rad_info.gfx_level >= GFX10 &&
4297bf215546Sopenharmony_ci           !device->physical_device->rad_info.tcc_rb_non_coherent);
4298bf215546Sopenharmony_ci}
4299bf215546Sopenharmony_ci
/*
 * In Vulkan, barriers have two kinds of memory operations:
 *
 * - availability (implemented with radv_src_access_flush)
 * - visibility (implemented with radv_dst_access_flush)
 *
 * For a memory operation to observe the result of a previous memory operation,
 * one needs to do an availability operation from the source memory and then a
 * visibility operation to the target memory.
 *
 * The complication is that the availability and visibility operations do not
 * need to be in the same barrier.
 *
 * The cleanest way to implement this is to define the availability operation
 * to bring the caches to a "state of rest", in which none of the caches below
 * that level are dirty.
 *
 * For GFX8 and earlier this would be VRAM/GTT with none of the caches dirty.
 *
 * For GFX9+ we can define the state at rest to be L2 instead of VRAM for all
 * buffers and for images marked as coherent, and VRAM/GTT for non-coherent
 * images. However, given the existence of memory barriers which do not specify
 * the image/buffer, it often devolves to just VRAM/GTT anyway.
 *
 * To help reduce the invalidations for GPUs that have L2 coherency between the
 * RB and the shader caches, we always invalidate L2 on the src side, as we can
 * use our knowledge of past usage to optimize flushes away.
 */
4328bf215546Sopenharmony_ci
4329bf215546Sopenharmony_cienum radv_cmd_flush_bits
4330bf215546Sopenharmony_ciradv_src_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 src_flags,
4331bf215546Sopenharmony_ci                      const struct radv_image *image)
4332bf215546Sopenharmony_ci{
4333bf215546Sopenharmony_ci   bool has_CB_meta = true, has_DB_meta = true;
4334bf215546Sopenharmony_ci   bool image_is_coherent = image ? image->l2_coherent : false;
4335bf215546Sopenharmony_ci   enum radv_cmd_flush_bits flush_bits = 0;
4336bf215546Sopenharmony_ci
4337bf215546Sopenharmony_ci   if (image) {
4338bf215546Sopenharmony_ci      if (!radv_image_has_CB_metadata(image))
4339bf215546Sopenharmony_ci         has_CB_meta = false;
4340bf215546Sopenharmony_ci      if (!radv_image_has_htile(image))
4341bf215546Sopenharmony_ci         has_DB_meta = false;
4342bf215546Sopenharmony_ci   }
4343bf215546Sopenharmony_ci
4344bf215546Sopenharmony_ci   u_foreach_bit64(b, src_flags)
4345bf215546Sopenharmony_ci   {
4346bf215546Sopenharmony_ci      switch ((VkAccessFlags2)(1 << b)) {
4347bf215546Sopenharmony_ci      case VK_ACCESS_2_SHADER_WRITE_BIT:
4348bf215546Sopenharmony_ci      case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
4349bf215546Sopenharmony_ci         /* since the STORAGE bit isn't set we know that this is a meta operation.
4350bf215546Sopenharmony_ci          * on the dst flush side we skip CB/DB flushes without the STORAGE bit, so
4351bf215546Sopenharmony_ci          * set it here. */
4352bf215546Sopenharmony_ci         if (image && !(image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
4353bf215546Sopenharmony_ci            if (vk_format_is_depth_or_stencil(image->vk.format)) {
4354bf215546Sopenharmony_ci               flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
4355bf215546Sopenharmony_ci            } else {
4356bf215546Sopenharmony_ci               flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
4357bf215546Sopenharmony_ci            }
4358bf215546Sopenharmony_ci         }
4359bf215546Sopenharmony_ci
4360bf215546Sopenharmony_ci         if (!image_is_coherent)
4361bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2;
4362bf215546Sopenharmony_ci         break;
4363bf215546Sopenharmony_ci      case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
4364bf215546Sopenharmony_ci      case VK_ACCESS_2_TRANSFORM_FEEDBACK_WRITE_BIT_EXT:
4365bf215546Sopenharmony_ci      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
4366bf215546Sopenharmony_ci         if (!image_is_coherent)
4367bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_WB_L2;
4368bf215546Sopenharmony_ci         break;
4369bf215546Sopenharmony_ci      case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
4370bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
4371bf215546Sopenharmony_ci         if (has_CB_meta)
4372bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
4373bf215546Sopenharmony_ci         break;
4374bf215546Sopenharmony_ci      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
4375bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
4376bf215546Sopenharmony_ci         if (has_DB_meta)
4377bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
4378bf215546Sopenharmony_ci         break;
4379bf215546Sopenharmony_ci      case VK_ACCESS_2_TRANSFER_WRITE_BIT:
4380bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
4381bf215546Sopenharmony_ci
4382bf215546Sopenharmony_ci         if (!image_is_coherent)
4383bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2;
4384bf215546Sopenharmony_ci         if (has_CB_meta)
4385bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
4386bf215546Sopenharmony_ci         if (has_DB_meta)
4387bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
4388bf215546Sopenharmony_ci         break;
4389bf215546Sopenharmony_ci      case VK_ACCESS_2_MEMORY_WRITE_BIT:
4390bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB;
4391bf215546Sopenharmony_ci
4392bf215546Sopenharmony_ci         if (!image_is_coherent)
4393bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2;
4394bf215546Sopenharmony_ci         if (has_CB_meta)
4395bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
4396bf215546Sopenharmony_ci         if (has_DB_meta)
4397bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
4398bf215546Sopenharmony_ci         break;
4399bf215546Sopenharmony_ci      default:
4400bf215546Sopenharmony_ci         break;
4401bf215546Sopenharmony_ci      }
4402bf215546Sopenharmony_ci   }
4403bf215546Sopenharmony_ci   return flush_bits;
4404bf215546Sopenharmony_ci}
4405bf215546Sopenharmony_ci
4406bf215546Sopenharmony_cienum radv_cmd_flush_bits
4407bf215546Sopenharmony_ciradv_dst_access_flush(struct radv_cmd_buffer *cmd_buffer, VkAccessFlags2 dst_flags,
4408bf215546Sopenharmony_ci                      const struct radv_image *image)
4409bf215546Sopenharmony_ci{
4410bf215546Sopenharmony_ci   bool has_CB_meta = true, has_DB_meta = true;
4411bf215546Sopenharmony_ci   enum radv_cmd_flush_bits flush_bits = 0;
4412bf215546Sopenharmony_ci   bool flush_CB = true, flush_DB = true;
4413bf215546Sopenharmony_ci   bool image_is_coherent = image ? image->l2_coherent : false;
4414bf215546Sopenharmony_ci
4415bf215546Sopenharmony_ci   if (image) {
4416bf215546Sopenharmony_ci      if (!(image->vk.usage & VK_IMAGE_USAGE_STORAGE_BIT)) {
4417bf215546Sopenharmony_ci         flush_CB = false;
4418bf215546Sopenharmony_ci         flush_DB = false;
4419bf215546Sopenharmony_ci      }
4420bf215546Sopenharmony_ci
4421bf215546Sopenharmony_ci      if (!radv_image_has_CB_metadata(image))
4422bf215546Sopenharmony_ci         has_CB_meta = false;
4423bf215546Sopenharmony_ci      if (!radv_image_has_htile(image))
4424bf215546Sopenharmony_ci         has_DB_meta = false;
4425bf215546Sopenharmony_ci   }
4426bf215546Sopenharmony_ci
4427bf215546Sopenharmony_ci   /* All the L2 invalidations below are not the CB/DB. So if there are no incoherent images
4428bf215546Sopenharmony_ci    * in the L2 cache in CB/DB mode then they are already usable from all the other L2 clients. */
4429bf215546Sopenharmony_ci   image_is_coherent |=
4430bf215546Sopenharmony_ci      can_skip_buffer_l2_flushes(cmd_buffer->device) && !cmd_buffer->state.rb_noncoherent_dirty;
4431bf215546Sopenharmony_ci
4432bf215546Sopenharmony_ci   u_foreach_bit64(b, dst_flags)
4433bf215546Sopenharmony_ci   {
4434bf215546Sopenharmony_ci      switch ((VkAccessFlags2)(1 << b)) {
4435bf215546Sopenharmony_ci      case VK_ACCESS_2_INDIRECT_COMMAND_READ_BIT:
4436bf215546Sopenharmony_ci         /* SMEM loads are used to read compute dispatch size in shaders */
4437bf215546Sopenharmony_ci         if (!cmd_buffer->device->load_grid_size_from_user_sgpr)
4438bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
4439bf215546Sopenharmony_ci
4440bf215546Sopenharmony_ci         /* Ensure the DGC meta shader can read the commands. */
4441bf215546Sopenharmony_ci         if (cmd_buffer->device->uses_device_generated_commands) {
4442bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE;
4443bf215546Sopenharmony_ci
4444bf215546Sopenharmony_ci            if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9)
4445bf215546Sopenharmony_ci               flush_bits |= RADV_CMD_FLAG_INV_L2;
4446bf215546Sopenharmony_ci         }
4447bf215546Sopenharmony_ci
4448bf215546Sopenharmony_ci         break;
4449bf215546Sopenharmony_ci      case VK_ACCESS_2_INDEX_READ_BIT:
4450bf215546Sopenharmony_ci      case VK_ACCESS_2_TRANSFORM_FEEDBACK_COUNTER_WRITE_BIT_EXT:
4451bf215546Sopenharmony_ci         break;
4452bf215546Sopenharmony_ci      case VK_ACCESS_2_UNIFORM_READ_BIT:
4453bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
4454bf215546Sopenharmony_ci         break;
4455bf215546Sopenharmony_ci      case VK_ACCESS_2_VERTEX_ATTRIBUTE_READ_BIT:
4456bf215546Sopenharmony_ci      case VK_ACCESS_2_INPUT_ATTACHMENT_READ_BIT:
4457bf215546Sopenharmony_ci      case VK_ACCESS_2_TRANSFER_READ_BIT:
4458bf215546Sopenharmony_ci      case VK_ACCESS_2_TRANSFER_WRITE_BIT:
4459bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
4460bf215546Sopenharmony_ci
4461bf215546Sopenharmony_ci         if (has_CB_meta || has_DB_meta)
4462bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
4463bf215546Sopenharmony_ci         if (!image_is_coherent)
4464bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2;
4465bf215546Sopenharmony_ci         break;
4466bf215546Sopenharmony_ci      case VK_ACCESS_2_SHADER_BINDING_TABLE_READ_BIT_KHR:
4467bf215546Sopenharmony_ci      case VK_ACCESS_2_SHADER_READ_BIT:
4468bf215546Sopenharmony_ci      case VK_ACCESS_2_SHADER_STORAGE_READ_BIT:
4469bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
4470bf215546Sopenharmony_ci         /* Unlike LLVM, ACO uses SMEM for SSBOs and we have to
4471bf215546Sopenharmony_ci          * invalidate the scalar cache. */
4472bf215546Sopenharmony_ci         if (!cmd_buffer->device->physical_device->use_llvm && !image)
4473bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_SCACHE;
4474bf215546Sopenharmony_ci
4475bf215546Sopenharmony_ci         if (has_CB_meta || has_DB_meta)
4476bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2_METADATA;
4477bf215546Sopenharmony_ci         if (!image_is_coherent)
4478bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2;
4479bf215546Sopenharmony_ci         break;
4480bf215546Sopenharmony_ci      case VK_ACCESS_2_ACCELERATION_STRUCTURE_READ_BIT_KHR:
4481bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_INV_VCACHE;
4482bf215546Sopenharmony_ci         if (cmd_buffer->device->physical_device->rad_info.gfx_level < GFX9)
4483bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2;
4484bf215546Sopenharmony_ci         break;
4485bf215546Sopenharmony_ci      case VK_ACCESS_2_SHADER_WRITE_BIT:
4486bf215546Sopenharmony_ci      case VK_ACCESS_2_SHADER_STORAGE_WRITE_BIT:
4487bf215546Sopenharmony_ci      case VK_ACCESS_2_ACCELERATION_STRUCTURE_WRITE_BIT_KHR:
4488bf215546Sopenharmony_ci         break;
4489bf215546Sopenharmony_ci      case VK_ACCESS_2_COLOR_ATTACHMENT_READ_BIT:
4490bf215546Sopenharmony_ci      case VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT:
4491bf215546Sopenharmony_ci         if (flush_CB)
4492bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
4493bf215546Sopenharmony_ci         if (has_CB_meta)
4494bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
4495bf215546Sopenharmony_ci         break;
4496bf215546Sopenharmony_ci      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_READ_BIT:
4497bf215546Sopenharmony_ci      case VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT:
4498bf215546Sopenharmony_ci         if (flush_DB)
4499bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
4500bf215546Sopenharmony_ci         if (has_DB_meta)
4501bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
4502bf215546Sopenharmony_ci         break;
4503bf215546Sopenharmony_ci      case VK_ACCESS_2_MEMORY_READ_BIT:
4504bf215546Sopenharmony_ci      case VK_ACCESS_2_MEMORY_WRITE_BIT:
4505bf215546Sopenharmony_ci         flush_bits |= RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_SCACHE;
4506bf215546Sopenharmony_ci         if (!image_is_coherent)
4507bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_INV_L2;
4508bf215546Sopenharmony_ci         if (flush_CB)
4509bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB;
4510bf215546Sopenharmony_ci         if (has_CB_meta)
4511bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;
4512bf215546Sopenharmony_ci         if (flush_DB)
4513bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;
4514bf215546Sopenharmony_ci         if (has_DB_meta)
4515bf215546Sopenharmony_ci            flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
4516bf215546Sopenharmony_ci         break;
4517bf215546Sopenharmony_ci      default:
4518bf215546Sopenharmony_ci         break;
4519bf215546Sopenharmony_ci      }
4520bf215546Sopenharmony_ci   }
4521bf215546Sopenharmony_ci   return flush_bits;
4522bf215546Sopenharmony_ci}
4523bf215546Sopenharmony_ci
4524bf215546Sopenharmony_civoid
4525bf215546Sopenharmony_ciradv_emit_subpass_barrier(struct radv_cmd_buffer *cmd_buffer,
4526bf215546Sopenharmony_ci                          const struct radv_subpass_barrier *barrier)
4527bf215546Sopenharmony_ci{
4528bf215546Sopenharmony_ci   struct radv_render_pass *pass = cmd_buffer->state.pass;
4529bf215546Sopenharmony_ci
4530bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pass->attachment_count; i++) {
4531bf215546Sopenharmony_ci      struct radv_image_view *iview = cmd_buffer->state.attachments[i].iview;
4532bf215546Sopenharmony_ci
4533bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |=
4534bf215546Sopenharmony_ci         radv_src_access_flush(cmd_buffer, barrier->src_access_mask, iview->image);
4535bf215546Sopenharmony_ci   }
4536bf215546Sopenharmony_ci
4537bf215546Sopenharmony_ci   radv_stage_flush(cmd_buffer, barrier->src_stage_mask);
4538bf215546Sopenharmony_ci
4539bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pass->attachment_count; i++) {
4540bf215546Sopenharmony_ci      struct radv_image_view *iview = cmd_buffer->state.attachments[i].iview;
4541bf215546Sopenharmony_ci
4542bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |=
4543bf215546Sopenharmony_ci         radv_dst_access_flush(cmd_buffer, barrier->dst_access_mask, iview->image);
4544bf215546Sopenharmony_ci   }
4545bf215546Sopenharmony_ci
4546bf215546Sopenharmony_ci   radv_ace_internal_barrier(cmd_buffer, barrier->src_stage_mask, barrier->dst_stage_mask);
4547bf215546Sopenharmony_ci}
4548bf215546Sopenharmony_ci
4549bf215546Sopenharmony_ciuint32_t
4550bf215546Sopenharmony_ciradv_get_subpass_id(struct radv_cmd_buffer *cmd_buffer)
4551bf215546Sopenharmony_ci{
4552bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
4553bf215546Sopenharmony_ci   uint32_t subpass_id = state->subpass - state->pass->subpasses;
4554bf215546Sopenharmony_ci
4555bf215546Sopenharmony_ci   /* The id of this subpass shouldn't exceed the number of subpasses in
4556bf215546Sopenharmony_ci    * this render pass minus 1.
4557bf215546Sopenharmony_ci    */
4558bf215546Sopenharmony_ci   assert(subpass_id < state->pass->subpass_count);
4559bf215546Sopenharmony_ci   return subpass_id;
4560bf215546Sopenharmony_ci}
4561bf215546Sopenharmony_ci
4562bf215546Sopenharmony_cistatic struct radv_sample_locations_state *
4563bf215546Sopenharmony_ciradv_get_attachment_sample_locations(struct radv_cmd_buffer *cmd_buffer, uint32_t att_idx,
4564bf215546Sopenharmony_ci                                     bool begin_subpass)
4565bf215546Sopenharmony_ci{
4566bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
4567bf215546Sopenharmony_ci   uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
4568bf215546Sopenharmony_ci   struct radv_image_view *view = state->attachments[att_idx].iview;
4569bf215546Sopenharmony_ci
4570bf215546Sopenharmony_ci   if (view->image->info.samples == 1)
4571bf215546Sopenharmony_ci      return NULL;
4572bf215546Sopenharmony_ci
4573bf215546Sopenharmony_ci   if (state->pass->attachments[att_idx].first_subpass_idx == subpass_id) {
4574bf215546Sopenharmony_ci      /* Return the initial sample locations if this is the initial
4575bf215546Sopenharmony_ci       * layout transition of the given subpass attachemnt.
4576bf215546Sopenharmony_ci       */
4577bf215546Sopenharmony_ci      if (state->attachments[att_idx].sample_location.count > 0)
4578bf215546Sopenharmony_ci         return &state->attachments[att_idx].sample_location;
4579bf215546Sopenharmony_ci   } else {
4580bf215546Sopenharmony_ci      /* Otherwise return the subpass sample locations if defined. */
4581bf215546Sopenharmony_ci      if (state->subpass_sample_locs) {
4582bf215546Sopenharmony_ci         /* Because the driver sets the current subpass before
4583bf215546Sopenharmony_ci          * initial layout transitions, we should use the sample
4584bf215546Sopenharmony_ci          * locations from the previous subpass to avoid an
4585bf215546Sopenharmony_ci          * off-by-one problem. Otherwise, use the sample
4586bf215546Sopenharmony_ci          * locations for the current subpass for final layout
4587bf215546Sopenharmony_ci          * transitions.
4588bf215546Sopenharmony_ci          */
4589bf215546Sopenharmony_ci         if (begin_subpass)
4590bf215546Sopenharmony_ci            subpass_id--;
4591bf215546Sopenharmony_ci
4592bf215546Sopenharmony_ci         for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {
4593bf215546Sopenharmony_ci            if (state->subpass_sample_locs[i].subpass_idx == subpass_id)
4594bf215546Sopenharmony_ci               return &state->subpass_sample_locs[i].sample_location;
4595bf215546Sopenharmony_ci         }
4596bf215546Sopenharmony_ci      }
4597bf215546Sopenharmony_ci   }
4598bf215546Sopenharmony_ci
4599bf215546Sopenharmony_ci   return NULL;
4600bf215546Sopenharmony_ci}
4601bf215546Sopenharmony_ci
4602bf215546Sopenharmony_cistatic void
4603bf215546Sopenharmony_ciradv_handle_subpass_image_transition(struct radv_cmd_buffer *cmd_buffer,
4604bf215546Sopenharmony_ci                                     struct radv_subpass_attachment att, bool begin_subpass)
4605bf215546Sopenharmony_ci{
4606bf215546Sopenharmony_ci   unsigned idx = att.attachment;
4607bf215546Sopenharmony_ci   struct radv_image_view *view = cmd_buffer->state.attachments[idx].iview;
4608bf215546Sopenharmony_ci   struct radv_sample_locations_state *sample_locs;
4609bf215546Sopenharmony_ci   VkImageSubresourceRange range;
4610bf215546Sopenharmony_ci   range.aspectMask = view->vk.aspects;
4611bf215546Sopenharmony_ci   range.baseMipLevel = view->vk.base_mip_level;
4612bf215546Sopenharmony_ci   range.levelCount = 1;
4613bf215546Sopenharmony_ci   range.baseArrayLayer = view->vk.base_array_layer;
4614bf215546Sopenharmony_ci   range.layerCount = cmd_buffer->state.framebuffer->layers;
4615bf215546Sopenharmony_ci
4616bf215546Sopenharmony_ci   if (cmd_buffer->state.subpass->view_mask) {
4617bf215546Sopenharmony_ci      /* If the current subpass uses multiview, the driver might have
4618bf215546Sopenharmony_ci       * performed a fast color/depth clear to the whole image
4619bf215546Sopenharmony_ci       * (including all layers). To make sure the driver will
4620bf215546Sopenharmony_ci       * decompress the image correctly (if needed), we have to
4621bf215546Sopenharmony_ci       * account for the "real" number of layers. If the view mask is
4622bf215546Sopenharmony_ci       * sparse, this will decompress more layers than needed.
4623bf215546Sopenharmony_ci       */
4624bf215546Sopenharmony_ci      range.layerCount = util_last_bit(cmd_buffer->state.subpass->view_mask);
4625bf215546Sopenharmony_ci   }
4626bf215546Sopenharmony_ci
4627bf215546Sopenharmony_ci   /* Get the subpass sample locations for the given attachment, if NULL
4628bf215546Sopenharmony_ci    * is returned the driver will use the default HW locations.
4629bf215546Sopenharmony_ci    */
4630bf215546Sopenharmony_ci   sample_locs = radv_get_attachment_sample_locations(cmd_buffer, idx, begin_subpass);
4631bf215546Sopenharmony_ci
4632bf215546Sopenharmony_ci   /* Determine if the subpass uses separate depth/stencil layouts. */
4633bf215546Sopenharmony_ci   bool uses_separate_depth_stencil_layouts = false;
4634bf215546Sopenharmony_ci   if ((cmd_buffer->state.attachments[idx].current_layout !=
4635bf215546Sopenharmony_ci        cmd_buffer->state.attachments[idx].current_stencil_layout) ||
4636bf215546Sopenharmony_ci       (att.layout != att.stencil_layout)) {
4637bf215546Sopenharmony_ci      uses_separate_depth_stencil_layouts = true;
4638bf215546Sopenharmony_ci   }
4639bf215546Sopenharmony_ci
4640bf215546Sopenharmony_ci   /* For separate layouts, perform depth and stencil transitions
4641bf215546Sopenharmony_ci    * separately.
4642bf215546Sopenharmony_ci    */
4643bf215546Sopenharmony_ci   if (uses_separate_depth_stencil_layouts &&
4644bf215546Sopenharmony_ci       (range.aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
4645bf215546Sopenharmony_ci      /* Depth-only transitions. */
4646bf215546Sopenharmony_ci      range.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
4647bf215546Sopenharmony_ci      radv_handle_image_transition(cmd_buffer, view->image,
4648bf215546Sopenharmony_ci                                   cmd_buffer->state.attachments[idx].current_layout,
4649bf215546Sopenharmony_ci                                   cmd_buffer->state.attachments[idx].current_in_render_loop,
4650bf215546Sopenharmony_ci                                   att.layout, att.in_render_loop, 0, 0, &range, sample_locs);
4651bf215546Sopenharmony_ci
4652bf215546Sopenharmony_ci      /* Stencil-only transitions. */
4653bf215546Sopenharmony_ci      range.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT;
4654bf215546Sopenharmony_ci      radv_handle_image_transition(
4655bf215546Sopenharmony_ci         cmd_buffer, view->image, cmd_buffer->state.attachments[idx].current_stencil_layout,
4656bf215546Sopenharmony_ci         cmd_buffer->state.attachments[idx].current_in_render_loop, att.stencil_layout,
4657bf215546Sopenharmony_ci         att.in_render_loop, 0, 0, &range, sample_locs);
4658bf215546Sopenharmony_ci   } else {
4659bf215546Sopenharmony_ci      radv_handle_image_transition(cmd_buffer, view->image,
4660bf215546Sopenharmony_ci                                   cmd_buffer->state.attachments[idx].current_layout,
4661bf215546Sopenharmony_ci                                   cmd_buffer->state.attachments[idx].current_in_render_loop,
4662bf215546Sopenharmony_ci                                   att.layout, att.in_render_loop, 0, 0, &range, sample_locs);
4663bf215546Sopenharmony_ci   }
4664bf215546Sopenharmony_ci
4665bf215546Sopenharmony_ci   cmd_buffer->state.attachments[idx].current_layout = att.layout;
4666bf215546Sopenharmony_ci   cmd_buffer->state.attachments[idx].current_stencil_layout = att.stencil_layout;
4667bf215546Sopenharmony_ci   cmd_buffer->state.attachments[idx].current_in_render_loop = att.in_render_loop;
4668bf215546Sopenharmony_ci}
4669bf215546Sopenharmony_ci
4670bf215546Sopenharmony_civoid
4671bf215546Sopenharmony_ciradv_cmd_buffer_set_subpass(struct radv_cmd_buffer *cmd_buffer, const struct radv_subpass *subpass)
4672bf215546Sopenharmony_ci{
4673bf215546Sopenharmony_ci   cmd_buffer->state.subpass = subpass;
4674bf215546Sopenharmony_ci
4675bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
4676bf215546Sopenharmony_ci}
4677bf215546Sopenharmony_ci
4678bf215546Sopenharmony_cistatic VkResult
4679bf215546Sopenharmony_ciradv_cmd_state_setup_sample_locations(struct radv_cmd_buffer *cmd_buffer,
4680bf215546Sopenharmony_ci                                      struct radv_render_pass *pass,
4681bf215546Sopenharmony_ci                                      const VkRenderPassBeginInfo *info)
4682bf215546Sopenharmony_ci{
4683bf215546Sopenharmony_ci   const struct VkRenderPassSampleLocationsBeginInfoEXT *sample_locs =
4684bf215546Sopenharmony_ci      vk_find_struct_const(info->pNext, RENDER_PASS_SAMPLE_LOCATIONS_BEGIN_INFO_EXT);
4685bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
4686bf215546Sopenharmony_ci
4687bf215546Sopenharmony_ci   if (!sample_locs) {
4688bf215546Sopenharmony_ci      state->subpass_sample_locs = NULL;
4689bf215546Sopenharmony_ci      return VK_SUCCESS;
4690bf215546Sopenharmony_ci   }
4691bf215546Sopenharmony_ci
4692bf215546Sopenharmony_ci   for (uint32_t i = 0; i < sample_locs->attachmentInitialSampleLocationsCount; i++) {
4693bf215546Sopenharmony_ci      const VkAttachmentSampleLocationsEXT *att_sample_locs =
4694bf215546Sopenharmony_ci         &sample_locs->pAttachmentInitialSampleLocations[i];
4695bf215546Sopenharmony_ci      uint32_t att_idx = att_sample_locs->attachmentIndex;
4696bf215546Sopenharmony_ci      struct radv_image *image = cmd_buffer->state.attachments[att_idx].iview->image;
4697bf215546Sopenharmony_ci
4698bf215546Sopenharmony_ci      assert(vk_format_is_depth_or_stencil(image->vk.format));
4699bf215546Sopenharmony_ci
4700bf215546Sopenharmony_ci      /* From the Vulkan spec 1.1.108:
4701bf215546Sopenharmony_ci       *
4702bf215546Sopenharmony_ci       * "If the image referenced by the framebuffer attachment at
4703bf215546Sopenharmony_ci       *  index attachmentIndex was not created with
4704bf215546Sopenharmony_ci       *  VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT
4705bf215546Sopenharmony_ci       *  then the values specified in sampleLocationsInfo are
4706bf215546Sopenharmony_ci       *  ignored."
4707bf215546Sopenharmony_ci       */
4708bf215546Sopenharmony_ci      if (!(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT))
4709bf215546Sopenharmony_ci         continue;
4710bf215546Sopenharmony_ci
4711bf215546Sopenharmony_ci      const VkSampleLocationsInfoEXT *sample_locs_info = &att_sample_locs->sampleLocationsInfo;
4712bf215546Sopenharmony_ci
4713bf215546Sopenharmony_ci      state->attachments[att_idx].sample_location.per_pixel =
4714bf215546Sopenharmony_ci         sample_locs_info->sampleLocationsPerPixel;
4715bf215546Sopenharmony_ci      state->attachments[att_idx].sample_location.grid_size =
4716bf215546Sopenharmony_ci         sample_locs_info->sampleLocationGridSize;
4717bf215546Sopenharmony_ci      state->attachments[att_idx].sample_location.count = sample_locs_info->sampleLocationsCount;
4718bf215546Sopenharmony_ci      typed_memcpy(&state->attachments[att_idx].sample_location.locations[0],
4719bf215546Sopenharmony_ci                   sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount);
4720bf215546Sopenharmony_ci   }
4721bf215546Sopenharmony_ci
4722bf215546Sopenharmony_ci   state->subpass_sample_locs =
4723bf215546Sopenharmony_ci      vk_alloc(&cmd_buffer->pool->vk.alloc,
4724bf215546Sopenharmony_ci               sample_locs->postSubpassSampleLocationsCount * sizeof(state->subpass_sample_locs[0]),
4725bf215546Sopenharmony_ci               8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4726bf215546Sopenharmony_ci   if (state->subpass_sample_locs == NULL) {
4727bf215546Sopenharmony_ci      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
4728bf215546Sopenharmony_ci      return cmd_buffer->record_result;
4729bf215546Sopenharmony_ci   }
4730bf215546Sopenharmony_ci
4731bf215546Sopenharmony_ci   state->num_subpass_sample_locs = sample_locs->postSubpassSampleLocationsCount;
4732bf215546Sopenharmony_ci
4733bf215546Sopenharmony_ci   for (uint32_t i = 0; i < sample_locs->postSubpassSampleLocationsCount; i++) {
4734bf215546Sopenharmony_ci      const VkSubpassSampleLocationsEXT *subpass_sample_locs_info =
4735bf215546Sopenharmony_ci         &sample_locs->pPostSubpassSampleLocations[i];
4736bf215546Sopenharmony_ci      const VkSampleLocationsInfoEXT *sample_locs_info =
4737bf215546Sopenharmony_ci         &subpass_sample_locs_info->sampleLocationsInfo;
4738bf215546Sopenharmony_ci
4739bf215546Sopenharmony_ci      state->subpass_sample_locs[i].subpass_idx = subpass_sample_locs_info->subpassIndex;
4740bf215546Sopenharmony_ci      state->subpass_sample_locs[i].sample_location.per_pixel =
4741bf215546Sopenharmony_ci         sample_locs_info->sampleLocationsPerPixel;
4742bf215546Sopenharmony_ci      state->subpass_sample_locs[i].sample_location.grid_size =
4743bf215546Sopenharmony_ci         sample_locs_info->sampleLocationGridSize;
4744bf215546Sopenharmony_ci      state->subpass_sample_locs[i].sample_location.count = sample_locs_info->sampleLocationsCount;
4745bf215546Sopenharmony_ci      typed_memcpy(&state->subpass_sample_locs[i].sample_location.locations[0],
4746bf215546Sopenharmony_ci                   sample_locs_info->pSampleLocations, sample_locs_info->sampleLocationsCount);
4747bf215546Sopenharmony_ci   }
4748bf215546Sopenharmony_ci
4749bf215546Sopenharmony_ci   return VK_SUCCESS;
4750bf215546Sopenharmony_ci}
4751bf215546Sopenharmony_ci
4752bf215546Sopenharmony_cistatic VkResult
4753bf215546Sopenharmony_ciradv_cmd_state_setup_attachments(struct radv_cmd_buffer *cmd_buffer, struct radv_render_pass *pass,
4754bf215546Sopenharmony_ci                                 const VkRenderPassBeginInfo *info)
4755bf215546Sopenharmony_ci{
4756bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
4757bf215546Sopenharmony_ci   const struct VkRenderPassAttachmentBeginInfo *attachment_info = NULL;
4758bf215546Sopenharmony_ci
4759bf215546Sopenharmony_ci   if (info) {
4760bf215546Sopenharmony_ci      attachment_info = vk_find_struct_const(info->pNext, RENDER_PASS_ATTACHMENT_BEGIN_INFO);
4761bf215546Sopenharmony_ci   }
4762bf215546Sopenharmony_ci
4763bf215546Sopenharmony_ci   if (pass->attachment_count == 0) {
4764bf215546Sopenharmony_ci      state->attachments = NULL;
4765bf215546Sopenharmony_ci      return VK_SUCCESS;
4766bf215546Sopenharmony_ci   }
4767bf215546Sopenharmony_ci
4768bf215546Sopenharmony_ci   state->attachments =
4769bf215546Sopenharmony_ci      vk_alloc(&cmd_buffer->pool->vk.alloc, pass->attachment_count * sizeof(state->attachments[0]),
4770bf215546Sopenharmony_ci               8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
4771bf215546Sopenharmony_ci   if (state->attachments == NULL) {
4772bf215546Sopenharmony_ci      cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
4773bf215546Sopenharmony_ci      return cmd_buffer->record_result;
4774bf215546Sopenharmony_ci   }
4775bf215546Sopenharmony_ci
4776bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pass->attachment_count; ++i) {
4777bf215546Sopenharmony_ci      struct radv_render_pass_attachment *att = &pass->attachments[i];
4778bf215546Sopenharmony_ci      VkImageAspectFlags att_aspects = vk_format_aspects(att->format);
4779bf215546Sopenharmony_ci      VkImageAspectFlags clear_aspects = 0;
4780bf215546Sopenharmony_ci
4781bf215546Sopenharmony_ci      if (att_aspects == VK_IMAGE_ASPECT_COLOR_BIT) {
4782bf215546Sopenharmony_ci         /* color attachment */
4783bf215546Sopenharmony_ci         if (att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
4784bf215546Sopenharmony_ci            clear_aspects |= VK_IMAGE_ASPECT_COLOR_BIT;
4785bf215546Sopenharmony_ci         }
4786bf215546Sopenharmony_ci      } else {
4787bf215546Sopenharmony_ci         /* depthstencil attachment */
4788bf215546Sopenharmony_ci         if ((att_aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&
4789bf215546Sopenharmony_ci             att->load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
4790bf215546Sopenharmony_ci            clear_aspects |= VK_IMAGE_ASPECT_DEPTH_BIT;
4791bf215546Sopenharmony_ci            if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
4792bf215546Sopenharmony_ci                att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_DONT_CARE)
4793bf215546Sopenharmony_ci               clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
4794bf215546Sopenharmony_ci         }
4795bf215546Sopenharmony_ci         if ((att_aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&
4796bf215546Sopenharmony_ci             att->stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) {
4797bf215546Sopenharmony_ci            clear_aspects |= VK_IMAGE_ASPECT_STENCIL_BIT;
4798bf215546Sopenharmony_ci         }
4799bf215546Sopenharmony_ci      }
4800bf215546Sopenharmony_ci
4801bf215546Sopenharmony_ci      state->attachments[i].pending_clear_aspects = clear_aspects;
4802bf215546Sopenharmony_ci      state->attachments[i].cleared_views = 0;
4803bf215546Sopenharmony_ci      if (clear_aspects && info) {
4804bf215546Sopenharmony_ci         assert(info->clearValueCount > i);
4805bf215546Sopenharmony_ci         state->attachments[i].clear_value = info->pClearValues[i];
4806bf215546Sopenharmony_ci      }
4807bf215546Sopenharmony_ci
4808bf215546Sopenharmony_ci      state->attachments[i].current_layout = att->initial_layout;
4809bf215546Sopenharmony_ci      state->attachments[i].current_in_render_loop = false;
4810bf215546Sopenharmony_ci      state->attachments[i].current_stencil_layout = att->stencil_initial_layout;
4811bf215546Sopenharmony_ci      state->attachments[i].sample_location.count = 0;
4812bf215546Sopenharmony_ci
4813bf215546Sopenharmony_ci      struct radv_image_view *iview;
4814bf215546Sopenharmony_ci      if (attachment_info && attachment_info->attachmentCount > i) {
4815bf215546Sopenharmony_ci         iview = radv_image_view_from_handle(attachment_info->pAttachments[i]);
4816bf215546Sopenharmony_ci      } else {
4817bf215546Sopenharmony_ci         iview = radv_image_view_from_handle(state->framebuffer->attachments[i]);
4818bf215546Sopenharmony_ci      }
4819bf215546Sopenharmony_ci
4820bf215546Sopenharmony_ci      state->attachments[i].iview = iview;
4821bf215546Sopenharmony_ci      if (iview->vk.aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
4822bf215546Sopenharmony_ci         radv_initialise_ds_surface(cmd_buffer->device, &state->attachments[i].ds, iview);
4823bf215546Sopenharmony_ci      } else {
4824bf215546Sopenharmony_ci         radv_initialise_color_surface(cmd_buffer->device, &state->attachments[i].cb, iview);
4825bf215546Sopenharmony_ci      }
4826bf215546Sopenharmony_ci   }
4827bf215546Sopenharmony_ci
4828bf215546Sopenharmony_ci   return VK_SUCCESS;
4829bf215546Sopenharmony_ci}
4830bf215546Sopenharmony_ci
4831bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
4832bf215546Sopenharmony_ciradv_AllocateCommandBuffers(VkDevice _device, const VkCommandBufferAllocateInfo *pAllocateInfo,
4833bf215546Sopenharmony_ci                            VkCommandBuffer *pCommandBuffers)
4834bf215546Sopenharmony_ci{
4835bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
4836bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_pool, pool, pAllocateInfo->commandPool);
4837bf215546Sopenharmony_ci
4838bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
4839bf215546Sopenharmony_ci   uint32_t i;
4840bf215546Sopenharmony_ci
4841bf215546Sopenharmony_ci   for (i = 0; i < pAllocateInfo->commandBufferCount; i++) {
4842bf215546Sopenharmony_ci
4843bf215546Sopenharmony_ci      if (!list_is_empty(&pool->free_cmd_buffers)) {
4844bf215546Sopenharmony_ci         struct radv_cmd_buffer *cmd_buffer =
4845bf215546Sopenharmony_ci            list_first_entry(&pool->free_cmd_buffers, struct radv_cmd_buffer, pool_link);
4846bf215546Sopenharmony_ci
4847bf215546Sopenharmony_ci         list_del(&cmd_buffer->pool_link);
4848bf215546Sopenharmony_ci         list_addtail(&cmd_buffer->pool_link, &pool->cmd_buffers);
4849bf215546Sopenharmony_ci
4850bf215546Sopenharmony_ci         result = radv_reset_cmd_buffer(cmd_buffer);
4851bf215546Sopenharmony_ci         vk_command_buffer_finish(&cmd_buffer->vk);
4852bf215546Sopenharmony_ci         VkResult init_result =
4853bf215546Sopenharmony_ci            vk_command_buffer_init(&cmd_buffer->vk, &pool->vk, pAllocateInfo->level);
4854bf215546Sopenharmony_ci         if (init_result != VK_SUCCESS)
4855bf215546Sopenharmony_ci            result = init_result;
4856bf215546Sopenharmony_ci
4857bf215546Sopenharmony_ci         pCommandBuffers[i] = radv_cmd_buffer_to_handle(cmd_buffer);
4858bf215546Sopenharmony_ci      } else {
4859bf215546Sopenharmony_ci         result = radv_create_cmd_buffer(device, pool, pAllocateInfo->level, &pCommandBuffers[i]);
4860bf215546Sopenharmony_ci      }
4861bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
4862bf215546Sopenharmony_ci         break;
4863bf215546Sopenharmony_ci   }
4864bf215546Sopenharmony_ci
4865bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
4866bf215546Sopenharmony_ci      radv_FreeCommandBuffers(_device, pAllocateInfo->commandPool, i, pCommandBuffers);
4867bf215546Sopenharmony_ci
4868bf215546Sopenharmony_ci      /* From the Vulkan 1.0.66 spec:
4869bf215546Sopenharmony_ci       *
4870bf215546Sopenharmony_ci       * "vkAllocateCommandBuffers can be used to create multiple
4871bf215546Sopenharmony_ci       *  command buffers. If the creation of any of those command
4872bf215546Sopenharmony_ci       *  buffers fails, the implementation must destroy all
4873bf215546Sopenharmony_ci       *  successfully created command buffer objects from this
4874bf215546Sopenharmony_ci       *  command, set all entries of the pCommandBuffers array to
4875bf215546Sopenharmony_ci       *  NULL and return the error."
4876bf215546Sopenharmony_ci       */
4877bf215546Sopenharmony_ci      memset(pCommandBuffers, 0, sizeof(*pCommandBuffers) * pAllocateInfo->commandBufferCount);
4878bf215546Sopenharmony_ci   }
4879bf215546Sopenharmony_ci
4880bf215546Sopenharmony_ci   return result;
4881bf215546Sopenharmony_ci}
4882bf215546Sopenharmony_ci
4883bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
4884bf215546Sopenharmony_ciradv_FreeCommandBuffers(VkDevice device, VkCommandPool commandPool, uint32_t commandBufferCount,
4885bf215546Sopenharmony_ci                        const VkCommandBuffer *pCommandBuffers)
4886bf215546Sopenharmony_ci{
4887bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
4888bf215546Sopenharmony_ci
4889bf215546Sopenharmony_ci   for (uint32_t i = 0; i < commandBufferCount; i++) {
4890bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, pCommandBuffers[i]);
4891bf215546Sopenharmony_ci
4892bf215546Sopenharmony_ci      if (!cmd_buffer)
4893bf215546Sopenharmony_ci         continue;
4894bf215546Sopenharmony_ci      assert(cmd_buffer->pool == pool);
4895bf215546Sopenharmony_ci
4896bf215546Sopenharmony_ci      list_del(&cmd_buffer->pool_link);
4897bf215546Sopenharmony_ci      list_addtail(&cmd_buffer->pool_link, &pool->free_cmd_buffers);
4898bf215546Sopenharmony_ci   }
4899bf215546Sopenharmony_ci}
4900bf215546Sopenharmony_ci
4901bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
4902bf215546Sopenharmony_ciradv_ResetCommandBuffer(VkCommandBuffer commandBuffer, VkCommandBufferResetFlags flags)
4903bf215546Sopenharmony_ci{
4904bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
4905bf215546Sopenharmony_ci   return radv_reset_cmd_buffer(cmd_buffer);
4906bf215546Sopenharmony_ci}
4907bf215546Sopenharmony_ci
4908bf215546Sopenharmony_cistatic void
4909bf215546Sopenharmony_ciradv_inherit_dynamic_rendering(struct radv_cmd_buffer *cmd_buffer,
4910bf215546Sopenharmony_ci                               const VkCommandBufferInheritanceInfo *inherit_info,
4911bf215546Sopenharmony_ci                               const VkCommandBufferInheritanceRenderingInfo *dyn_info)
4912bf215546Sopenharmony_ci{
4913bf215546Sopenharmony_ci   const VkAttachmentSampleCountInfoAMD *sample_info =
4914bf215546Sopenharmony_ci      vk_find_struct_const(inherit_info->pNext, ATTACHMENT_SAMPLE_COUNT_INFO_AMD);
4915bf215546Sopenharmony_ci   VkResult result;
4916bf215546Sopenharmony_ci   /* (normal + resolve) for color attachments and ds and a VRS attachment */
4917bf215546Sopenharmony_ci   VkAttachmentDescription2 att_desc[MAX_RTS * 2 + 3];
4918bf215546Sopenharmony_ci   VkAttachmentReference2 color_refs[MAX_RTS], ds_ref;
4919bf215546Sopenharmony_ci   unsigned att_count = 0;
4920bf215546Sopenharmony_ci
4921bf215546Sopenharmony_ci   VkSubpassDescription2 subpass = {
4922bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
4923bf215546Sopenharmony_ci      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
4924bf215546Sopenharmony_ci      .viewMask = dyn_info->viewMask,
4925bf215546Sopenharmony_ci      .colorAttachmentCount = dyn_info->colorAttachmentCount,
4926bf215546Sopenharmony_ci      .pColorAttachments = color_refs,
4927bf215546Sopenharmony_ci   };
4928bf215546Sopenharmony_ci
4929bf215546Sopenharmony_ci   for (unsigned i = 0; i < dyn_info->colorAttachmentCount; ++i) {
4930bf215546Sopenharmony_ci      if (dyn_info->pColorAttachmentFormats[i] == VK_FORMAT_UNDEFINED) {
4931bf215546Sopenharmony_ci         color_refs[i] = (VkAttachmentReference2){
4932bf215546Sopenharmony_ci            .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
4933bf215546Sopenharmony_ci            .attachment = VK_ATTACHMENT_UNUSED,
4934bf215546Sopenharmony_ci         };
4935bf215546Sopenharmony_ci         continue;
4936bf215546Sopenharmony_ci      }
4937bf215546Sopenharmony_ci
4938bf215546Sopenharmony_ci      color_refs[i] = (VkAttachmentReference2){
4939bf215546Sopenharmony_ci         .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
4940bf215546Sopenharmony_ci         .attachment = att_count,
4941bf215546Sopenharmony_ci         .layout = VK_IMAGE_LAYOUT_GENERAL, /* Shouldn't be used */
4942bf215546Sopenharmony_ci         .aspectMask = 0,                   /* Shouldn't be used */
4943bf215546Sopenharmony_ci      };
4944bf215546Sopenharmony_ci
4945bf215546Sopenharmony_ci      VkAttachmentDescription2 *att = att_desc + att_count++;
4946bf215546Sopenharmony_ci      memset(att, 0, sizeof(*att));
4947bf215546Sopenharmony_ci      att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
4948bf215546Sopenharmony_ci      att->format = dyn_info->pColorAttachmentFormats[i];
4949bf215546Sopenharmony_ci      att->samples =
4950bf215546Sopenharmony_ci         sample_info ? sample_info->pColorAttachmentSamples[i] : dyn_info->rasterizationSamples;
4951bf215546Sopenharmony_ci      att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
4952bf215546Sopenharmony_ci      att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
4953bf215546Sopenharmony_ci      att->initialLayout = VK_IMAGE_LAYOUT_GENERAL;
4954bf215546Sopenharmony_ci      att->finalLayout = VK_IMAGE_LAYOUT_GENERAL;
4955bf215546Sopenharmony_ci   }
4956bf215546Sopenharmony_ci
4957bf215546Sopenharmony_ci   if (dyn_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED ||
4958bf215546Sopenharmony_ci       dyn_info->stencilAttachmentFormat != VK_FORMAT_UNDEFINED) {
4959bf215546Sopenharmony_ci      VkFormat fmt = dyn_info->depthAttachmentFormat != VK_FORMAT_UNDEFINED
4960bf215546Sopenharmony_ci                        ? dyn_info->depthAttachmentFormat
4961bf215546Sopenharmony_ci                        : dyn_info->stencilAttachmentFormat;
4962bf215546Sopenharmony_ci
4963bf215546Sopenharmony_ci      ds_ref = (VkAttachmentReference2){
4964bf215546Sopenharmony_ci         .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
4965bf215546Sopenharmony_ci         .attachment = att_count,
4966bf215546Sopenharmony_ci         .layout = VK_IMAGE_LAYOUT_GENERAL, /* Shouldn't be used */
4967bf215546Sopenharmony_ci         .aspectMask = 0,                   /* Shouldn't be used */
4968bf215546Sopenharmony_ci      };
4969bf215546Sopenharmony_ci      subpass.pDepthStencilAttachment = &ds_ref;
4970bf215546Sopenharmony_ci
4971bf215546Sopenharmony_ci      VkAttachmentDescription2 *att = att_desc + att_count++;
4972bf215546Sopenharmony_ci
4973bf215546Sopenharmony_ci      memset(att, 0, sizeof(*att));
4974bf215546Sopenharmony_ci      att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
4975bf215546Sopenharmony_ci      att->format = fmt;
4976bf215546Sopenharmony_ci      att->samples =
4977bf215546Sopenharmony_ci         sample_info ? sample_info->depthStencilAttachmentSamples : dyn_info->rasterizationSamples;
4978bf215546Sopenharmony_ci      att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
4979bf215546Sopenharmony_ci      att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
4980bf215546Sopenharmony_ci      att->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
4981bf215546Sopenharmony_ci      att->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
4982bf215546Sopenharmony_ci   }
4983bf215546Sopenharmony_ci
4984bf215546Sopenharmony_ci   VkRenderPassCreateInfo2 rp_create_info = {
4985bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
4986bf215546Sopenharmony_ci      .attachmentCount = att_count,
4987bf215546Sopenharmony_ci      .pAttachments = att_desc,
4988bf215546Sopenharmony_ci      .subpassCount = 1,
4989bf215546Sopenharmony_ci      .pSubpasses = &subpass,
4990bf215546Sopenharmony_ci   };
4991bf215546Sopenharmony_ci
4992bf215546Sopenharmony_ci   VkRenderPass rp;
4993bf215546Sopenharmony_ci   result =
4994bf215546Sopenharmony_ci      radv_CreateRenderPass2(radv_device_to_handle(cmd_buffer->device), &rp_create_info, NULL, &rp);
4995bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
4996bf215546Sopenharmony_ci      cmd_buffer->record_result = result;
4997bf215546Sopenharmony_ci      return;
4998bf215546Sopenharmony_ci   }
4999bf215546Sopenharmony_ci
5000bf215546Sopenharmony_ci   cmd_buffer->state.pass = radv_render_pass_from_handle(rp);
5001bf215546Sopenharmony_ci   cmd_buffer->state.own_render_pass = true;
5002bf215546Sopenharmony_ci}
5003bf215546Sopenharmony_ci
5004bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
5005bf215546Sopenharmony_ciradv_BeginCommandBuffer(VkCommandBuffer commandBuffer, const VkCommandBufferBeginInfo *pBeginInfo)
5006bf215546Sopenharmony_ci{
5007bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5008bf215546Sopenharmony_ci   VkResult result = VK_SUCCESS;
5009bf215546Sopenharmony_ci
5010bf215546Sopenharmony_ci   if (cmd_buffer->status != RADV_CMD_BUFFER_STATUS_INITIAL) {
5011bf215546Sopenharmony_ci      /* If the command buffer has already been resetted with
5012bf215546Sopenharmony_ci       * vkResetCommandBuffer, no need to do it again.
5013bf215546Sopenharmony_ci       */
5014bf215546Sopenharmony_ci      result = radv_reset_cmd_buffer(cmd_buffer);
5015bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
5016bf215546Sopenharmony_ci         return result;
5017bf215546Sopenharmony_ci   }
5018bf215546Sopenharmony_ci
5019bf215546Sopenharmony_ci   memset(&cmd_buffer->state, 0, sizeof(cmd_buffer->state));
5020bf215546Sopenharmony_ci   cmd_buffer->state.last_primitive_reset_en = -1;
5021bf215546Sopenharmony_ci   cmd_buffer->state.last_index_type = -1;
5022bf215546Sopenharmony_ci   cmd_buffer->state.last_num_instances = -1;
5023bf215546Sopenharmony_ci   cmd_buffer->state.last_vertex_offset = -1;
5024bf215546Sopenharmony_ci   cmd_buffer->state.last_first_instance = -1;
5025bf215546Sopenharmony_ci   cmd_buffer->state.last_drawid = -1;
5026bf215546Sopenharmony_ci   cmd_buffer->state.last_subpass_color_count = MAX_RTS;
5027bf215546Sopenharmony_ci   cmd_buffer->state.predication_type = -1;
5028bf215546Sopenharmony_ci   cmd_buffer->state.last_sx_ps_downconvert = -1;
5029bf215546Sopenharmony_ci   cmd_buffer->state.last_sx_blend_opt_epsilon = -1;
5030bf215546Sopenharmony_ci   cmd_buffer->state.last_sx_blend_opt_control = -1;
5031bf215546Sopenharmony_ci   cmd_buffer->state.last_nggc_settings = -1;
5032bf215546Sopenharmony_ci   cmd_buffer->state.last_nggc_settings_sgpr_idx = -1;
5033bf215546Sopenharmony_ci   cmd_buffer->state.mesh_shading = false;
5034bf215546Sopenharmony_ci   cmd_buffer->state.last_vrs_rates = -1;
5035bf215546Sopenharmony_ci   cmd_buffer->state.last_vrs_rates_sgpr_idx = -1;
5036bf215546Sopenharmony_ci   cmd_buffer->usage_flags = pBeginInfo->flags;
5037bf215546Sopenharmony_ci
5038bf215546Sopenharmony_ci   if (cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_SECONDARY &&
5039bf215546Sopenharmony_ci       (pBeginInfo->flags & VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT)) {
5040bf215546Sopenharmony_ci      struct radv_subpass *subpass = NULL;
5041bf215546Sopenharmony_ci
5042bf215546Sopenharmony_ci      assert(pBeginInfo->pInheritanceInfo);
5043bf215546Sopenharmony_ci
5044bf215546Sopenharmony_ci      cmd_buffer->state.framebuffer =
5045bf215546Sopenharmony_ci         vk_framebuffer_from_handle(pBeginInfo->pInheritanceInfo->framebuffer);
5046bf215546Sopenharmony_ci
5047bf215546Sopenharmony_ci      if (pBeginInfo->pInheritanceInfo->renderPass) {
5048bf215546Sopenharmony_ci         cmd_buffer->state.pass =
5049bf215546Sopenharmony_ci            radv_render_pass_from_handle(pBeginInfo->pInheritanceInfo->renderPass);
5050bf215546Sopenharmony_ci         assert(pBeginInfo->pInheritanceInfo->subpass < cmd_buffer->state.pass->subpass_count);
5051bf215546Sopenharmony_ci         subpass = &cmd_buffer->state.pass->subpasses[pBeginInfo->pInheritanceInfo->subpass];
5052bf215546Sopenharmony_ci      } else {
5053bf215546Sopenharmony_ci         const VkCommandBufferInheritanceRenderingInfo *dyn_info =
5054bf215546Sopenharmony_ci            vk_find_struct_const(pBeginInfo->pInheritanceInfo->pNext,
5055bf215546Sopenharmony_ci                                 COMMAND_BUFFER_INHERITANCE_RENDERING_INFO);
5056bf215546Sopenharmony_ci         if (dyn_info) {
5057bf215546Sopenharmony_ci            radv_inherit_dynamic_rendering(cmd_buffer, pBeginInfo->pInheritanceInfo, dyn_info);
5058bf215546Sopenharmony_ci            subpass = &cmd_buffer->state.pass->subpasses[0];
5059bf215546Sopenharmony_ci         }
5060bf215546Sopenharmony_ci      }
5061bf215546Sopenharmony_ci
5062bf215546Sopenharmony_ci      if (cmd_buffer->state.framebuffer) {
5063bf215546Sopenharmony_ci         result = radv_cmd_state_setup_attachments(cmd_buffer, cmd_buffer->state.pass, NULL);
5064bf215546Sopenharmony_ci         if (result != VK_SUCCESS)
5065bf215546Sopenharmony_ci            return result;
5066bf215546Sopenharmony_ci      }
5067bf215546Sopenharmony_ci
5068bf215546Sopenharmony_ci      cmd_buffer->state.inherited_pipeline_statistics =
5069bf215546Sopenharmony_ci         pBeginInfo->pInheritanceInfo->pipelineStatistics;
5070bf215546Sopenharmony_ci
5071bf215546Sopenharmony_ci      if (cmd_buffer->state.pass) {
5072bf215546Sopenharmony_ci         cmd_buffer->state.subpass = subpass;
5073bf215546Sopenharmony_ci         if (cmd_buffer->state.framebuffer)
5074bf215546Sopenharmony_ci            cmd_buffer->state.dirty |= RADV_CMD_DIRTY_FRAMEBUFFER;
5075bf215546Sopenharmony_ci      }
5076bf215546Sopenharmony_ci   }
5077bf215546Sopenharmony_ci
5078bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->trace_bo))
5079bf215546Sopenharmony_ci      radv_cmd_buffer_trace_emit(cmd_buffer);
5080bf215546Sopenharmony_ci
5081bf215546Sopenharmony_ci   radv_describe_begin_cmd_buffer(cmd_buffer);
5082bf215546Sopenharmony_ci
5083bf215546Sopenharmony_ci   cmd_buffer->status = RADV_CMD_BUFFER_STATUS_RECORDING;
5084bf215546Sopenharmony_ci
5085bf215546Sopenharmony_ci   return result;
5086bf215546Sopenharmony_ci}
5087bf215546Sopenharmony_ci
5088bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5089bf215546Sopenharmony_ciradv_CmdBindVertexBuffers2(VkCommandBuffer commandBuffer, uint32_t firstBinding,
5090bf215546Sopenharmony_ci                           uint32_t bindingCount, const VkBuffer *pBuffers,
5091bf215546Sopenharmony_ci                           const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes,
5092bf215546Sopenharmony_ci                           const VkDeviceSize *pStrides)
5093bf215546Sopenharmony_ci{
5094bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5095bf215546Sopenharmony_ci   struct radv_vertex_binding *vb = cmd_buffer->vertex_bindings;
5096bf215546Sopenharmony_ci   const struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input;
5097bf215546Sopenharmony_ci
5098bf215546Sopenharmony_ci   /* We have to defer setting up vertex buffer since we need the buffer
5099bf215546Sopenharmony_ci    * stride from the pipeline. */
5100bf215546Sopenharmony_ci
5101bf215546Sopenharmony_ci   assert(firstBinding + bindingCount <= MAX_VBS);
5102bf215546Sopenharmony_ci   enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
5103bf215546Sopenharmony_ci
5104bf215546Sopenharmony_ci   if (firstBinding + bindingCount > cmd_buffer->used_vertex_bindings)
5105bf215546Sopenharmony_ci      cmd_buffer->used_vertex_bindings = firstBinding + bindingCount;
5106bf215546Sopenharmony_ci
5107bf215546Sopenharmony_ci   uint32_t misaligned_mask_invalid = 0;
5108bf215546Sopenharmony_ci
5109bf215546Sopenharmony_ci   for (uint32_t i = 0; i < bindingCount; i++) {
5110bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_buffer, buffer, pBuffers[i]);
5111bf215546Sopenharmony_ci      uint32_t idx = firstBinding + i;
5112bf215546Sopenharmony_ci      VkDeviceSize size = pSizes ? pSizes[i] : 0;
5113bf215546Sopenharmony_ci      /* if pStrides=NULL, it shouldn't overwrite the strides specified by CmdSetVertexInputEXT */
5114bf215546Sopenharmony_ci      VkDeviceSize stride = pStrides ? pStrides[i] : vb[idx].stride;
5115bf215546Sopenharmony_ci
5116bf215546Sopenharmony_ci      if (!!cmd_buffer->vertex_binding_buffers[idx] != !!buffer ||
5117bf215546Sopenharmony_ci          (buffer && ((vb[idx].offset & 0x3) != (pOffsets[i] & 0x3) ||
5118bf215546Sopenharmony_ci                      (vb[idx].stride & 0x3) != (stride & 0x3)))) {
5119bf215546Sopenharmony_ci         misaligned_mask_invalid |= state->bindings_match_attrib ? BITFIELD_BIT(idx) : 0xffffffff;
5120bf215546Sopenharmony_ci      }
5121bf215546Sopenharmony_ci
5122bf215546Sopenharmony_ci      cmd_buffer->vertex_binding_buffers[idx] = buffer;
5123bf215546Sopenharmony_ci      vb[idx].offset = pOffsets[i];
5124bf215546Sopenharmony_ci      vb[idx].size = size;
5125bf215546Sopenharmony_ci      vb[idx].stride = stride;
5126bf215546Sopenharmony_ci
5127bf215546Sopenharmony_ci      uint32_t bit = BITFIELD_BIT(idx);
5128bf215546Sopenharmony_ci      if (buffer) {
5129bf215546Sopenharmony_ci         radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, cmd_buffer->vertex_binding_buffers[idx]->bo);
5130bf215546Sopenharmony_ci         cmd_buffer->state.vbo_bound_mask |= bit;
5131bf215546Sopenharmony_ci      } else {
5132bf215546Sopenharmony_ci         cmd_buffer->state.vbo_bound_mask &= ~bit;
5133bf215546Sopenharmony_ci      }
5134bf215546Sopenharmony_ci   }
5135bf215546Sopenharmony_ci
5136bf215546Sopenharmony_ci   if ((chip == GFX6 || chip >= GFX10) && misaligned_mask_invalid) {
5137bf215546Sopenharmony_ci      cmd_buffer->state.vbo_misaligned_mask_invalid = misaligned_mask_invalid;
5138bf215546Sopenharmony_ci      cmd_buffer->state.vbo_misaligned_mask &= ~misaligned_mask_invalid;
5139bf215546Sopenharmony_ci   }
5140bf215546Sopenharmony_ci
5141bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER |
5142bf215546Sopenharmony_ci                              RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
5143bf215546Sopenharmony_ci}
5144bf215546Sopenharmony_ci
5145bf215546Sopenharmony_cistatic uint32_t
5146bf215546Sopenharmony_civk_to_index_type(VkIndexType type)
5147bf215546Sopenharmony_ci{
5148bf215546Sopenharmony_ci   switch (type) {
5149bf215546Sopenharmony_ci   case VK_INDEX_TYPE_UINT8_EXT:
5150bf215546Sopenharmony_ci      return V_028A7C_VGT_INDEX_8;
5151bf215546Sopenharmony_ci   case VK_INDEX_TYPE_UINT16:
5152bf215546Sopenharmony_ci      return V_028A7C_VGT_INDEX_16;
5153bf215546Sopenharmony_ci   case VK_INDEX_TYPE_UINT32:
5154bf215546Sopenharmony_ci      return V_028A7C_VGT_INDEX_32;
5155bf215546Sopenharmony_ci   default:
5156bf215546Sopenharmony_ci      unreachable("invalid index type");
5157bf215546Sopenharmony_ci   }
5158bf215546Sopenharmony_ci}
5159bf215546Sopenharmony_ci
5160bf215546Sopenharmony_ciuint32_t
5161bf215546Sopenharmony_ciradv_get_vgt_index_size(uint32_t type)
5162bf215546Sopenharmony_ci{
5163bf215546Sopenharmony_ci   uint32_t index_type = G_028A7C_INDEX_TYPE(type);
5164bf215546Sopenharmony_ci   switch (index_type) {
5165bf215546Sopenharmony_ci   case V_028A7C_VGT_INDEX_8:
5166bf215546Sopenharmony_ci      return 1;
5167bf215546Sopenharmony_ci   case V_028A7C_VGT_INDEX_16:
5168bf215546Sopenharmony_ci      return 2;
5169bf215546Sopenharmony_ci   case V_028A7C_VGT_INDEX_32:
5170bf215546Sopenharmony_ci      return 4;
5171bf215546Sopenharmony_ci   default:
5172bf215546Sopenharmony_ci      unreachable("invalid index type");
5173bf215546Sopenharmony_ci   }
5174bf215546Sopenharmony_ci}
5175bf215546Sopenharmony_ci
5176bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5177bf215546Sopenharmony_ciradv_CmdBindIndexBuffer(VkCommandBuffer commandBuffer, VkBuffer buffer, VkDeviceSize offset,
5178bf215546Sopenharmony_ci                        VkIndexType indexType)
5179bf215546Sopenharmony_ci{
5180bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5181bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, index_buffer, buffer);
5182bf215546Sopenharmony_ci
5183bf215546Sopenharmony_ci   cmd_buffer->state.index_buffer = index_buffer;
5184bf215546Sopenharmony_ci   cmd_buffer->state.index_offset = offset;
5185bf215546Sopenharmony_ci   cmd_buffer->state.index_type = vk_to_index_type(indexType);
5186bf215546Sopenharmony_ci   cmd_buffer->state.index_va = radv_buffer_get_va(index_buffer->bo);
5187bf215546Sopenharmony_ci   cmd_buffer->state.index_va += index_buffer->offset + offset;
5188bf215546Sopenharmony_ci
5189bf215546Sopenharmony_ci   int index_size = radv_get_vgt_index_size(vk_to_index_type(indexType));
5190bf215546Sopenharmony_ci   cmd_buffer->state.max_index_count =
5191bf215546Sopenharmony_ci      (vk_buffer_range(&index_buffer->vk, offset, VK_WHOLE_SIZE)) / index_size;
5192bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
5193bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, index_buffer->bo);
5194bf215546Sopenharmony_ci}
5195bf215546Sopenharmony_ci
5196bf215546Sopenharmony_cistatic void
5197bf215546Sopenharmony_ciradv_bind_descriptor_set(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point,
5198bf215546Sopenharmony_ci                         struct radv_descriptor_set *set, unsigned idx)
5199bf215546Sopenharmony_ci{
5200bf215546Sopenharmony_ci   struct radeon_winsys *ws = cmd_buffer->device->ws;
5201bf215546Sopenharmony_ci
5202bf215546Sopenharmony_ci   radv_set_descriptor_set(cmd_buffer, bind_point, set, idx);
5203bf215546Sopenharmony_ci
5204bf215546Sopenharmony_ci   assert(set);
5205bf215546Sopenharmony_ci   assert(!(set->header.layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR));
5206bf215546Sopenharmony_ci
5207bf215546Sopenharmony_ci   if (!cmd_buffer->device->use_global_bo_list) {
5208bf215546Sopenharmony_ci      for (unsigned j = 0; j < set->header.buffer_count; ++j)
5209bf215546Sopenharmony_ci         if (set->descriptors[j])
5210bf215546Sopenharmony_ci            radv_cs_add_buffer(ws, cmd_buffer->cs, set->descriptors[j]);
5211bf215546Sopenharmony_ci   }
5212bf215546Sopenharmony_ci
5213bf215546Sopenharmony_ci   if (set->header.bo)
5214bf215546Sopenharmony_ci      radv_cs_add_buffer(ws, cmd_buffer->cs, set->header.bo);
5215bf215546Sopenharmony_ci}
5216bf215546Sopenharmony_ci
5217bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5218bf215546Sopenharmony_ciradv_CmdBindDescriptorSets(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
5219bf215546Sopenharmony_ci                           VkPipelineLayout _layout, uint32_t firstSet, uint32_t descriptorSetCount,
5220bf215546Sopenharmony_ci                           const VkDescriptorSet *pDescriptorSets, uint32_t dynamicOffsetCount,
5221bf215546Sopenharmony_ci                           const uint32_t *pDynamicOffsets)
5222bf215546Sopenharmony_ci{
5223bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5224bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
5225bf215546Sopenharmony_ci   unsigned dyn_idx = 0;
5226bf215546Sopenharmony_ci
5227bf215546Sopenharmony_ci   const bool no_dynamic_bounds =
5228bf215546Sopenharmony_ci      cmd_buffer->device->instance->debug_flags & RADV_DEBUG_NO_DYNAMIC_BOUNDS;
5229bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
5230bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
5231bf215546Sopenharmony_ci
5232bf215546Sopenharmony_ci   for (unsigned i = 0; i < descriptorSetCount; ++i) {
5233bf215546Sopenharmony_ci      unsigned set_idx = i + firstSet;
5234bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_descriptor_set, set, pDescriptorSets[i]);
5235bf215546Sopenharmony_ci
5236bf215546Sopenharmony_ci      if (!set) {
5237bf215546Sopenharmony_ci         /* From the Vulkan spec 1.3.211:
5238bf215546Sopenharmony_ci          *
5239bf215546Sopenharmony_ci          * "VUID-vkCmdBindDescriptorSets-layout-06564
5240bf215546Sopenharmony_ci          *  If layout was not created with VK_PIPELINE_LAYOUT_CREATE_INDEPENDENT_SETS_BIT_EXT, each
5241bf215546Sopenharmony_ci          *  element of pDescriptorSets must be a valid VkDescriptorSet"
5242bf215546Sopenharmony_ci          */
5243bf215546Sopenharmony_ci         assert(layout->independent_sets);
5244bf215546Sopenharmony_ci         continue;
5245bf215546Sopenharmony_ci      }
5246bf215546Sopenharmony_ci
5247bf215546Sopenharmony_ci      /* If the set is already bound we only need to update the
5248bf215546Sopenharmony_ci       * (potentially changed) dynamic offsets. */
5249bf215546Sopenharmony_ci      if (descriptors_state->sets[set_idx] != set ||
5250bf215546Sopenharmony_ci          !(descriptors_state->valid & (1u << set_idx))) {
5251bf215546Sopenharmony_ci         radv_bind_descriptor_set(cmd_buffer, pipelineBindPoint, set, set_idx);
5252bf215546Sopenharmony_ci      }
5253bf215546Sopenharmony_ci
5254bf215546Sopenharmony_ci      for (unsigned j = 0; j < set->header.layout->dynamic_offset_count; ++j, ++dyn_idx) {
5255bf215546Sopenharmony_ci         unsigned idx = j + layout->set[i + firstSet].dynamic_offset_start;
5256bf215546Sopenharmony_ci         uint32_t *dst = descriptors_state->dynamic_buffers + idx * 4;
5257bf215546Sopenharmony_ci         assert(dyn_idx < dynamicOffsetCount);
5258bf215546Sopenharmony_ci
5259bf215546Sopenharmony_ci         struct radv_descriptor_range *range = set->header.dynamic_descriptors + j;
5260bf215546Sopenharmony_ci
5261bf215546Sopenharmony_ci         if (!range->va) {
5262bf215546Sopenharmony_ci            memset(dst, 0, 4 * 4);
5263bf215546Sopenharmony_ci         } else {
5264bf215546Sopenharmony_ci            uint64_t va = range->va + pDynamicOffsets[dyn_idx];
5265bf215546Sopenharmony_ci            dst[0] = va;
5266bf215546Sopenharmony_ci            dst[1] = S_008F04_BASE_ADDRESS_HI(va >> 32);
5267bf215546Sopenharmony_ci            dst[2] = no_dynamic_bounds ? 0xffffffffu : range->size;
5268bf215546Sopenharmony_ci            dst[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
5269bf215546Sopenharmony_ci                     S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);
5270bf215546Sopenharmony_ci
5271bf215546Sopenharmony_ci            if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX11) {
5272bf215546Sopenharmony_ci               dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX11_FORMAT_32_FLOAT) |
5273bf215546Sopenharmony_ci                         S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW);
5274bf215546Sopenharmony_ci            } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10) {
5275bf215546Sopenharmony_ci               dst[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
5276bf215546Sopenharmony_ci                         S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
5277bf215546Sopenharmony_ci            } else {
5278bf215546Sopenharmony_ci               dst[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
5279bf215546Sopenharmony_ci                         S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
5280bf215546Sopenharmony_ci            }
5281bf215546Sopenharmony_ci         }
5282bf215546Sopenharmony_ci
5283bf215546Sopenharmony_ci         cmd_buffer->push_constant_stages |= set->header.layout->dynamic_shader_stages;
5284bf215546Sopenharmony_ci      }
5285bf215546Sopenharmony_ci   }
5286bf215546Sopenharmony_ci}
5287bf215546Sopenharmony_ci
5288bf215546Sopenharmony_cistatic bool
5289bf215546Sopenharmony_ciradv_init_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer, struct radv_descriptor_set *set,
5290bf215546Sopenharmony_ci                              struct radv_descriptor_set_layout *layout,
5291bf215546Sopenharmony_ci                              VkPipelineBindPoint bind_point)
5292bf215546Sopenharmony_ci{
5293bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
5294bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
5295bf215546Sopenharmony_ci   set->header.size = layout->size;
5296bf215546Sopenharmony_ci
5297bf215546Sopenharmony_ci   if (set->header.layout != layout) {
5298bf215546Sopenharmony_ci      if (set->header.layout)
5299bf215546Sopenharmony_ci         vk_descriptor_set_layout_unref(&cmd_buffer->device->vk, &set->header.layout->vk);
5300bf215546Sopenharmony_ci      vk_descriptor_set_layout_ref(&layout->vk);
5301bf215546Sopenharmony_ci      set->header.layout = layout;
5302bf215546Sopenharmony_ci   }
5303bf215546Sopenharmony_ci
5304bf215546Sopenharmony_ci   if (descriptors_state->push_set.capacity < set->header.size) {
5305bf215546Sopenharmony_ci      size_t new_size = MAX2(set->header.size, 1024);
5306bf215546Sopenharmony_ci      new_size = MAX2(new_size, 2 * descriptors_state->push_set.capacity);
5307bf215546Sopenharmony_ci      new_size = MIN2(new_size, 96 * MAX_PUSH_DESCRIPTORS);
5308bf215546Sopenharmony_ci
5309bf215546Sopenharmony_ci      free(set->header.mapped_ptr);
5310bf215546Sopenharmony_ci      set->header.mapped_ptr = malloc(new_size);
5311bf215546Sopenharmony_ci
5312bf215546Sopenharmony_ci      if (!set->header.mapped_ptr) {
5313bf215546Sopenharmony_ci         descriptors_state->push_set.capacity = 0;
5314bf215546Sopenharmony_ci         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
5315bf215546Sopenharmony_ci         return false;
5316bf215546Sopenharmony_ci      }
5317bf215546Sopenharmony_ci
5318bf215546Sopenharmony_ci      descriptors_state->push_set.capacity = new_size;
5319bf215546Sopenharmony_ci   }
5320bf215546Sopenharmony_ci
5321bf215546Sopenharmony_ci   return true;
5322bf215546Sopenharmony_ci}
5323bf215546Sopenharmony_ci
5324bf215546Sopenharmony_civoid
5325bf215546Sopenharmony_ciradv_meta_push_descriptor_set(struct radv_cmd_buffer *cmd_buffer,
5326bf215546Sopenharmony_ci                              VkPipelineBindPoint pipelineBindPoint, VkPipelineLayout _layout,
5327bf215546Sopenharmony_ci                              uint32_t set, uint32_t descriptorWriteCount,
5328bf215546Sopenharmony_ci                              const VkWriteDescriptorSet *pDescriptorWrites)
5329bf215546Sopenharmony_ci{
5330bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
5331bf215546Sopenharmony_ci   struct radv_descriptor_set *push_set =
5332bf215546Sopenharmony_ci      (struct radv_descriptor_set *)&cmd_buffer->meta_push_descriptors;
5333bf215546Sopenharmony_ci   unsigned bo_offset;
5334bf215546Sopenharmony_ci
5335bf215546Sopenharmony_ci   assert(set == 0);
5336bf215546Sopenharmony_ci   assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
5337bf215546Sopenharmony_ci
5338bf215546Sopenharmony_ci   push_set->header.size = layout->set[set].layout->size;
5339bf215546Sopenharmony_ci   push_set->header.layout = layout->set[set].layout;
5340bf215546Sopenharmony_ci
5341bf215546Sopenharmony_ci   if (!radv_cmd_buffer_upload_alloc(cmd_buffer, push_set->header.size, &bo_offset,
5342bf215546Sopenharmony_ci                                     (void **)&push_set->header.mapped_ptr))
5343bf215546Sopenharmony_ci      return;
5344bf215546Sopenharmony_ci
5345bf215546Sopenharmony_ci   push_set->header.va = radv_buffer_get_va(cmd_buffer->upload.upload_bo);
5346bf215546Sopenharmony_ci   push_set->header.va += bo_offset;
5347bf215546Sopenharmony_ci
5348bf215546Sopenharmony_ci   radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
5349bf215546Sopenharmony_ci                                   radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
5350bf215546Sopenharmony_ci                                   pDescriptorWrites, 0, NULL);
5351bf215546Sopenharmony_ci
5352bf215546Sopenharmony_ci   radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
5353bf215546Sopenharmony_ci}
5354bf215546Sopenharmony_ci
5355bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5356bf215546Sopenharmony_ciradv_CmdPushDescriptorSetKHR(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
5357bf215546Sopenharmony_ci                             VkPipelineLayout _layout, uint32_t set, uint32_t descriptorWriteCount,
5358bf215546Sopenharmony_ci                             const VkWriteDescriptorSet *pDescriptorWrites)
5359bf215546Sopenharmony_ci{
5360bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5361bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
5362bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
5363bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, pipelineBindPoint);
5364bf215546Sopenharmony_ci   struct radv_descriptor_set *push_set =
5365bf215546Sopenharmony_ci      (struct radv_descriptor_set *)&descriptors_state->push_set.set;
5366bf215546Sopenharmony_ci
5367bf215546Sopenharmony_ci   assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
5368bf215546Sopenharmony_ci
5369bf215546Sopenharmony_ci   if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
5370bf215546Sopenharmony_ci                                      pipelineBindPoint))
5371bf215546Sopenharmony_ci      return;
5372bf215546Sopenharmony_ci
5373bf215546Sopenharmony_ci   /* Check that there are no inline uniform block updates when calling vkCmdPushDescriptorSetKHR()
5374bf215546Sopenharmony_ci    * because it is invalid, according to Vulkan spec.
5375bf215546Sopenharmony_ci    */
5376bf215546Sopenharmony_ci   for (int i = 0; i < descriptorWriteCount; i++) {
5377bf215546Sopenharmony_ci      ASSERTED const VkWriteDescriptorSet *writeset = &pDescriptorWrites[i];
5378bf215546Sopenharmony_ci      assert(writeset->descriptorType != VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK);
5379bf215546Sopenharmony_ci   }
5380bf215546Sopenharmony_ci
5381bf215546Sopenharmony_ci   radv_cmd_update_descriptor_sets(cmd_buffer->device, cmd_buffer,
5382bf215546Sopenharmony_ci                                   radv_descriptor_set_to_handle(push_set), descriptorWriteCount,
5383bf215546Sopenharmony_ci                                   pDescriptorWrites, 0, NULL);
5384bf215546Sopenharmony_ci
5385bf215546Sopenharmony_ci   radv_set_descriptor_set(cmd_buffer, pipelineBindPoint, push_set, set);
5386bf215546Sopenharmony_ci   descriptors_state->push_dirty = true;
5387bf215546Sopenharmony_ci}
5388bf215546Sopenharmony_ci
5389bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5390bf215546Sopenharmony_ciradv_CmdPushDescriptorSetWithTemplateKHR(VkCommandBuffer commandBuffer,
5391bf215546Sopenharmony_ci                                         VkDescriptorUpdateTemplate descriptorUpdateTemplate,
5392bf215546Sopenharmony_ci                                         VkPipelineLayout _layout, uint32_t set, const void *pData)
5393bf215546Sopenharmony_ci{
5394bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5395bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline_layout, layout, _layout);
5396bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_descriptor_update_template, templ, descriptorUpdateTemplate);
5397bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
5398bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, templ->bind_point);
5399bf215546Sopenharmony_ci   struct radv_descriptor_set *push_set =
5400bf215546Sopenharmony_ci      (struct radv_descriptor_set *)&descriptors_state->push_set.set;
5401bf215546Sopenharmony_ci
5402bf215546Sopenharmony_ci   assert(layout->set[set].layout->flags & VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR);
5403bf215546Sopenharmony_ci
5404bf215546Sopenharmony_ci   if (!radv_init_push_descriptor_set(cmd_buffer, push_set, layout->set[set].layout,
5405bf215546Sopenharmony_ci                                      templ->bind_point))
5406bf215546Sopenharmony_ci      return;
5407bf215546Sopenharmony_ci
5408bf215546Sopenharmony_ci   radv_cmd_update_descriptor_set_with_template(cmd_buffer->device, cmd_buffer, push_set,
5409bf215546Sopenharmony_ci                                                descriptorUpdateTemplate, pData);
5410bf215546Sopenharmony_ci
5411bf215546Sopenharmony_ci   radv_set_descriptor_set(cmd_buffer, templ->bind_point, push_set, set);
5412bf215546Sopenharmony_ci   descriptors_state->push_dirty = true;
5413bf215546Sopenharmony_ci}
5414bf215546Sopenharmony_ci
5415bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5416bf215546Sopenharmony_ciradv_CmdPushConstants(VkCommandBuffer commandBuffer, VkPipelineLayout layout,
5417bf215546Sopenharmony_ci                      VkShaderStageFlags stageFlags, uint32_t offset, uint32_t size,
5418bf215546Sopenharmony_ci                      const void *pValues)
5419bf215546Sopenharmony_ci{
5420bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5421bf215546Sopenharmony_ci   memcpy(cmd_buffer->push_constants + offset, pValues, size);
5422bf215546Sopenharmony_ci   cmd_buffer->push_constant_stages |= stageFlags;
5423bf215546Sopenharmony_ci}
5424bf215546Sopenharmony_ci
5425bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
5426bf215546Sopenharmony_ciradv_EndCommandBuffer(VkCommandBuffer commandBuffer)
5427bf215546Sopenharmony_ci{
5428bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5429bf215546Sopenharmony_ci
5430bf215546Sopenharmony_ci   radv_emit_mip_change_flush_default(cmd_buffer);
5431bf215546Sopenharmony_ci
5432bf215546Sopenharmony_ci   if (cmd_buffer->qf != RADV_QUEUE_TRANSFER) {
5433bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX6)
5434bf215546Sopenharmony_ci         cmd_buffer->state.flush_bits |=
5435bf215546Sopenharmony_ci            RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_WB_L2;
5436bf215546Sopenharmony_ci
5437bf215546Sopenharmony_ci      /* Make sure to sync all pending active queries at the end of
5438bf215546Sopenharmony_ci       * command buffer.
5439bf215546Sopenharmony_ci       */
5440bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |= cmd_buffer->active_query_flush_bits;
5441bf215546Sopenharmony_ci
5442bf215546Sopenharmony_ci      /* Flush noncoherent images on GFX9+ so we can assume they're clean on the start of a
5443bf215546Sopenharmony_ci       * command buffer.
5444bf215546Sopenharmony_ci       */
5445bf215546Sopenharmony_ci      if (cmd_buffer->state.rb_noncoherent_dirty && can_skip_buffer_l2_flushes(cmd_buffer->device))
5446bf215546Sopenharmony_ci         cmd_buffer->state.flush_bits |= radv_src_access_flush(
5447bf215546Sopenharmony_ci            cmd_buffer,
5448bf215546Sopenharmony_ci            VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT |
5449bf215546Sopenharmony_ci            VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
5450bf215546Sopenharmony_ci            NULL);
5451bf215546Sopenharmony_ci
5452bf215546Sopenharmony_ci      /* Since NGG streamout uses GDS, we need to make GDS idle when
5453bf215546Sopenharmony_ci       * we leave the IB, otherwise another process might overwrite
5454bf215546Sopenharmony_ci       * it while our shaders are busy.
5455bf215546Sopenharmony_ci       */
5456bf215546Sopenharmony_ci      if (cmd_buffer->gds_needed)
5457bf215546Sopenharmony_ci         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH;
5458bf215546Sopenharmony_ci
5459bf215546Sopenharmony_ci      /* Finalize the internal compute command stream, if it exists. */
5460bf215546Sopenharmony_ci      if (cmd_buffer->ace_internal.cs) {
5461bf215546Sopenharmony_ci         VkResult result = radv_ace_internal_finalize(cmd_buffer);
5462bf215546Sopenharmony_ci         if (result != VK_SUCCESS)
5463bf215546Sopenharmony_ci            return vk_error(cmd_buffer, result);
5464bf215546Sopenharmony_ci      }
5465bf215546Sopenharmony_ci
5466bf215546Sopenharmony_ci      si_emit_cache_flush(cmd_buffer);
5467bf215546Sopenharmony_ci   }
5468bf215546Sopenharmony_ci
5469bf215546Sopenharmony_ci   /* Make sure CP DMA is idle at the end of IBs because the kernel
5470bf215546Sopenharmony_ci    * doesn't wait for it.
5471bf215546Sopenharmony_ci    */
5472bf215546Sopenharmony_ci   si_cp_dma_wait_for_idle(cmd_buffer);
5473bf215546Sopenharmony_ci
5474bf215546Sopenharmony_ci   radv_describe_end_cmd_buffer(cmd_buffer);
5475bf215546Sopenharmony_ci
5476bf215546Sopenharmony_ci   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
5477bf215546Sopenharmony_ci   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.subpass_sample_locs);
5478bf215546Sopenharmony_ci
5479bf215546Sopenharmony_ci   VkResult result = cmd_buffer->device->ws->cs_finalize(cmd_buffer->cs);
5480bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
5481bf215546Sopenharmony_ci      return vk_error(cmd_buffer, result);
5482bf215546Sopenharmony_ci
5483bf215546Sopenharmony_ci   cmd_buffer->status = RADV_CMD_BUFFER_STATUS_EXECUTABLE;
5484bf215546Sopenharmony_ci
5485bf215546Sopenharmony_ci   return cmd_buffer->record_result;
5486bf215546Sopenharmony_ci}
5487bf215546Sopenharmony_ci
5488bf215546Sopenharmony_cistatic void
5489bf215546Sopenharmony_ciradv_emit_compute_pipeline(struct radv_cmd_buffer *cmd_buffer,
5490bf215546Sopenharmony_ci                           struct radv_compute_pipeline *pipeline)
5491bf215546Sopenharmony_ci{
5492bf215546Sopenharmony_ci   if (pipeline == cmd_buffer->state.emitted_compute_pipeline)
5493bf215546Sopenharmony_ci      return;
5494bf215546Sopenharmony_ci
5495bf215546Sopenharmony_ci   assert(!pipeline->base.ctx_cs.cdw);
5496bf215546Sopenharmony_ci
5497bf215546Sopenharmony_ci   cmd_buffer->state.emitted_compute_pipeline = pipeline;
5498bf215546Sopenharmony_ci
5499bf215546Sopenharmony_ci   radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.cs.cdw);
5500bf215546Sopenharmony_ci   radeon_emit_array(cmd_buffer->cs, pipeline->base.cs.buf, pipeline->base.cs.cdw);
5501bf215546Sopenharmony_ci
5502bf215546Sopenharmony_ci   cmd_buffer->compute_scratch_size_per_wave_needed =
5503bf215546Sopenharmony_ci      MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, pipeline->base.scratch_bytes_per_wave);
5504bf215546Sopenharmony_ci   cmd_buffer->compute_scratch_waves_wanted =
5505bf215546Sopenharmony_ci      MAX2(cmd_buffer->compute_scratch_waves_wanted, pipeline->base.max_waves);
5506bf215546Sopenharmony_ci
5507bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, pipeline->base.slab_bo);
5508bf215546Sopenharmony_ci
5509bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->trace_bo))
5510bf215546Sopenharmony_ci      radv_save_pipeline(cmd_buffer, &pipeline->base);
5511bf215546Sopenharmony_ci}
5512bf215546Sopenharmony_ci
5513bf215546Sopenharmony_cistatic void
5514bf215546Sopenharmony_ciradv_mark_descriptor_sets_dirty(struct radv_cmd_buffer *cmd_buffer, VkPipelineBindPoint bind_point)
5515bf215546Sopenharmony_ci{
5516bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
5517bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, bind_point);
5518bf215546Sopenharmony_ci
5519bf215546Sopenharmony_ci   descriptors_state->dirty |= descriptors_state->valid;
5520bf215546Sopenharmony_ci}
5521bf215546Sopenharmony_ci
5522bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5523bf215546Sopenharmony_ciradv_CmdBindPipeline(VkCommandBuffer commandBuffer, VkPipelineBindPoint pipelineBindPoint,
5524bf215546Sopenharmony_ci                     VkPipeline _pipeline)
5525bf215546Sopenharmony_ci{
5526bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5527bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_pipeline, pipeline, _pipeline);
5528bf215546Sopenharmony_ci
5529bf215546Sopenharmony_ci   switch (pipelineBindPoint) {
5530bf215546Sopenharmony_ci   case VK_PIPELINE_BIND_POINT_COMPUTE: {
5531bf215546Sopenharmony_ci      struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
5532bf215546Sopenharmony_ci
5533bf215546Sopenharmony_ci      if (cmd_buffer->state.compute_pipeline == compute_pipeline)
5534bf215546Sopenharmony_ci         return;
5535bf215546Sopenharmony_ci      radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
5536bf215546Sopenharmony_ci
5537bf215546Sopenharmony_ci      cmd_buffer->state.compute_pipeline = compute_pipeline;
5538bf215546Sopenharmony_ci      cmd_buffer->push_constant_stages |= VK_SHADER_STAGE_COMPUTE_BIT;
5539bf215546Sopenharmony_ci      cmd_buffer->task_rings_needed |=
5540bf215546Sopenharmony_ci         pipeline->shaders[MESA_SHADER_COMPUTE]->info.cs.uses_task_rings;
5541bf215546Sopenharmony_ci      break;
5542bf215546Sopenharmony_ci   }
5543bf215546Sopenharmony_ci   case VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR: {
5544bf215546Sopenharmony_ci      struct radv_compute_pipeline *compute_pipeline = radv_pipeline_to_compute(pipeline);
5545bf215546Sopenharmony_ci
5546bf215546Sopenharmony_ci      if (cmd_buffer->state.rt_pipeline == compute_pipeline)
5547bf215546Sopenharmony_ci         return;
5548bf215546Sopenharmony_ci      radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
5549bf215546Sopenharmony_ci
5550bf215546Sopenharmony_ci      cmd_buffer->state.rt_pipeline = compute_pipeline;
5551bf215546Sopenharmony_ci      cmd_buffer->push_constant_stages |= RADV_RT_STAGE_BITS;
5552bf215546Sopenharmony_ci      if (compute_pipeline->dynamic_stack_size)
5553bf215546Sopenharmony_ci         radv_set_rt_stack_size(cmd_buffer, cmd_buffer->state.rt_stack_size);
5554bf215546Sopenharmony_ci      break;
5555bf215546Sopenharmony_ci   }
5556bf215546Sopenharmony_ci   case VK_PIPELINE_BIND_POINT_GRAPHICS: {
5557bf215546Sopenharmony_ci      struct radv_graphics_pipeline *graphics_pipeline =
5558bf215546Sopenharmony_ci         pipeline ? radv_pipeline_to_graphics(pipeline) : NULL;
5559bf215546Sopenharmony_ci
5560bf215546Sopenharmony_ci      if (cmd_buffer->state.graphics_pipeline == graphics_pipeline)
5561bf215546Sopenharmony_ci         return;
5562bf215546Sopenharmony_ci      radv_mark_descriptor_sets_dirty(cmd_buffer, pipelineBindPoint);
5563bf215546Sopenharmony_ci
5564bf215546Sopenharmony_ci      bool vtx_emit_count_changed =
5565bf215546Sopenharmony_ci         !pipeline || !cmd_buffer->state.graphics_pipeline ||
5566bf215546Sopenharmony_ci         cmd_buffer->state.graphics_pipeline->vtx_emit_num != graphics_pipeline->vtx_emit_num ||
5567bf215546Sopenharmony_ci         cmd_buffer->state.graphics_pipeline->vtx_base_sgpr != graphics_pipeline->vtx_base_sgpr;
5568bf215546Sopenharmony_ci      cmd_buffer->state.graphics_pipeline = graphics_pipeline;
5569bf215546Sopenharmony_ci      if (!pipeline)
5570bf215546Sopenharmony_ci         break;
5571bf215546Sopenharmony_ci
5572bf215546Sopenharmony_ci      bool mesh_shading = radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_MESH);
5573bf215546Sopenharmony_ci      if (mesh_shading != cmd_buffer->state.mesh_shading) {
5574bf215546Sopenharmony_ci         /* Re-emit VRS state because the combiner is different (vertex vs primitive).
5575bf215546Sopenharmony_ci          * Re-emit primitive topology because the mesh shading pipeline clobbered it.
5576bf215546Sopenharmony_ci          */
5577bf215546Sopenharmony_ci         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE |
5578bf215546Sopenharmony_ci                                    RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
5579bf215546Sopenharmony_ci      }
5580bf215546Sopenharmony_ci
5581bf215546Sopenharmony_ci      cmd_buffer->state.mesh_shading = mesh_shading;
5582bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
5583bf215546Sopenharmony_ci      cmd_buffer->push_constant_stages |= graphics_pipeline->active_stages;
5584bf215546Sopenharmony_ci
5585bf215546Sopenharmony_ci      /* the new vertex shader might not have the same user regs */
5586bf215546Sopenharmony_ci      if (vtx_emit_count_changed) {
5587bf215546Sopenharmony_ci         cmd_buffer->state.last_first_instance = -1;
5588bf215546Sopenharmony_ci         cmd_buffer->state.last_vertex_offset = -1;
5589bf215546Sopenharmony_ci         cmd_buffer->state.last_drawid = -1;
5590bf215546Sopenharmony_ci      }
5591bf215546Sopenharmony_ci
5592bf215546Sopenharmony_ci      /* Prefetch all pipeline shaders at first draw time. */
5593bf215546Sopenharmony_ci      cmd_buffer->state.prefetch_L2_mask |= RADV_PREFETCH_SHADERS;
5594bf215546Sopenharmony_ci
5595bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.has_vgt_flush_ngg_legacy_bug &&
5596bf215546Sopenharmony_ci          cmd_buffer->state.emitted_graphics_pipeline &&
5597bf215546Sopenharmony_ci          cmd_buffer->state.emitted_graphics_pipeline->is_ngg &&
5598bf215546Sopenharmony_ci          !cmd_buffer->state.graphics_pipeline->is_ngg) {
5599bf215546Sopenharmony_ci         /* Transitioning from NGG to legacy GS requires
5600bf215546Sopenharmony_ci          * VGT_FLUSH on GFX10 and Navi21. VGT_FLUSH
5601bf215546Sopenharmony_ci          * is also emitted at the beginning of IBs when legacy
5602bf215546Sopenharmony_ci          * GS ring pointers are set.
5603bf215546Sopenharmony_ci          */
5604bf215546Sopenharmony_ci         cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_FLUSH;
5605bf215546Sopenharmony_ci      }
5606bf215546Sopenharmony_ci
5607bf215546Sopenharmony_ci      radv_bind_dynamic_state(cmd_buffer, &graphics_pipeline->dynamic_state);
5608bf215546Sopenharmony_ci
5609bf215546Sopenharmony_ci      if (graphics_pipeline->esgs_ring_size > cmd_buffer->esgs_ring_size_needed)
5610bf215546Sopenharmony_ci         cmd_buffer->esgs_ring_size_needed = graphics_pipeline->esgs_ring_size;
5611bf215546Sopenharmony_ci      if (graphics_pipeline->gsvs_ring_size > cmd_buffer->gsvs_ring_size_needed)
5612bf215546Sopenharmony_ci         cmd_buffer->gsvs_ring_size_needed = graphics_pipeline->gsvs_ring_size;
5613bf215546Sopenharmony_ci
5614bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TESS_CTRL))
5615bf215546Sopenharmony_ci         cmd_buffer->tess_rings_needed = true;
5616bf215546Sopenharmony_ci      if (mesh_shading)
5617bf215546Sopenharmony_ci         cmd_buffer->mesh_scratch_ring_needed |=
5618bf215546Sopenharmony_ci            pipeline->shaders[MESA_SHADER_MESH]->info.ms.needs_ms_scratch_ring;
5619bf215546Sopenharmony_ci
5620bf215546Sopenharmony_ci      if (radv_pipeline_has_stage(graphics_pipeline, MESA_SHADER_TASK)) {
5621bf215546Sopenharmony_ci         if (!cmd_buffer->ace_internal.cs) {
5622bf215546Sopenharmony_ci            cmd_buffer->ace_internal.cs = radv_ace_internal_create(cmd_buffer);
5623bf215546Sopenharmony_ci            if (!cmd_buffer->ace_internal.cs)
5624bf215546Sopenharmony_ci               return;
5625bf215546Sopenharmony_ci         }
5626bf215546Sopenharmony_ci
5627bf215546Sopenharmony_ci         cmd_buffer->task_rings_needed = true;
5628bf215546Sopenharmony_ci      }
5629bf215546Sopenharmony_ci      break;
5630bf215546Sopenharmony_ci   }
5631bf215546Sopenharmony_ci   default:
5632bf215546Sopenharmony_ci      assert(!"invalid bind point");
5633bf215546Sopenharmony_ci      break;
5634bf215546Sopenharmony_ci   }
5635bf215546Sopenharmony_ci}
5636bf215546Sopenharmony_ci
5637bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5638bf215546Sopenharmony_ciradv_CmdSetViewport(VkCommandBuffer commandBuffer, uint32_t firstViewport, uint32_t viewportCount,
5639bf215546Sopenharmony_ci                    const VkViewport *pViewports)
5640bf215546Sopenharmony_ci{
5641bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5642bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5643bf215546Sopenharmony_ci   ASSERTED const uint32_t total_count = firstViewport + viewportCount;
5644bf215546Sopenharmony_ci
5645bf215546Sopenharmony_ci   assert(firstViewport < MAX_VIEWPORTS);
5646bf215546Sopenharmony_ci   assert(total_count >= 1 && total_count <= MAX_VIEWPORTS);
5647bf215546Sopenharmony_ci
5648bf215546Sopenharmony_ci   if (state->dynamic.viewport.count < total_count)
5649bf215546Sopenharmony_ci      state->dynamic.viewport.count = total_count;
5650bf215546Sopenharmony_ci
5651bf215546Sopenharmony_ci   memcpy(state->dynamic.viewport.viewports + firstViewport, pViewports,
5652bf215546Sopenharmony_ci          viewportCount * sizeof(*pViewports));
5653bf215546Sopenharmony_ci   for (unsigned i = 0; i < viewportCount; i++) {
5654bf215546Sopenharmony_ci      radv_get_viewport_xform(&pViewports[i],
5655bf215546Sopenharmony_ci                              state->dynamic.viewport.xform[i + firstViewport].scale,
5656bf215546Sopenharmony_ci                              state->dynamic.viewport.xform[i + firstViewport].translate);
5657bf215546Sopenharmony_ci   }
5658bf215546Sopenharmony_ci
5659bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_VIEWPORT;
5660bf215546Sopenharmony_ci}
5661bf215546Sopenharmony_ci
5662bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5663bf215546Sopenharmony_ciradv_CmdSetScissor(VkCommandBuffer commandBuffer, uint32_t firstScissor, uint32_t scissorCount,
5664bf215546Sopenharmony_ci                   const VkRect2D *pScissors)
5665bf215546Sopenharmony_ci{
5666bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5667bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5668bf215546Sopenharmony_ci   ASSERTED const uint32_t total_count = firstScissor + scissorCount;
5669bf215546Sopenharmony_ci
5670bf215546Sopenharmony_ci   assert(firstScissor < MAX_SCISSORS);
5671bf215546Sopenharmony_ci   assert(total_count >= 1 && total_count <= MAX_SCISSORS);
5672bf215546Sopenharmony_ci
5673bf215546Sopenharmony_ci   if (state->dynamic.scissor.count < total_count)
5674bf215546Sopenharmony_ci      state->dynamic.scissor.count = total_count;
5675bf215546Sopenharmony_ci
5676bf215546Sopenharmony_ci   memcpy(state->dynamic.scissor.scissors + firstScissor, pScissors,
5677bf215546Sopenharmony_ci          scissorCount * sizeof(*pScissors));
5678bf215546Sopenharmony_ci
5679bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
5680bf215546Sopenharmony_ci}
5681bf215546Sopenharmony_ci
5682bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5683bf215546Sopenharmony_ciradv_CmdSetLineWidth(VkCommandBuffer commandBuffer, float lineWidth)
5684bf215546Sopenharmony_ci{
5685bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5686bf215546Sopenharmony_ci
5687bf215546Sopenharmony_ci   if (cmd_buffer->state.dynamic.line_width != lineWidth)
5688bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
5689bf215546Sopenharmony_ci
5690bf215546Sopenharmony_ci   cmd_buffer->state.dynamic.line_width = lineWidth;
5691bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_WIDTH;
5692bf215546Sopenharmony_ci}
5693bf215546Sopenharmony_ci
5694bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5695bf215546Sopenharmony_ciradv_CmdSetDepthBias(VkCommandBuffer commandBuffer, float depthBiasConstantFactor,
5696bf215546Sopenharmony_ci                     float depthBiasClamp, float depthBiasSlopeFactor)
5697bf215546Sopenharmony_ci{
5698bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5699bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5700bf215546Sopenharmony_ci
5701bf215546Sopenharmony_ci   state->dynamic.depth_bias.bias = depthBiasConstantFactor;
5702bf215546Sopenharmony_ci   state->dynamic.depth_bias.clamp = depthBiasClamp;
5703bf215546Sopenharmony_ci   state->dynamic.depth_bias.slope = depthBiasSlopeFactor;
5704bf215546Sopenharmony_ci
5705bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS;
5706bf215546Sopenharmony_ci}
5707bf215546Sopenharmony_ci
5708bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5709bf215546Sopenharmony_ciradv_CmdSetBlendConstants(VkCommandBuffer commandBuffer, const float blendConstants[4])
5710bf215546Sopenharmony_ci{
5711bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5712bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5713bf215546Sopenharmony_ci
5714bf215546Sopenharmony_ci   memcpy(state->dynamic.blend_constants, blendConstants, sizeof(float) * 4);
5715bf215546Sopenharmony_ci
5716bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS;
5717bf215546Sopenharmony_ci}
5718bf215546Sopenharmony_ci
5719bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5720bf215546Sopenharmony_ciradv_CmdSetDepthBounds(VkCommandBuffer commandBuffer, float minDepthBounds, float maxDepthBounds)
5721bf215546Sopenharmony_ci{
5722bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5723bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5724bf215546Sopenharmony_ci
5725bf215546Sopenharmony_ci   state->dynamic.depth_bounds.min = minDepthBounds;
5726bf215546Sopenharmony_ci   state->dynamic.depth_bounds.max = maxDepthBounds;
5727bf215546Sopenharmony_ci
5728bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS;
5729bf215546Sopenharmony_ci}
5730bf215546Sopenharmony_ci
5731bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5732bf215546Sopenharmony_ciradv_CmdSetStencilCompareMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
5733bf215546Sopenharmony_ci                              uint32_t compareMask)
5734bf215546Sopenharmony_ci{
5735bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5736bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5737bf215546Sopenharmony_ci
5738bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
5739bf215546Sopenharmony_ci      state->dynamic.stencil_compare_mask.front = compareMask;
5740bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
5741bf215546Sopenharmony_ci      state->dynamic.stencil_compare_mask.back = compareMask;
5742bf215546Sopenharmony_ci
5743bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK;
5744bf215546Sopenharmony_ci}
5745bf215546Sopenharmony_ci
5746bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5747bf215546Sopenharmony_ciradv_CmdSetStencilWriteMask(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
5748bf215546Sopenharmony_ci                            uint32_t writeMask)
5749bf215546Sopenharmony_ci{
5750bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5751bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5752bf215546Sopenharmony_ci
5753bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
5754bf215546Sopenharmony_ci      state->dynamic.stencil_write_mask.front = writeMask;
5755bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
5756bf215546Sopenharmony_ci      state->dynamic.stencil_write_mask.back = writeMask;
5757bf215546Sopenharmony_ci
5758bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK;
5759bf215546Sopenharmony_ci}
5760bf215546Sopenharmony_ci
5761bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5762bf215546Sopenharmony_ciradv_CmdSetStencilReference(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
5763bf215546Sopenharmony_ci                            uint32_t reference)
5764bf215546Sopenharmony_ci{
5765bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5766bf215546Sopenharmony_ci
5767bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_FRONT_BIT)
5768bf215546Sopenharmony_ci      cmd_buffer->state.dynamic.stencil_reference.front = reference;
5769bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_BACK_BIT)
5770bf215546Sopenharmony_ci      cmd_buffer->state.dynamic.stencil_reference.back = reference;
5771bf215546Sopenharmony_ci
5772bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE;
5773bf215546Sopenharmony_ci}
5774bf215546Sopenharmony_ci
5775bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5776bf215546Sopenharmony_ciradv_CmdSetDiscardRectangleEXT(VkCommandBuffer commandBuffer, uint32_t firstDiscardRectangle,
5777bf215546Sopenharmony_ci                               uint32_t discardRectangleCount, const VkRect2D *pDiscardRectangles)
5778bf215546Sopenharmony_ci{
5779bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5780bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5781bf215546Sopenharmony_ci   ASSERTED const uint32_t total_count = firstDiscardRectangle + discardRectangleCount;
5782bf215546Sopenharmony_ci
5783bf215546Sopenharmony_ci   assert(firstDiscardRectangle < MAX_DISCARD_RECTANGLES);
5784bf215546Sopenharmony_ci   assert(total_count >= 1 && total_count <= MAX_DISCARD_RECTANGLES);
5785bf215546Sopenharmony_ci
5786bf215546Sopenharmony_ci   typed_memcpy(&state->dynamic.discard_rectangle.rectangles[firstDiscardRectangle],
5787bf215546Sopenharmony_ci                pDiscardRectangles, discardRectangleCount);
5788bf215546Sopenharmony_ci
5789bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DISCARD_RECTANGLE;
5790bf215546Sopenharmony_ci}
5791bf215546Sopenharmony_ci
5792bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5793bf215546Sopenharmony_ciradv_CmdSetSampleLocationsEXT(VkCommandBuffer commandBuffer,
5794bf215546Sopenharmony_ci                              const VkSampleLocationsInfoEXT *pSampleLocationsInfo)
5795bf215546Sopenharmony_ci{
5796bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5797bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5798bf215546Sopenharmony_ci
5799bf215546Sopenharmony_ci   assert(pSampleLocationsInfo->sampleLocationsCount <= MAX_SAMPLE_LOCATIONS);
5800bf215546Sopenharmony_ci
5801bf215546Sopenharmony_ci   state->dynamic.sample_location.per_pixel = pSampleLocationsInfo->sampleLocationsPerPixel;
5802bf215546Sopenharmony_ci   state->dynamic.sample_location.grid_size = pSampleLocationsInfo->sampleLocationGridSize;
5803bf215546Sopenharmony_ci   state->dynamic.sample_location.count = pSampleLocationsInfo->sampleLocationsCount;
5804bf215546Sopenharmony_ci   typed_memcpy(&state->dynamic.sample_location.locations[0],
5805bf215546Sopenharmony_ci                pSampleLocationsInfo->pSampleLocations, pSampleLocationsInfo->sampleLocationsCount);
5806bf215546Sopenharmony_ci
5807bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS;
5808bf215546Sopenharmony_ci}
5809bf215546Sopenharmony_ci
5810bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5811bf215546Sopenharmony_ciradv_CmdSetLineStippleEXT(VkCommandBuffer commandBuffer, uint32_t lineStippleFactor,
5812bf215546Sopenharmony_ci                          uint16_t lineStipplePattern)
5813bf215546Sopenharmony_ci{
5814bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5815bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5816bf215546Sopenharmony_ci
5817bf215546Sopenharmony_ci   state->dynamic.line_stipple.factor = lineStippleFactor;
5818bf215546Sopenharmony_ci   state->dynamic.line_stipple.pattern = lineStipplePattern;
5819bf215546Sopenharmony_ci
5820bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
5821bf215546Sopenharmony_ci}
5822bf215546Sopenharmony_ci
5823bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5824bf215546Sopenharmony_ciradv_CmdSetCullMode(VkCommandBuffer commandBuffer, VkCullModeFlags cullMode)
5825bf215546Sopenharmony_ci{
5826bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5827bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5828bf215546Sopenharmony_ci
5829bf215546Sopenharmony_ci   state->dynamic.cull_mode = cullMode;
5830bf215546Sopenharmony_ci
5831bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_CULL_MODE;
5832bf215546Sopenharmony_ci}
5833bf215546Sopenharmony_ci
5834bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5835bf215546Sopenharmony_ciradv_CmdSetFrontFace(VkCommandBuffer commandBuffer, VkFrontFace frontFace)
5836bf215546Sopenharmony_ci{
5837bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5838bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5839bf215546Sopenharmony_ci
5840bf215546Sopenharmony_ci   state->dynamic.front_face = frontFace;
5841bf215546Sopenharmony_ci
5842bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
5843bf215546Sopenharmony_ci}
5844bf215546Sopenharmony_ci
5845bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5846bf215546Sopenharmony_ciradv_CmdSetPrimitiveTopology(VkCommandBuffer commandBuffer, VkPrimitiveTopology primitiveTopology)
5847bf215546Sopenharmony_ci{
5848bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5849bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5850bf215546Sopenharmony_ci   unsigned primitive_topology = si_translate_prim(primitiveTopology);
5851bf215546Sopenharmony_ci
5852bf215546Sopenharmony_ci   if ((state->dynamic.primitive_topology == V_008958_DI_PT_LINESTRIP) !=
5853bf215546Sopenharmony_ci       (primitive_topology == V_008958_DI_PT_LINESTRIP))
5854bf215546Sopenharmony_ci      state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE;
5855bf215546Sopenharmony_ci
5856bf215546Sopenharmony_ci   if (radv_prim_is_points_or_lines(state->dynamic.primitive_topology) !=
5857bf215546Sopenharmony_ci       radv_prim_is_points_or_lines(primitive_topology))
5858bf215546Sopenharmony_ci      state->dirty |= RADV_CMD_DIRTY_DYNAMIC_SCISSOR;
5859bf215546Sopenharmony_ci
5860bf215546Sopenharmony_ci   state->dynamic.primitive_topology = primitive_topology;
5861bf215546Sopenharmony_ci
5862bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY;
5863bf215546Sopenharmony_ci}
5864bf215546Sopenharmony_ci
5865bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5866bf215546Sopenharmony_ciradv_CmdSetViewportWithCount(VkCommandBuffer commandBuffer, uint32_t viewportCount,
5867bf215546Sopenharmony_ci                             const VkViewport *pViewports)
5868bf215546Sopenharmony_ci{
5869bf215546Sopenharmony_ci   radv_CmdSetViewport(commandBuffer, 0, viewportCount, pViewports);
5870bf215546Sopenharmony_ci}
5871bf215546Sopenharmony_ci
5872bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5873bf215546Sopenharmony_ciradv_CmdSetScissorWithCount(VkCommandBuffer commandBuffer, uint32_t scissorCount,
5874bf215546Sopenharmony_ci                            const VkRect2D *pScissors)
5875bf215546Sopenharmony_ci{
5876bf215546Sopenharmony_ci   radv_CmdSetScissor(commandBuffer, 0, scissorCount, pScissors);
5877bf215546Sopenharmony_ci}
5878bf215546Sopenharmony_ci
5879bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5880bf215546Sopenharmony_ciradv_CmdSetDepthTestEnable(VkCommandBuffer commandBuffer, VkBool32 depthTestEnable)
5881bf215546Sopenharmony_ci
5882bf215546Sopenharmony_ci{
5883bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5884bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5885bf215546Sopenharmony_ci
5886bf215546Sopenharmony_ci   state->dynamic.depth_test_enable = depthTestEnable;
5887bf215546Sopenharmony_ci
5888bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE;
5889bf215546Sopenharmony_ci}
5890bf215546Sopenharmony_ci
5891bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5892bf215546Sopenharmony_ciradv_CmdSetDepthWriteEnable(VkCommandBuffer commandBuffer, VkBool32 depthWriteEnable)
5893bf215546Sopenharmony_ci{
5894bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5895bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5896bf215546Sopenharmony_ci
5897bf215546Sopenharmony_ci   state->dynamic.depth_write_enable = depthWriteEnable;
5898bf215546Sopenharmony_ci
5899bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE;
5900bf215546Sopenharmony_ci}
5901bf215546Sopenharmony_ci
5902bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5903bf215546Sopenharmony_ciradv_CmdSetDepthCompareOp(VkCommandBuffer commandBuffer, VkCompareOp depthCompareOp)
5904bf215546Sopenharmony_ci{
5905bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5906bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5907bf215546Sopenharmony_ci
5908bf215546Sopenharmony_ci   state->dynamic.depth_compare_op = depthCompareOp;
5909bf215546Sopenharmony_ci
5910bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP;
5911bf215546Sopenharmony_ci}
5912bf215546Sopenharmony_ci
5913bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5914bf215546Sopenharmony_ciradv_CmdSetDepthBoundsTestEnable(VkCommandBuffer commandBuffer, VkBool32 depthBoundsTestEnable)
5915bf215546Sopenharmony_ci{
5916bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5917bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5918bf215546Sopenharmony_ci
5919bf215546Sopenharmony_ci   state->dynamic.depth_bounds_test_enable = depthBoundsTestEnable;
5920bf215546Sopenharmony_ci
5921bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE;
5922bf215546Sopenharmony_ci}
5923bf215546Sopenharmony_ci
5924bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5925bf215546Sopenharmony_ciradv_CmdSetStencilTestEnable(VkCommandBuffer commandBuffer, VkBool32 stencilTestEnable)
5926bf215546Sopenharmony_ci{
5927bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5928bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5929bf215546Sopenharmony_ci
5930bf215546Sopenharmony_ci   state->dynamic.stencil_test_enable = stencilTestEnable;
5931bf215546Sopenharmony_ci
5932bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE;
5933bf215546Sopenharmony_ci}
5934bf215546Sopenharmony_ci
5935bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5936bf215546Sopenharmony_ciradv_CmdSetStencilOp(VkCommandBuffer commandBuffer, VkStencilFaceFlags faceMask,
5937bf215546Sopenharmony_ci                     VkStencilOp failOp, VkStencilOp passOp, VkStencilOp depthFailOp,
5938bf215546Sopenharmony_ci                     VkCompareOp compareOp)
5939bf215546Sopenharmony_ci{
5940bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5941bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5942bf215546Sopenharmony_ci
5943bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_FRONT_BIT) {
5944bf215546Sopenharmony_ci      state->dynamic.stencil_op.front.fail_op = failOp;
5945bf215546Sopenharmony_ci      state->dynamic.stencil_op.front.pass_op = passOp;
5946bf215546Sopenharmony_ci      state->dynamic.stencil_op.front.depth_fail_op = depthFailOp;
5947bf215546Sopenharmony_ci      state->dynamic.stencil_op.front.compare_op = compareOp;
5948bf215546Sopenharmony_ci   }
5949bf215546Sopenharmony_ci
5950bf215546Sopenharmony_ci   if (faceMask & VK_STENCIL_FACE_BACK_BIT) {
5951bf215546Sopenharmony_ci      state->dynamic.stencil_op.back.fail_op = failOp;
5952bf215546Sopenharmony_ci      state->dynamic.stencil_op.back.pass_op = passOp;
5953bf215546Sopenharmony_ci      state->dynamic.stencil_op.back.depth_fail_op = depthFailOp;
5954bf215546Sopenharmony_ci      state->dynamic.stencil_op.back.compare_op = compareOp;
5955bf215546Sopenharmony_ci   }
5956bf215546Sopenharmony_ci
5957bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_STENCIL_OP;
5958bf215546Sopenharmony_ci}
5959bf215546Sopenharmony_ci
5960bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5961bf215546Sopenharmony_ciradv_CmdSetFragmentShadingRateKHR(VkCommandBuffer commandBuffer, const VkExtent2D *pFragmentSize,
5962bf215546Sopenharmony_ci                                  const VkFragmentShadingRateCombinerOpKHR combinerOps[2])
5963bf215546Sopenharmony_ci{
5964bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5965bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5966bf215546Sopenharmony_ci
5967bf215546Sopenharmony_ci   state->dynamic.fragment_shading_rate.size = *pFragmentSize;
5968bf215546Sopenharmony_ci   for (unsigned i = 0; i < 2; i++)
5969bf215546Sopenharmony_ci      state->dynamic.fragment_shading_rate.combiner_ops[i] = combinerOps[i];
5970bf215546Sopenharmony_ci
5971bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
5972bf215546Sopenharmony_ci}
5973bf215546Sopenharmony_ci
5974bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5975bf215546Sopenharmony_ciradv_CmdSetDepthBiasEnable(VkCommandBuffer commandBuffer, VkBool32 depthBiasEnable)
5976bf215546Sopenharmony_ci{
5977bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5978bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5979bf215546Sopenharmony_ci
5980bf215546Sopenharmony_ci   state->dynamic.depth_bias_enable = depthBiasEnable;
5981bf215546Sopenharmony_ci
5982bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE;
5983bf215546Sopenharmony_ci}
5984bf215546Sopenharmony_ci
5985bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5986bf215546Sopenharmony_ciradv_CmdSetPrimitiveRestartEnable(VkCommandBuffer commandBuffer, VkBool32 primitiveRestartEnable)
5987bf215546Sopenharmony_ci{
5988bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
5989bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
5990bf215546Sopenharmony_ci
5991bf215546Sopenharmony_ci   state->dynamic.primitive_restart_enable = primitiveRestartEnable;
5992bf215546Sopenharmony_ci
5993bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE;
5994bf215546Sopenharmony_ci}
5995bf215546Sopenharmony_ci
5996bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
5997bf215546Sopenharmony_ciradv_CmdSetRasterizerDiscardEnable(VkCommandBuffer commandBuffer, VkBool32 rasterizerDiscardEnable)
5998bf215546Sopenharmony_ci{
5999bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
6000bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6001bf215546Sopenharmony_ci
6002bf215546Sopenharmony_ci   state->dynamic.rasterizer_discard_enable = rasterizerDiscardEnable;
6003bf215546Sopenharmony_ci
6004bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
6005bf215546Sopenharmony_ci}
6006bf215546Sopenharmony_ci
6007bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6008bf215546Sopenharmony_ciradv_CmdSetPatchControlPointsEXT(VkCommandBuffer commandBuffer, uint32_t patchControlPoints)
6009bf215546Sopenharmony_ci{
6010bf215546Sopenharmony_ci   /* not implemented */
6011bf215546Sopenharmony_ci}
6012bf215546Sopenharmony_ci
6013bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6014bf215546Sopenharmony_ciradv_CmdSetLogicOpEXT(VkCommandBuffer commandBuffer, VkLogicOp logicOp)
6015bf215546Sopenharmony_ci{
6016bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
6017bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6018bf215546Sopenharmony_ci   unsigned logic_op = si_translate_blend_logic_op(logicOp);
6019bf215546Sopenharmony_ci
6020bf215546Sopenharmony_ci   state->dynamic.logic_op = logic_op;
6021bf215546Sopenharmony_ci
6022bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_LOGIC_OP;
6023bf215546Sopenharmony_ci}
6024bf215546Sopenharmony_ci
6025bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6026bf215546Sopenharmony_ciradv_CmdSetColorWriteEnableEXT(VkCommandBuffer commandBuffer, uint32_t attachmentCount,
6027bf215546Sopenharmony_ci                               const VkBool32 *pColorWriteEnables)
6028bf215546Sopenharmony_ci{
6029bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
6030bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6031bf215546Sopenharmony_ci   uint32_t color_write_enable = 0;
6032bf215546Sopenharmony_ci
6033bf215546Sopenharmony_ci   assert(attachmentCount <= MAX_RTS);
6034bf215546Sopenharmony_ci
6035bf215546Sopenharmony_ci   for (uint32_t i = 0; i < attachmentCount; i++) {
6036bf215546Sopenharmony_ci      color_write_enable |= pColorWriteEnables[i] ? (0xfu << (i * 4)) : 0;
6037bf215546Sopenharmony_ci   }
6038bf215546Sopenharmony_ci
6039bf215546Sopenharmony_ci   state->dynamic.color_write_enable = color_write_enable;
6040bf215546Sopenharmony_ci
6041bf215546Sopenharmony_ci   state->dirty |= RADV_CMD_DIRTY_DYNAMIC_COLOR_WRITE_ENABLE;
6042bf215546Sopenharmony_ci}
6043bf215546Sopenharmony_ci
6044bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6045bf215546Sopenharmony_ciradv_CmdSetVertexInputEXT(VkCommandBuffer commandBuffer, uint32_t vertexBindingDescriptionCount,
6046bf215546Sopenharmony_ci                          const VkVertexInputBindingDescription2EXT *pVertexBindingDescriptions,
6047bf215546Sopenharmony_ci                          uint32_t vertexAttributeDescriptionCount,
6048bf215546Sopenharmony_ci                          const VkVertexInputAttributeDescription2EXT *pVertexAttributeDescriptions)
6049bf215546Sopenharmony_ci{
6050bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
6051bf215546Sopenharmony_ci   struct radv_vs_input_state *state = &cmd_buffer->state.dynamic_vs_input;
6052bf215546Sopenharmony_ci
6053bf215546Sopenharmony_ci   const VkVertexInputBindingDescription2EXT *bindings[MAX_VBS];
6054bf215546Sopenharmony_ci   for (unsigned i = 0; i < vertexBindingDescriptionCount; i++)
6055bf215546Sopenharmony_ci      bindings[pVertexBindingDescriptions[i].binding] = &pVertexBindingDescriptions[i];
6056bf215546Sopenharmony_ci
6057bf215546Sopenharmony_ci   cmd_buffer->state.vbo_misaligned_mask = 0;
6058bf215546Sopenharmony_ci   cmd_buffer->state.vbo_misaligned_mask_invalid = 0;
6059bf215546Sopenharmony_ci
6060bf215546Sopenharmony_ci   memset(state, 0, sizeof(*state));
6061bf215546Sopenharmony_ci   state->bindings_match_attrib = true;
6062bf215546Sopenharmony_ci
6063bf215546Sopenharmony_ci   enum amd_gfx_level chip = cmd_buffer->device->physical_device->rad_info.gfx_level;
6064bf215546Sopenharmony_ci   for (unsigned i = 0; i < vertexAttributeDescriptionCount; i++) {
6065bf215546Sopenharmony_ci      const VkVertexInputAttributeDescription2EXT *attrib = &pVertexAttributeDescriptions[i];
6066bf215546Sopenharmony_ci      const VkVertexInputBindingDescription2EXT *binding = bindings[attrib->binding];
6067bf215546Sopenharmony_ci      unsigned loc = attrib->location;
6068bf215546Sopenharmony_ci
6069bf215546Sopenharmony_ci      state->attribute_mask |= 1u << loc;
6070bf215546Sopenharmony_ci      state->bindings[loc] = attrib->binding;
6071bf215546Sopenharmony_ci      if (attrib->binding != loc)
6072bf215546Sopenharmony_ci         state->bindings_match_attrib = false;
6073bf215546Sopenharmony_ci      if (binding->inputRate == VK_VERTEX_INPUT_RATE_INSTANCE) {
6074bf215546Sopenharmony_ci         state->instance_rate_inputs |= 1u << loc;
6075bf215546Sopenharmony_ci         state->divisors[loc] = binding->divisor;
6076bf215546Sopenharmony_ci         if (binding->divisor == 0) {
6077bf215546Sopenharmony_ci            state->zero_divisors |= 1u << loc;
6078bf215546Sopenharmony_ci         } else if (binding->divisor > 1) {
6079bf215546Sopenharmony_ci            state->nontrivial_divisors |= 1u << loc;
6080bf215546Sopenharmony_ci         }
6081bf215546Sopenharmony_ci      }
6082bf215546Sopenharmony_ci      cmd_buffer->vertex_bindings[attrib->binding].stride = binding->stride;
6083bf215546Sopenharmony_ci      state->offsets[loc] = attrib->offset;
6084bf215546Sopenharmony_ci
6085bf215546Sopenharmony_ci      struct dynamic_vertex_format_cache *found = NULL;
6086bf215546Sopenharmony_ci      util_dynarray_foreach(&cmd_buffer->cached_vertex_formats,
6087bf215546Sopenharmony_ci                            struct dynamic_vertex_format_cache,
6088bf215546Sopenharmony_ci                            vf) {
6089bf215546Sopenharmony_ci         if (vf->format == attrib->format) {
6090bf215546Sopenharmony_ci            found = vf;
6091bf215546Sopenharmony_ci            break;
6092bf215546Sopenharmony_ci         }
6093bf215546Sopenharmony_ci      }
6094bf215546Sopenharmony_ci      if (!found) {
6095bf215546Sopenharmony_ci         unsigned nfmt, dfmt;
6096bf215546Sopenharmony_ci         bool post_shuffle;
6097bf215546Sopenharmony_ci         enum radv_vs_input_alpha_adjust alpha_adjust;
6098bf215546Sopenharmony_ci         const struct util_format_description *format_desc = vk_format_description(attrib->format);
6099bf215546Sopenharmony_ci
6100bf215546Sopenharmony_ci         found = util_dynarray_grow(&cmd_buffer->cached_vertex_formats,
6101bf215546Sopenharmony_ci                                    struct dynamic_vertex_format_cache, 1);
6102bf215546Sopenharmony_ci         radv_translate_vertex_format(cmd_buffer->device->physical_device, attrib->format, format_desc,
6103bf215546Sopenharmony_ci                                      &dfmt, &nfmt, &post_shuffle, &alpha_adjust);
6104bf215546Sopenharmony_ci         found->format = attrib->format;
6105bf215546Sopenharmony_ci         found->hw_fmt = dfmt | (nfmt << 4);
6106bf215546Sopenharmony_ci         const uint8_t format_align_req_minus_1 = format_desc->channel[0].size >= 32 ? 3 :
6107bf215546Sopenharmony_ci            (format_desc->block.bits / 8u - 1);
6108bf215546Sopenharmony_ci         found->fmt_align_req_minus_1 = format_align_req_minus_1;
6109bf215546Sopenharmony_ci         found->fmt_size = format_desc->block.bits / 8u;
6110bf215546Sopenharmony_ci         found->post_shuffle = post_shuffle;
6111bf215546Sopenharmony_ci         found->alpha_adjust_lo = alpha_adjust & 0x1;
6112bf215546Sopenharmony_ci         found->alpha_adjust_hi = (alpha_adjust >> 1) & 0x1;
6113bf215546Sopenharmony_ci      }
6114bf215546Sopenharmony_ci
6115bf215546Sopenharmony_ci      state->formats[loc] = found->hw_fmt;
6116bf215546Sopenharmony_ci      state->format_align_req_minus_1[loc] = found->fmt_align_req_minus_1;
6117bf215546Sopenharmony_ci      state->format_sizes[loc] = found->fmt_size;
6118bf215546Sopenharmony_ci      state->alpha_adjust_lo |= found->alpha_adjust_lo << loc;
6119bf215546Sopenharmony_ci      state->alpha_adjust_hi |= found->alpha_adjust_hi << loc;
6120bf215546Sopenharmony_ci      if (found->post_shuffle)
6121bf215546Sopenharmony_ci         state->post_shuffle |= 1u << loc;
6122bf215546Sopenharmony_ci
6123bf215546Sopenharmony_ci      if ((chip == GFX6 || chip >= GFX10) &&
6124bf215546Sopenharmony_ci          cmd_buffer->state.vbo_bound_mask & BITFIELD_BIT(attrib->binding)) {
6125bf215546Sopenharmony_ci         if (binding->stride & found->fmt_align_req_minus_1) {
6126bf215546Sopenharmony_ci            cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc);
6127bf215546Sopenharmony_ci         } else if ((cmd_buffer->vertex_bindings[attrib->binding].offset + state->offsets[loc]) &
6128bf215546Sopenharmony_ci                    found->fmt_align_req_minus_1) {
6129bf215546Sopenharmony_ci            cmd_buffer->state.vbo_misaligned_mask |= BITFIELD_BIT(loc);
6130bf215546Sopenharmony_ci         }
6131bf215546Sopenharmony_ci      }
6132bf215546Sopenharmony_ci   }
6133bf215546Sopenharmony_ci
6134bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER |
6135bf215546Sopenharmony_ci                              RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT;
6136bf215546Sopenharmony_ci}
6137bf215546Sopenharmony_ci
6138bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6139bf215546Sopenharmony_ciradv_CmdExecuteCommands(VkCommandBuffer commandBuffer, uint32_t commandBufferCount,
6140bf215546Sopenharmony_ci                        const VkCommandBuffer *pCmdBuffers)
6141bf215546Sopenharmony_ci{
6142bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, primary, commandBuffer);
6143bf215546Sopenharmony_ci
6144bf215546Sopenharmony_ci   assert(commandBufferCount > 0);
6145bf215546Sopenharmony_ci
6146bf215546Sopenharmony_ci   radv_emit_mip_change_flush_default(primary);
6147bf215546Sopenharmony_ci
6148bf215546Sopenharmony_ci   /* Emit pending flushes on primary prior to executing secondary */
6149bf215546Sopenharmony_ci   si_emit_cache_flush(primary);
6150bf215546Sopenharmony_ci
6151bf215546Sopenharmony_ci   /* Make sure CP DMA is idle on primary prior to executing secondary. */
6152bf215546Sopenharmony_ci   si_cp_dma_wait_for_idle(primary);
6153bf215546Sopenharmony_ci
6154bf215546Sopenharmony_ci   for (uint32_t i = 0; i < commandBufferCount; i++) {
6155bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_cmd_buffer, secondary, pCmdBuffers[i]);
6156bf215546Sopenharmony_ci      bool allow_ib2 = true;
6157bf215546Sopenharmony_ci
6158bf215546Sopenharmony_ci      if (secondary->device->physical_device->rad_info.gfx_level == GFX7 &&
6159bf215546Sopenharmony_ci          secondary->state.uses_draw_indirect_multi) {
6160bf215546Sopenharmony_ci         /* Do not launch an IB2 for secondary command buffers that contain
6161bf215546Sopenharmony_ci          * DRAW_{INDEX}_INDIRECT_MULTI on GFX7 because it's illegal and hang the GPU.
6162bf215546Sopenharmony_ci          */
6163bf215546Sopenharmony_ci         allow_ib2 = false;
6164bf215546Sopenharmony_ci      }
6165bf215546Sopenharmony_ci
6166bf215546Sopenharmony_ci      if (secondary->qf == RADV_QUEUE_COMPUTE) {
6167bf215546Sopenharmony_ci         /* IB2 packets are not supported on compute queues according to PAL. */
6168bf215546Sopenharmony_ci         allow_ib2 = false;
6169bf215546Sopenharmony_ci      }
6170bf215546Sopenharmony_ci
6171bf215546Sopenharmony_ci      primary->scratch_size_per_wave_needed =
6172bf215546Sopenharmony_ci         MAX2(primary->scratch_size_per_wave_needed, secondary->scratch_size_per_wave_needed);
6173bf215546Sopenharmony_ci      primary->scratch_waves_wanted =
6174bf215546Sopenharmony_ci         MAX2(primary->scratch_waves_wanted, secondary->scratch_waves_wanted);
6175bf215546Sopenharmony_ci      primary->compute_scratch_size_per_wave_needed =
6176bf215546Sopenharmony_ci         MAX2(primary->compute_scratch_size_per_wave_needed,
6177bf215546Sopenharmony_ci              secondary->compute_scratch_size_per_wave_needed);
6178bf215546Sopenharmony_ci      primary->compute_scratch_waves_wanted =
6179bf215546Sopenharmony_ci         MAX2(primary->compute_scratch_waves_wanted, secondary->compute_scratch_waves_wanted);
6180bf215546Sopenharmony_ci
6181bf215546Sopenharmony_ci      if (secondary->esgs_ring_size_needed > primary->esgs_ring_size_needed)
6182bf215546Sopenharmony_ci         primary->esgs_ring_size_needed = secondary->esgs_ring_size_needed;
6183bf215546Sopenharmony_ci      if (secondary->gsvs_ring_size_needed > primary->gsvs_ring_size_needed)
6184bf215546Sopenharmony_ci         primary->gsvs_ring_size_needed = secondary->gsvs_ring_size_needed;
6185bf215546Sopenharmony_ci      if (secondary->tess_rings_needed)
6186bf215546Sopenharmony_ci         primary->tess_rings_needed = true;
6187bf215546Sopenharmony_ci      if (secondary->task_rings_needed)
6188bf215546Sopenharmony_ci         primary->task_rings_needed = true;
6189bf215546Sopenharmony_ci      if (secondary->mesh_scratch_ring_needed)
6190bf215546Sopenharmony_ci         primary->mesh_scratch_ring_needed = true;
6191bf215546Sopenharmony_ci      if (secondary->sample_positions_needed)
6192bf215546Sopenharmony_ci         primary->sample_positions_needed = true;
6193bf215546Sopenharmony_ci      if (secondary->gds_needed)
6194bf215546Sopenharmony_ci         primary->gds_needed = true;
6195bf215546Sopenharmony_ci
6196bf215546Sopenharmony_ci      if (!secondary->state.framebuffer && primary->state.pass && (primary->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)) {
6197bf215546Sopenharmony_ci         /* Emit the framebuffer state from primary if secondary
6198bf215546Sopenharmony_ci          * has been recorded without a framebuffer, otherwise
6199bf215546Sopenharmony_ci          * fast color/depth clears can't work.
6200bf215546Sopenharmony_ci          */
6201bf215546Sopenharmony_ci         radv_emit_fb_mip_change_flush(primary);
6202bf215546Sopenharmony_ci         radv_emit_framebuffer_state(primary);
6203bf215546Sopenharmony_ci      }
6204bf215546Sopenharmony_ci
6205bf215546Sopenharmony_ci      if (secondary->ace_internal.cs) {
6206bf215546Sopenharmony_ci         if (!primary->ace_internal.cs) {
6207bf215546Sopenharmony_ci            primary->ace_internal.cs = radv_ace_internal_create(primary);
6208bf215546Sopenharmony_ci            if (!primary->ace_internal.cs)
6209bf215546Sopenharmony_ci               return;
6210bf215546Sopenharmony_ci         }
6211bf215546Sopenharmony_ci
6212bf215546Sopenharmony_ci         struct radeon_cmdbuf *ace_primary = primary->ace_internal.cs;
6213bf215546Sopenharmony_ci         struct radeon_cmdbuf *ace_secondary = secondary->ace_internal.cs;
6214bf215546Sopenharmony_ci
6215bf215546Sopenharmony_ci         /* Emit pending flushes on primary prior to executing secondary. */
6216bf215546Sopenharmony_ci         radv_ace_internal_cache_flush(primary);
6217bf215546Sopenharmony_ci
6218bf215546Sopenharmony_ci         /* Wait for primary GFX->ACE semaphore, if necessary. */
6219bf215546Sopenharmony_ci         if (radv_flush_gfx2ace_semaphore(primary))
6220bf215546Sopenharmony_ci            radv_wait_gfx2ace_semaphore(primary);
6221bf215546Sopenharmony_ci
6222bf215546Sopenharmony_ci         /* Execute the secondary compute cmdbuf.
6223bf215546Sopenharmony_ci          * Don't use IB2 packets because they are not supported on compute queues.
6224bf215546Sopenharmony_ci          */
6225bf215546Sopenharmony_ci         primary->device->ws->cs_execute_secondary(ace_primary, ace_secondary, false);
6226bf215546Sopenharmony_ci      }
6227bf215546Sopenharmony_ci
6228bf215546Sopenharmony_ci      /* Update pending ACE internal flush bits from the secondary cmdbuf */
6229bf215546Sopenharmony_ci      primary->ace_internal.flush_bits |= secondary->ace_internal.flush_bits;
6230bf215546Sopenharmony_ci
6231bf215546Sopenharmony_ci      /* Increment primary semaphore if secondary was dirty.
6232bf215546Sopenharmony_ci       * This happens when the secondary cmdbuf has a barrier which
6233bf215546Sopenharmony_ci       * isn't consumed by a draw call.
6234bf215546Sopenharmony_ci       */
6235bf215546Sopenharmony_ci      if (radv_ace_internal_sem_dirty(secondary))
6236bf215546Sopenharmony_ci         primary->ace_internal.sem.gfx2ace_value++;
6237bf215546Sopenharmony_ci
6238bf215546Sopenharmony_ci      primary->device->ws->cs_execute_secondary(primary->cs, secondary->cs, allow_ib2);
6239bf215546Sopenharmony_ci
6240bf215546Sopenharmony_ci      /* When the secondary command buffer is compute only we don't
6241bf215546Sopenharmony_ci       * need to re-emit the current graphics pipeline.
6242bf215546Sopenharmony_ci       */
6243bf215546Sopenharmony_ci      if (secondary->state.emitted_graphics_pipeline) {
6244bf215546Sopenharmony_ci         primary->state.emitted_graphics_pipeline = secondary->state.emitted_graphics_pipeline;
6245bf215546Sopenharmony_ci      }
6246bf215546Sopenharmony_ci
6247bf215546Sopenharmony_ci      /* When the secondary command buffer is graphics only we don't
6248bf215546Sopenharmony_ci       * need to re-emit the current compute pipeline.
6249bf215546Sopenharmony_ci       */
6250bf215546Sopenharmony_ci      if (secondary->state.emitted_compute_pipeline) {
6251bf215546Sopenharmony_ci         primary->state.emitted_compute_pipeline = secondary->state.emitted_compute_pipeline;
6252bf215546Sopenharmony_ci      }
6253bf215546Sopenharmony_ci
6254bf215546Sopenharmony_ci      /* Only re-emit the draw packets when needed. */
6255bf215546Sopenharmony_ci      if (secondary->state.last_primitive_reset_en != -1) {
6256bf215546Sopenharmony_ci         primary->state.last_primitive_reset_en = secondary->state.last_primitive_reset_en;
6257bf215546Sopenharmony_ci      }
6258bf215546Sopenharmony_ci
6259bf215546Sopenharmony_ci      if (secondary->state.last_primitive_reset_index) {
6260bf215546Sopenharmony_ci         primary->state.last_primitive_reset_index = secondary->state.last_primitive_reset_index;
6261bf215546Sopenharmony_ci      }
6262bf215546Sopenharmony_ci
6263bf215546Sopenharmony_ci      if (secondary->state.last_ia_multi_vgt_param) {
6264bf215546Sopenharmony_ci         primary->state.last_ia_multi_vgt_param = secondary->state.last_ia_multi_vgt_param;
6265bf215546Sopenharmony_ci      }
6266bf215546Sopenharmony_ci
6267bf215546Sopenharmony_ci      primary->state.last_first_instance = secondary->state.last_first_instance;
6268bf215546Sopenharmony_ci      primary->state.last_num_instances = secondary->state.last_num_instances;
6269bf215546Sopenharmony_ci      primary->state.last_drawid = secondary->state.last_drawid;
6270bf215546Sopenharmony_ci      primary->state.last_subpass_color_count = secondary->state.last_subpass_color_count;
6271bf215546Sopenharmony_ci      primary->state.last_vertex_offset = secondary->state.last_vertex_offset;
6272bf215546Sopenharmony_ci      primary->state.last_sx_ps_downconvert = secondary->state.last_sx_ps_downconvert;
6273bf215546Sopenharmony_ci      primary->state.last_sx_blend_opt_epsilon = secondary->state.last_sx_blend_opt_epsilon;
6274bf215546Sopenharmony_ci      primary->state.last_sx_blend_opt_control = secondary->state.last_sx_blend_opt_control;
6275bf215546Sopenharmony_ci
6276bf215546Sopenharmony_ci      if (secondary->state.last_index_type != -1) {
6277bf215546Sopenharmony_ci         primary->state.last_index_type = secondary->state.last_index_type;
6278bf215546Sopenharmony_ci      }
6279bf215546Sopenharmony_ci
6280bf215546Sopenharmony_ci      primary->state.last_nggc_settings = secondary->state.last_nggc_settings;
6281bf215546Sopenharmony_ci      primary->state.last_nggc_settings_sgpr_idx = secondary->state.last_nggc_settings_sgpr_idx;
6282bf215546Sopenharmony_ci      primary->state.last_nggc_skip = secondary->state.last_nggc_skip;
6283bf215546Sopenharmony_ci
6284bf215546Sopenharmony_ci      primary->state.last_vrs_rates = secondary->state.last_vrs_rates;
6285bf215546Sopenharmony_ci      primary->state.last_vrs_rates_sgpr_idx = secondary->state.last_vrs_rates_sgpr_idx;
6286bf215546Sopenharmony_ci   }
6287bf215546Sopenharmony_ci
6288bf215546Sopenharmony_ci   /* After executing commands from secondary buffers we have to dirty
6289bf215546Sopenharmony_ci    * some states.
6290bf215546Sopenharmony_ci    */
6291bf215546Sopenharmony_ci   primary->state.dirty |=
6292bf215546Sopenharmony_ci      RADV_CMD_DIRTY_PIPELINE | RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_DYNAMIC_ALL;
6293bf215546Sopenharmony_ci   radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_GRAPHICS);
6294bf215546Sopenharmony_ci   radv_mark_descriptor_sets_dirty(primary, VK_PIPELINE_BIND_POINT_COMPUTE);
6295bf215546Sopenharmony_ci}
6296bf215546Sopenharmony_ci
6297bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
6298bf215546Sopenharmony_ciradv_CreateCommandPool(VkDevice _device, const VkCommandPoolCreateInfo *pCreateInfo,
6299bf215546Sopenharmony_ci                       const VkAllocationCallbacks *pAllocator, VkCommandPool *pCmdPool)
6300bf215546Sopenharmony_ci{
6301bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
6302bf215546Sopenharmony_ci   struct radv_cmd_pool *pool;
6303bf215546Sopenharmony_ci
6304bf215546Sopenharmony_ci   pool =
6305bf215546Sopenharmony_ci      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*pool), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
6306bf215546Sopenharmony_ci   if (pool == NULL)
6307bf215546Sopenharmony_ci      return vk_error(device, VK_ERROR_OUT_OF_HOST_MEMORY);
6308bf215546Sopenharmony_ci
6309bf215546Sopenharmony_ci   VkResult result = vk_command_pool_init(&pool->vk, &device->vk, pCreateInfo, pAllocator);
6310bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
6311bf215546Sopenharmony_ci      vk_free2(&device->vk.alloc, pAllocator, pool);
6312bf215546Sopenharmony_ci      return result;
6313bf215546Sopenharmony_ci   }
6314bf215546Sopenharmony_ci
6315bf215546Sopenharmony_ci   list_inithead(&pool->cmd_buffers);
6316bf215546Sopenharmony_ci   list_inithead(&pool->free_cmd_buffers);
6317bf215546Sopenharmony_ci
6318bf215546Sopenharmony_ci   *pCmdPool = radv_cmd_pool_to_handle(pool);
6319bf215546Sopenharmony_ci
6320bf215546Sopenharmony_ci   return VK_SUCCESS;
6321bf215546Sopenharmony_ci}
6322bf215546Sopenharmony_ci
6323bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6324bf215546Sopenharmony_ciradv_DestroyCommandPool(VkDevice _device, VkCommandPool commandPool,
6325bf215546Sopenharmony_ci                        const VkAllocationCallbacks *pAllocator)
6326bf215546Sopenharmony_ci{
6327bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_device, device, _device);
6328bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
6329bf215546Sopenharmony_ci
6330bf215546Sopenharmony_ci   if (!pool)
6331bf215546Sopenharmony_ci      return;
6332bf215546Sopenharmony_ci
6333bf215546Sopenharmony_ci   list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link)
6334bf215546Sopenharmony_ci   {
6335bf215546Sopenharmony_ci      radv_destroy_cmd_buffer(cmd_buffer);
6336bf215546Sopenharmony_ci   }
6337bf215546Sopenharmony_ci
6338bf215546Sopenharmony_ci   list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link)
6339bf215546Sopenharmony_ci   {
6340bf215546Sopenharmony_ci      radv_destroy_cmd_buffer(cmd_buffer);
6341bf215546Sopenharmony_ci   }
6342bf215546Sopenharmony_ci
6343bf215546Sopenharmony_ci   vk_command_pool_finish(&pool->vk);
6344bf215546Sopenharmony_ci   vk_free2(&device->vk.alloc, pAllocator, pool);
6345bf215546Sopenharmony_ci}
6346bf215546Sopenharmony_ci
6347bf215546Sopenharmony_ciVKAPI_ATTR VkResult VKAPI_CALL
6348bf215546Sopenharmony_ciradv_ResetCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolResetFlags flags)
6349bf215546Sopenharmony_ci{
6350bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
6351bf215546Sopenharmony_ci   VkResult result;
6352bf215546Sopenharmony_ci
6353bf215546Sopenharmony_ci   list_for_each_entry(struct radv_cmd_buffer, cmd_buffer, &pool->cmd_buffers, pool_link)
6354bf215546Sopenharmony_ci   {
6355bf215546Sopenharmony_ci      result = radv_reset_cmd_buffer(cmd_buffer);
6356bf215546Sopenharmony_ci      if (result != VK_SUCCESS)
6357bf215546Sopenharmony_ci         return result;
6358bf215546Sopenharmony_ci   }
6359bf215546Sopenharmony_ci
6360bf215546Sopenharmony_ci   return VK_SUCCESS;
6361bf215546Sopenharmony_ci}
6362bf215546Sopenharmony_ci
6363bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6364bf215546Sopenharmony_ciradv_TrimCommandPool(VkDevice device, VkCommandPool commandPool, VkCommandPoolTrimFlags flags)
6365bf215546Sopenharmony_ci{
6366bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_pool, pool, commandPool);
6367bf215546Sopenharmony_ci
6368bf215546Sopenharmony_ci   list_for_each_entry_safe(struct radv_cmd_buffer, cmd_buffer, &pool->free_cmd_buffers, pool_link)
6369bf215546Sopenharmony_ci   {
6370bf215546Sopenharmony_ci      radv_destroy_cmd_buffer(cmd_buffer);
6371bf215546Sopenharmony_ci   }
6372bf215546Sopenharmony_ci}
6373bf215546Sopenharmony_ci
6374bf215546Sopenharmony_cistatic void
6375bf215546Sopenharmony_ciradv_cmd_buffer_begin_subpass(struct radv_cmd_buffer *cmd_buffer, uint32_t subpass_id)
6376bf215546Sopenharmony_ci{
6377bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6378bf215546Sopenharmony_ci   struct radv_subpass *subpass = &state->pass->subpasses[subpass_id];
6379bf215546Sopenharmony_ci
6380bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096);
6381bf215546Sopenharmony_ci
6382bf215546Sopenharmony_ci   radv_emit_subpass_barrier(cmd_buffer, &subpass->start_barrier);
6383bf215546Sopenharmony_ci
6384bf215546Sopenharmony_ci   radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
6385bf215546Sopenharmony_ci
6386bf215546Sopenharmony_ci   radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
6387bf215546Sopenharmony_ci
6388bf215546Sopenharmony_ci   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
6389bf215546Sopenharmony_ci      const uint32_t a = subpass->attachments[i].attachment;
6390bf215546Sopenharmony_ci      if (a == VK_ATTACHMENT_UNUSED)
6391bf215546Sopenharmony_ci         continue;
6392bf215546Sopenharmony_ci
6393bf215546Sopenharmony_ci      radv_handle_subpass_image_transition(cmd_buffer, subpass->attachments[i], true);
6394bf215546Sopenharmony_ci   }
6395bf215546Sopenharmony_ci
6396bf215546Sopenharmony_ci   radv_ace_internal_barrier(cmd_buffer, 0, 0);
6397bf215546Sopenharmony_ci   radv_describe_barrier_end(cmd_buffer);
6398bf215546Sopenharmony_ci
6399bf215546Sopenharmony_ci   radv_cmd_buffer_clear_subpass(cmd_buffer);
6400bf215546Sopenharmony_ci
6401bf215546Sopenharmony_ci   if (subpass->vrs_attachment) {
6402bf215546Sopenharmony_ci      int idx = subpass->vrs_attachment->attachment;
6403bf215546Sopenharmony_ci      struct radv_image_view *vrs_iview = cmd_buffer->state.attachments[idx].iview;
6404bf215546Sopenharmony_ci
6405bf215546Sopenharmony_ci      if (subpass->depth_stencil_attachment) {
6406bf215546Sopenharmony_ci         /* When a subpass uses a VRS attachment and a depth/stencil attachment, we just need to
6407bf215546Sopenharmony_ci          * copy the VRS rates to the HTILE buffer of the attachment.
6408bf215546Sopenharmony_ci          */
6409bf215546Sopenharmony_ci         int ds_idx = subpass->depth_stencil_attachment->attachment;
6410bf215546Sopenharmony_ci         struct radv_image_view *ds_iview = cmd_buffer->state.attachments[ds_idx].iview;
6411bf215546Sopenharmony_ci         struct radv_image *ds_image = ds_iview->image;
6412bf215546Sopenharmony_ci         uint32_t level = ds_iview->vk.base_mip_level;
6413bf215546Sopenharmony_ci
6414bf215546Sopenharmony_ci         VkExtent2D extent = {
6415bf215546Sopenharmony_ci            .width = radv_minify(ds_image->info.width, level),
6416bf215546Sopenharmony_ci            .height = radv_minify(ds_image->info.height, level),
6417bf215546Sopenharmony_ci         };
6418bf215546Sopenharmony_ci
6419bf215546Sopenharmony_ci         /* HTILE buffer */
6420bf215546Sopenharmony_ci         uint64_t htile_offset = ds_image->bindings[0].offset + ds_image->planes[0].surface.meta_offset +
6421bf215546Sopenharmony_ci                                 ds_image->planes[0].surface.u.gfx9.meta_levels[level].offset;
6422bf215546Sopenharmony_ci         uint64_t htile_size = ds_image->planes[0].surface.u.gfx9.meta_levels[level].size;
6423bf215546Sopenharmony_ci         struct radv_buffer htile_buffer;
6424bf215546Sopenharmony_ci
6425bf215546Sopenharmony_ci         radv_buffer_init(&htile_buffer, cmd_buffer->device, ds_image->bindings[0].bo, htile_size, htile_offset);
6426bf215546Sopenharmony_ci
6427bf215546Sopenharmony_ci         /* Copy the VRS rates to the HTILE buffer. */
6428bf215546Sopenharmony_ci         radv_copy_vrs_htile(cmd_buffer, vrs_iview->image, &extent, ds_image, &htile_buffer, true);
6429bf215546Sopenharmony_ci
6430bf215546Sopenharmony_ci         radv_buffer_finish(&htile_buffer);
6431bf215546Sopenharmony_ci      } else {
6432bf215546Sopenharmony_ci         /* When a subpass uses a VRS attachment without binding a depth/stencil attachment, we have
6433bf215546Sopenharmony_ci          * to copy the VRS rates to our internal HTILE buffer.
6434bf215546Sopenharmony_ci          */
6435bf215546Sopenharmony_ci         struct vk_framebuffer *fb = cmd_buffer->state.framebuffer;
6436bf215546Sopenharmony_ci         struct radv_image *ds_image = radv_cmd_buffer_get_vrs_image(cmd_buffer);
6437bf215546Sopenharmony_ci
6438bf215546Sopenharmony_ci         if (ds_image) {
6439bf215546Sopenharmony_ci            /* HTILE buffer */
6440bf215546Sopenharmony_ci            struct radv_buffer *htile_buffer = cmd_buffer->device->vrs.buffer;
6441bf215546Sopenharmony_ci
6442bf215546Sopenharmony_ci            VkExtent2D extent = {
6443bf215546Sopenharmony_ci               .width = MIN2(fb->width, ds_image->info.width),
6444bf215546Sopenharmony_ci               .height = MIN2(fb->height, ds_image->info.height),
6445bf215546Sopenharmony_ci            };
6446bf215546Sopenharmony_ci
6447bf215546Sopenharmony_ci            /* Copy the VRS rates to the HTILE buffer. */
6448bf215546Sopenharmony_ci            radv_copy_vrs_htile(cmd_buffer, vrs_iview->image, &extent, ds_image, htile_buffer, false);
6449bf215546Sopenharmony_ci         }
6450bf215546Sopenharmony_ci      }
6451bf215546Sopenharmony_ci   }
6452bf215546Sopenharmony_ci
6453bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
6454bf215546Sopenharmony_ci}
6455bf215546Sopenharmony_ci
6456bf215546Sopenharmony_cistatic void
6457bf215546Sopenharmony_ciradv_mark_noncoherent_rb(struct radv_cmd_buffer *cmd_buffer)
6458bf215546Sopenharmony_ci{
6459bf215546Sopenharmony_ci   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
6460bf215546Sopenharmony_ci
6461bf215546Sopenharmony_ci   /* Have to be conservative in cmdbuffers with inherited attachments. */
6462bf215546Sopenharmony_ci   if (!cmd_buffer->state.attachments) {
6463bf215546Sopenharmony_ci      cmd_buffer->state.rb_noncoherent_dirty = true;
6464bf215546Sopenharmony_ci      return;
6465bf215546Sopenharmony_ci   }
6466bf215546Sopenharmony_ci
6467bf215546Sopenharmony_ci   for (uint32_t i = 0; i < subpass->color_count; ++i) {
6468bf215546Sopenharmony_ci      const uint32_t a = subpass->color_attachments[i].attachment;
6469bf215546Sopenharmony_ci      if (a == VK_ATTACHMENT_UNUSED)
6470bf215546Sopenharmony_ci         continue;
6471bf215546Sopenharmony_ci      if (!cmd_buffer->state.attachments[a].iview->image->l2_coherent) {
6472bf215546Sopenharmony_ci         cmd_buffer->state.rb_noncoherent_dirty = true;
6473bf215546Sopenharmony_ci         return;
6474bf215546Sopenharmony_ci      }
6475bf215546Sopenharmony_ci   }
6476bf215546Sopenharmony_ci   if (subpass->depth_stencil_attachment &&
6477bf215546Sopenharmony_ci       !cmd_buffer->state.attachments[subpass->depth_stencil_attachment->attachment]
6478bf215546Sopenharmony_ci           .iview->image->l2_coherent)
6479bf215546Sopenharmony_ci      cmd_buffer->state.rb_noncoherent_dirty = true;
6480bf215546Sopenharmony_ci}
6481bf215546Sopenharmony_ci
/**
 * Make "subpass" the current subpass of cmd_buffer again.
 *
 * radv_mark_noncoherent_rb() inspects cmd_buffer->state.subpass, so it is
 * called first, before radv_cmd_buffer_set_subpass() is given the new subpass.
 */
void
radv_cmd_buffer_restore_subpass(struct radv_cmd_buffer *cmd_buffer,
                                const struct radv_subpass *subpass)
{
   /* Account for the subpass that is current right now... */
   radv_mark_noncoherent_rb(cmd_buffer);

   /* ...then hand over the one being restored. */
   radv_cmd_buffer_set_subpass(cmd_buffer, subpass);
}
6489bf215546Sopenharmony_ci
6490bf215546Sopenharmony_cistatic void
6491bf215546Sopenharmony_ciradv_cmd_buffer_end_subpass(struct radv_cmd_buffer *cmd_buffer)
6492bf215546Sopenharmony_ci{
6493bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6494bf215546Sopenharmony_ci   const struct radv_subpass *subpass = state->subpass;
6495bf215546Sopenharmony_ci   uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);
6496bf215546Sopenharmony_ci
6497bf215546Sopenharmony_ci   radv_cmd_buffer_resolve_subpass(cmd_buffer);
6498bf215546Sopenharmony_ci
6499bf215546Sopenharmony_ci   radv_describe_barrier_start(cmd_buffer, RGP_BARRIER_EXTERNAL_RENDER_PASS_SYNC);
6500bf215546Sopenharmony_ci
6501bf215546Sopenharmony_ci   for (uint32_t i = 0; i < subpass->attachment_count; ++i) {
6502bf215546Sopenharmony_ci      const uint32_t a = subpass->attachments[i].attachment;
6503bf215546Sopenharmony_ci      if (a == VK_ATTACHMENT_UNUSED)
6504bf215546Sopenharmony_ci         continue;
6505bf215546Sopenharmony_ci
6506bf215546Sopenharmony_ci      if (state->pass->attachments[a].last_subpass_idx != subpass_id)
6507bf215546Sopenharmony_ci         continue;
6508bf215546Sopenharmony_ci
6509bf215546Sopenharmony_ci      VkImageLayout layout = state->pass->attachments[a].final_layout;
6510bf215546Sopenharmony_ci      VkImageLayout stencil_layout = state->pass->attachments[a].stencil_final_layout;
6511bf215546Sopenharmony_ci      struct radv_subpass_attachment att = {a, layout, stencil_layout};
6512bf215546Sopenharmony_ci      radv_handle_subpass_image_transition(cmd_buffer, att, false);
6513bf215546Sopenharmony_ci   }
6514bf215546Sopenharmony_ci
6515bf215546Sopenharmony_ci   radv_ace_internal_barrier(cmd_buffer, 0, 0);
6516bf215546Sopenharmony_ci   radv_describe_barrier_end(cmd_buffer);
6517bf215546Sopenharmony_ci}
6518bf215546Sopenharmony_ci
6519bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6520bf215546Sopenharmony_ciradv_CmdBeginRenderPass2(VkCommandBuffer commandBuffer,
6521bf215546Sopenharmony_ci                         const VkRenderPassBeginInfo *pRenderPassBeginInfo,
6522bf215546Sopenharmony_ci                         const VkSubpassBeginInfo *pSubpassBeginInfo)
6523bf215546Sopenharmony_ci{
6524bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
6525bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_render_pass, pass, pRenderPassBeginInfo->renderPass);
6526bf215546Sopenharmony_ci   RADV_FROM_HANDLE(vk_framebuffer, framebuffer, pRenderPassBeginInfo->framebuffer);
6527bf215546Sopenharmony_ci   VkResult result;
6528bf215546Sopenharmony_ci
6529bf215546Sopenharmony_ci   cmd_buffer->state.framebuffer = framebuffer;
6530bf215546Sopenharmony_ci   cmd_buffer->state.pass = pass;
6531bf215546Sopenharmony_ci   cmd_buffer->state.render_area = pRenderPassBeginInfo->renderArea;
6532bf215546Sopenharmony_ci
6533bf215546Sopenharmony_ci   result = radv_cmd_state_setup_attachments(cmd_buffer, pass, pRenderPassBeginInfo);
6534bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
6535bf215546Sopenharmony_ci      return;
6536bf215546Sopenharmony_ci
6537bf215546Sopenharmony_ci   result = radv_cmd_state_setup_sample_locations(cmd_buffer, pass, pRenderPassBeginInfo);
6538bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
6539bf215546Sopenharmony_ci      return;
6540bf215546Sopenharmony_ci
6541bf215546Sopenharmony_ci   radv_cmd_buffer_begin_subpass(cmd_buffer, 0);
6542bf215546Sopenharmony_ci}
6543bf215546Sopenharmony_ci
6544bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
6545bf215546Sopenharmony_ciradv_CmdNextSubpass2(VkCommandBuffer commandBuffer, const VkSubpassBeginInfo *pSubpassBeginInfo,
6546bf215546Sopenharmony_ci                     const VkSubpassEndInfo *pSubpassEndInfo)
6547bf215546Sopenharmony_ci{
6548bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
6549bf215546Sopenharmony_ci
6550bf215546Sopenharmony_ci   radv_mark_noncoherent_rb(cmd_buffer);
6551bf215546Sopenharmony_ci
6552bf215546Sopenharmony_ci   uint32_t prev_subpass = radv_get_subpass_id(cmd_buffer);
6553bf215546Sopenharmony_ci   radv_cmd_buffer_end_subpass(cmd_buffer);
6554bf215546Sopenharmony_ci   radv_cmd_buffer_begin_subpass(cmd_buffer, prev_subpass + 1);
6555bf215546Sopenharmony_ci}
6556bf215546Sopenharmony_ci
6557bf215546Sopenharmony_cistatic void
6558bf215546Sopenharmony_ciradv_emit_view_index_per_stage(struct radeon_cmdbuf *cs, struct radv_graphics_pipeline *pipeline,
6559bf215546Sopenharmony_ci                               unsigned stage, unsigned index)
6560bf215546Sopenharmony_ci{
6561bf215546Sopenharmony_ci   struct radv_userdata_info *loc = radv_lookup_user_sgpr(&pipeline->base, stage, AC_UD_VIEW_INDEX);
6562bf215546Sopenharmony_ci   if (loc->sgpr_idx == -1)
6563bf215546Sopenharmony_ci      return;
6564bf215546Sopenharmony_ci   uint32_t base_reg = pipeline->base.user_data_0[stage];
6565bf215546Sopenharmony_ci   radeon_set_sh_reg(cs, base_reg + loc->sgpr_idx * 4, index);
6566bf215546Sopenharmony_ci}
6567bf215546Sopenharmony_ci
6568bf215546Sopenharmony_cistatic void
6569bf215546Sopenharmony_ciradv_emit_view_index(struct radv_cmd_buffer *cmd_buffer, unsigned index)
6570bf215546Sopenharmony_ci{
6571bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
6572bf215546Sopenharmony_ci
6573bf215546Sopenharmony_ci   radv_foreach_stage(stage, pipeline->active_stages & ~VK_SHADER_STAGE_TASK_BIT_NV) {
6574bf215546Sopenharmony_ci      radv_emit_view_index_per_stage(cmd_buffer->cs, pipeline, stage, index);
6575bf215546Sopenharmony_ci   }
6576bf215546Sopenharmony_ci   if (radv_pipeline_has_gs_copy_shader(&pipeline->base)) {
6577bf215546Sopenharmony_ci      struct radv_userdata_info *loc =
6578bf215546Sopenharmony_ci         &pipeline->base.gs_copy_shader->info.user_sgprs_locs.shader_data[AC_UD_VIEW_INDEX];
6579bf215546Sopenharmony_ci      if (loc->sgpr_idx != -1) {
6580bf215546Sopenharmony_ci         uint32_t base_reg = R_00B130_SPI_SHADER_USER_DATA_VS_0;
6581bf215546Sopenharmony_ci         radeon_set_sh_reg(cmd_buffer->cs, base_reg + loc->sgpr_idx * 4, index);
6582bf215546Sopenharmony_ci      }
6583bf215546Sopenharmony_ci   }
6584bf215546Sopenharmony_ci   if (pipeline->active_stages & VK_SHADER_STAGE_TASK_BIT_NV) {
6585bf215546Sopenharmony_ci      radv_emit_view_index_per_stage(cmd_buffer->ace_internal.cs, pipeline, MESA_SHADER_TASK,
6586bf215546Sopenharmony_ci                                     index);
6587bf215546Sopenharmony_ci   }
6588bf215546Sopenharmony_ci}
6589bf215546Sopenharmony_ci
6590bf215546Sopenharmony_ci/**
6591bf215546Sopenharmony_ci * Emulates predication for MEC using COND_EXEC.
6592bf215546Sopenharmony_ci * When the current command buffer is predicating, emit a COND_EXEC packet
6593bf215546Sopenharmony_ci * so that the MEC skips the next few dwords worth of packets.
6594bf215546Sopenharmony_ci *
6595bf215546Sopenharmony_ci * To make it work with inverted conditional rendering, we allocate
6596bf215546Sopenharmony_ci * space in the upload BO and emit some packets to invert the condition.
6597bf215546Sopenharmony_ci */
6598bf215546Sopenharmony_cistatic void
6599bf215546Sopenharmony_ciradv_cs_emit_compute_predication(struct radv_cmd_state *state, struct radeon_cmdbuf *cs,
6600bf215546Sopenharmony_ci                                 uint64_t inv_va, bool *inv_emitted, unsigned dwords)
6601bf215546Sopenharmony_ci{
6602bf215546Sopenharmony_ci   if (!state->predicating)
6603bf215546Sopenharmony_ci      return;
6604bf215546Sopenharmony_ci
6605bf215546Sopenharmony_ci   uint64_t va = state->predication_va;
6606bf215546Sopenharmony_ci
6607bf215546Sopenharmony_ci   if (!state->predication_type) {
6608bf215546Sopenharmony_ci      /* Invert the condition the first time it is needed. */
6609bf215546Sopenharmony_ci      if (!*inv_emitted) {
6610bf215546Sopenharmony_ci         *inv_emitted = true;
6611bf215546Sopenharmony_ci
6612bf215546Sopenharmony_ci         /* Write 1 to the inverted predication VA. */
6613bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
6614bf215546Sopenharmony_ci         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
6615bf215546Sopenharmony_ci                            COPY_DATA_WR_CONFIRM);
6616bf215546Sopenharmony_ci         radeon_emit(cs, 1);
6617bf215546Sopenharmony_ci         radeon_emit(cs, 0);
6618bf215546Sopenharmony_ci         radeon_emit(cs, inv_va);
6619bf215546Sopenharmony_ci         radeon_emit(cs, inv_va >> 32);
6620bf215546Sopenharmony_ci
6621bf215546Sopenharmony_ci         /* If the API predication VA == 0, skip next command. */
6622bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
6623bf215546Sopenharmony_ci         radeon_emit(cs, va);
6624bf215546Sopenharmony_ci         radeon_emit(cs, va >> 32);
6625bf215546Sopenharmony_ci         radeon_emit(cs, 0);
6626bf215546Sopenharmony_ci         radeon_emit(cs, 6); /* 1x COPY_DATA size */
6627bf215546Sopenharmony_ci
6628bf215546Sopenharmony_ci         /* Write 0 to the new predication VA (when the API condition != 0) */
6629bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
6630bf215546Sopenharmony_ci         radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
6631bf215546Sopenharmony_ci                            COPY_DATA_WR_CONFIRM);
6632bf215546Sopenharmony_ci         radeon_emit(cs, 0);
6633bf215546Sopenharmony_ci         radeon_emit(cs, 0);
6634bf215546Sopenharmony_ci         radeon_emit(cs, inv_va);
6635bf215546Sopenharmony_ci         radeon_emit(cs, inv_va >> 32);
6636bf215546Sopenharmony_ci      }
6637bf215546Sopenharmony_ci
6638bf215546Sopenharmony_ci      va = inv_va;
6639bf215546Sopenharmony_ci   }
6640bf215546Sopenharmony_ci
6641bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_COND_EXEC, 3, 0));
6642bf215546Sopenharmony_ci   radeon_emit(cs, va);
6643bf215546Sopenharmony_ci   radeon_emit(cs, va >> 32);
6644bf215546Sopenharmony_ci   radeon_emit(cs, 0); /* Cache policy */
6645bf215546Sopenharmony_ci   radeon_emit(cs, dwords); /* Size of the predicated packet(s) in DWORDs. */
6646bf215546Sopenharmony_ci}
6647bf215546Sopenharmony_ci
6648bf215546Sopenharmony_cistatic void
6649bf215546Sopenharmony_ciradv_cs_emit_draw_packet(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_count,
6650bf215546Sopenharmony_ci                         uint32_t use_opaque)
6651bf215546Sopenharmony_ci{
6652bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_AUTO, 1, cmd_buffer->state.predicating));
6653bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, vertex_count);
6654bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX | use_opaque);
6655bf215546Sopenharmony_ci}
6656bf215546Sopenharmony_ci
/**
 * Emit a PKT3_DRAW_INDEX_2 packet to render "index_count" vertices.
 *
 * The starting address "index_va" may point anywhere within the index buffer. The number of
 * indexes allocated in the index buffer *past that point* is specified by "max_index_count".
 * Hardware uses this information to return 0 for out-of-bounds reads.
 */
6664bf215546Sopenharmony_cistatic void
6665bf215546Sopenharmony_ciradv_cs_emit_draw_indexed_packet(struct radv_cmd_buffer *cmd_buffer, uint64_t index_va,
6666bf215546Sopenharmony_ci                                 uint32_t max_index_count, uint32_t index_count, bool not_eop)
6667bf215546Sopenharmony_ci{
6668bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, PKT3(PKT3_DRAW_INDEX_2, 4, cmd_buffer->state.predicating));
6669bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, max_index_count);
6670bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, index_va);
6671bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, index_va >> 32);
6672bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, index_count);
6673bf215546Sopenharmony_ci   /* NOT_EOP allows merging multiple draws into 1 wave, but only user VGPRs
6674bf215546Sopenharmony_ci    * can be changed between draws and GS fast launch must be disabled.
6675bf215546Sopenharmony_ci    * NOT_EOP doesn't work on gfx9 and older.
6676bf215546Sopenharmony_ci    */
6677bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, V_0287F0_DI_SRC_SEL_DMA | S_0287F0_NOT_EOP(not_eop));
6678bf215546Sopenharmony_ci}
6679bf215546Sopenharmony_ci
6680bf215546Sopenharmony_ci/* MUST inline this function to avoid massive perf loss in drawoverhead */
6681bf215546Sopenharmony_ciALWAYS_INLINE static void
6682bf215546Sopenharmony_ciradv_cs_emit_indirect_draw_packet(struct radv_cmd_buffer *cmd_buffer, bool indexed,
6683bf215546Sopenharmony_ci                                  uint32_t draw_count, uint64_t count_va, uint32_t stride)
6684bf215546Sopenharmony_ci{
6685bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
6686bf215546Sopenharmony_ci   const unsigned di_src_sel = indexed ? V_0287F0_DI_SRC_SEL_DMA : V_0287F0_DI_SRC_SEL_AUTO_INDEX;
6687bf215546Sopenharmony_ci   bool draw_id_enable = cmd_buffer->state.graphics_pipeline->uses_drawid;
6688bf215546Sopenharmony_ci   uint32_t base_reg = cmd_buffer->state.graphics_pipeline->vtx_base_sgpr;
6689bf215546Sopenharmony_ci   uint32_t vertex_offset_reg, start_instance_reg = 0, draw_id_reg = 0;
6690bf215546Sopenharmony_ci   bool predicating = cmd_buffer->state.predicating;
6691bf215546Sopenharmony_ci   bool mesh = cmd_buffer->state.mesh_shading;
6692bf215546Sopenharmony_ci   assert(base_reg);
6693bf215546Sopenharmony_ci
6694bf215546Sopenharmony_ci   /* just reset draw state for vertex data */
6695bf215546Sopenharmony_ci   cmd_buffer->state.last_first_instance = -1;
6696bf215546Sopenharmony_ci   cmd_buffer->state.last_num_instances = -1;
6697bf215546Sopenharmony_ci   cmd_buffer->state.last_drawid = -1;
6698bf215546Sopenharmony_ci   cmd_buffer->state.last_vertex_offset = -1;
6699bf215546Sopenharmony_ci
6700bf215546Sopenharmony_ci   vertex_offset_reg = (base_reg - SI_SH_REG_OFFSET) >> 2;
6701bf215546Sopenharmony_ci   if (cmd_buffer->state.graphics_pipeline->uses_baseinstance)
6702bf215546Sopenharmony_ci      start_instance_reg = ((base_reg + (draw_id_enable ? 8 : 4)) - SI_SH_REG_OFFSET) >> 2;
6703bf215546Sopenharmony_ci   if (draw_id_enable)
6704bf215546Sopenharmony_ci      draw_id_reg = ((base_reg + mesh * 12 + 4) - SI_SH_REG_OFFSET) >> 2;
6705bf215546Sopenharmony_ci
6706bf215546Sopenharmony_ci   if (draw_count == 1 && !count_va && !draw_id_enable) {
6707bf215546Sopenharmony_ci      radeon_emit(cs,
6708bf215546Sopenharmony_ci                  PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT : PKT3_DRAW_INDIRECT, 3, predicating));
6709bf215546Sopenharmony_ci      radeon_emit(cs, 0);
6710bf215546Sopenharmony_ci      radeon_emit(cs, vertex_offset_reg);
6711bf215546Sopenharmony_ci      radeon_emit(cs, start_instance_reg);
6712bf215546Sopenharmony_ci      radeon_emit(cs, di_src_sel);
6713bf215546Sopenharmony_ci   } else {
6714bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(indexed ? PKT3_DRAW_INDEX_INDIRECT_MULTI : PKT3_DRAW_INDIRECT_MULTI, 8,
6715bf215546Sopenharmony_ci                           predicating));
6716bf215546Sopenharmony_ci      radeon_emit(cs, 0);
6717bf215546Sopenharmony_ci      radeon_emit(cs, vertex_offset_reg);
6718bf215546Sopenharmony_ci      radeon_emit(cs, start_instance_reg);
6719bf215546Sopenharmony_ci      radeon_emit(cs, draw_id_reg | S_2C3_DRAW_INDEX_ENABLE(draw_id_enable) |
6720bf215546Sopenharmony_ci                         S_2C3_COUNT_INDIRECT_ENABLE(!!count_va));
6721bf215546Sopenharmony_ci      radeon_emit(cs, draw_count); /* count */
6722bf215546Sopenharmony_ci      radeon_emit(cs, count_va);   /* count_addr */
6723bf215546Sopenharmony_ci      radeon_emit(cs, count_va >> 32);
6724bf215546Sopenharmony_ci      radeon_emit(cs, stride); /* stride */
6725bf215546Sopenharmony_ci      radeon_emit(cs, di_src_sel);
6726bf215546Sopenharmony_ci
6727bf215546Sopenharmony_ci      cmd_buffer->state.uses_draw_indirect_multi = true;
6728bf215546Sopenharmony_ci   }
6729bf215546Sopenharmony_ci}
6730bf215546Sopenharmony_ci
6731bf215546Sopenharmony_ciALWAYS_INLINE static void
6732bf215546Sopenharmony_ciradv_cs_emit_dispatch_taskmesh_direct_ace_packet(struct radv_cmd_buffer *cmd_buffer,
6733bf215546Sopenharmony_ci                                                 const uint32_t x, const uint32_t y,
6734bf215546Sopenharmony_ci                                                 const uint32_t z)
6735bf215546Sopenharmony_ci{
6736bf215546Sopenharmony_ci   struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base;
6737bf215546Sopenharmony_ci   struct radv_shader *compute_shader = radv_get_shader(pipeline, MESA_SHADER_TASK);
6738bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
6739bf215546Sopenharmony_ci   const bool predicating = cmd_buffer->state.predicating;
6740bf215546Sopenharmony_ci   const uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task |
6741bf215546Sopenharmony_ci                                       S_00B800_CS_W32_EN(compute_shader->info.wave_size == 32);
6742bf215546Sopenharmony_ci
6743bf215546Sopenharmony_ci   struct radv_userdata_info *ring_entry_loc =
6744bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_TASK_RING_ENTRY);
6745bf215546Sopenharmony_ci   assert(ring_entry_loc && ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1);
6746bf215546Sopenharmony_ci
6747bf215546Sopenharmony_ci   uint32_t ring_entry_reg =
6748bf215546Sopenharmony_ci      (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
6749bf215546Sopenharmony_ci
6750bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_DIRECT_ACE, 4, predicating) | PKT3_SHADER_TYPE_S(1));
6751bf215546Sopenharmony_ci   radeon_emit(cs, x);
6752bf215546Sopenharmony_ci   radeon_emit(cs, y);
6753bf215546Sopenharmony_ci   radeon_emit(cs, z);
6754bf215546Sopenharmony_ci   radeon_emit(cs, dispatch_initiator);
6755bf215546Sopenharmony_ci   radeon_emit(cs, ring_entry_reg & 0xFFFF);
6756bf215546Sopenharmony_ci}
6757bf215546Sopenharmony_ci
6758bf215546Sopenharmony_ciALWAYS_INLINE static void
6759bf215546Sopenharmony_ciradv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(struct radv_cmd_buffer *cmd_buffer,
6760bf215546Sopenharmony_ci                                                         uint64_t data_va, uint32_t draw_count,
6761bf215546Sopenharmony_ci                                                         uint64_t count_va, uint32_t stride)
6762bf215546Sopenharmony_ci{
6763bf215546Sopenharmony_ci   assert((data_va & 0x03) == 0);
6764bf215546Sopenharmony_ci   assert((count_va & 0x03) == 0);
6765bf215546Sopenharmony_ci
6766bf215546Sopenharmony_ci   struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base;
6767bf215546Sopenharmony_ci   struct radv_shader *compute_shader = radv_get_shader(pipeline, MESA_SHADER_TASK);
6768bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
6769bf215546Sopenharmony_ci
6770bf215546Sopenharmony_ci   const uint32_t count_indirect_enable = !!count_va;
6771bf215546Sopenharmony_ci   const uint32_t xyz_dim_enable = compute_shader->info.cs.uses_grid_size;
6772bf215546Sopenharmony_ci   const uint32_t draw_id_enable = compute_shader->info.vs.needs_draw_id;
6773bf215546Sopenharmony_ci   const uint32_t dispatch_initiator = cmd_buffer->device->dispatch_initiator_task |
6774bf215546Sopenharmony_ci                                       S_00B800_CS_W32_EN(compute_shader->info.wave_size == 32);
6775bf215546Sopenharmony_ci
6776bf215546Sopenharmony_ci   const struct radv_userdata_info *ring_entry_loc =
6777bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_TASK_RING_ENTRY);
6778bf215546Sopenharmony_ci   const struct radv_userdata_info *xyz_dim_loc =
6779bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_GRID_SIZE);
6780bf215546Sopenharmony_ci   const struct radv_userdata_info *draw_id_loc =
6781bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_TASK_DRAW_ID);
6782bf215546Sopenharmony_ci
6783bf215546Sopenharmony_ci   assert(ring_entry_loc->sgpr_idx != -1 && ring_entry_loc->num_sgprs == 1);
6784bf215546Sopenharmony_ci   assert(!xyz_dim_enable || (xyz_dim_loc->sgpr_idx != -1 && xyz_dim_loc->num_sgprs == 3));
6785bf215546Sopenharmony_ci   assert(!draw_id_enable || (draw_id_loc->sgpr_idx != -1 && draw_id_loc->num_sgprs == 1));
6786bf215546Sopenharmony_ci
6787bf215546Sopenharmony_ci   const uint32_t ring_entry_reg =
6788bf215546Sopenharmony_ci      (R_00B900_COMPUTE_USER_DATA_0 + ring_entry_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
6789bf215546Sopenharmony_ci   const uint32_t xyz_dim_reg =
6790bf215546Sopenharmony_ci      !xyz_dim_enable
6791bf215546Sopenharmony_ci         ? 0
6792bf215546Sopenharmony_ci         : (R_00B900_COMPUTE_USER_DATA_0 + xyz_dim_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
6793bf215546Sopenharmony_ci   const uint32_t draw_id_reg =
6794bf215546Sopenharmony_ci      !draw_id_enable
6795bf215546Sopenharmony_ci         ? 0
6796bf215546Sopenharmony_ci         : (R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4 - SI_SH_REG_OFFSET) >> 2;
6797bf215546Sopenharmony_ci
6798bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_INDIRECT_MULTI_ACE, 9, 0) | PKT3_SHADER_TYPE_S(1));
6799bf215546Sopenharmony_ci   radeon_emit(cs, data_va);
6800bf215546Sopenharmony_ci   radeon_emit(cs, data_va >> 32);
6801bf215546Sopenharmony_ci   radeon_emit(cs, ring_entry_reg & 0xFFFF);
6802bf215546Sopenharmony_ci   radeon_emit(cs, (count_indirect_enable << 1) | (draw_id_enable << 2) | (xyz_dim_enable << 3) |
6803bf215546Sopenharmony_ci                      (draw_id_reg << 16));
6804bf215546Sopenharmony_ci   radeon_emit(cs, xyz_dim_reg & 0xFFFF);
6805bf215546Sopenharmony_ci   radeon_emit(cs, draw_count);
6806bf215546Sopenharmony_ci   radeon_emit(cs, count_va);
6807bf215546Sopenharmony_ci   radeon_emit(cs, count_va >> 32);
6808bf215546Sopenharmony_ci   radeon_emit(cs, stride);
6809bf215546Sopenharmony_ci   radeon_emit(cs, dispatch_initiator);
6810bf215546Sopenharmony_ci}
6811bf215546Sopenharmony_ci
6812bf215546Sopenharmony_ciALWAYS_INLINE static void
6813bf215546Sopenharmony_ciradv_cs_emit_dispatch_taskmesh_gfx_packet(struct radv_cmd_buffer *cmd_buffer)
6814bf215546Sopenharmony_ci{
6815bf215546Sopenharmony_ci   struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base;
6816bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
6817bf215546Sopenharmony_ci   bool predicating = cmd_buffer->state.predicating;
6818bf215546Sopenharmony_ci
6819bf215546Sopenharmony_ci   struct radv_userdata_info *ring_entry_loc =
6820bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_MESH, AC_UD_TASK_RING_ENTRY);
6821bf215546Sopenharmony_ci
6822bf215546Sopenharmony_ci   assert(ring_entry_loc && ring_entry_loc->sgpr_idx != -1);
6823bf215546Sopenharmony_ci
6824bf215546Sopenharmony_ci   uint32_t base_reg = cmd_buffer->state.graphics_pipeline->vtx_base_sgpr;
6825bf215546Sopenharmony_ci   uint32_t xyz_dim_reg = ((base_reg + 4) - SI_SH_REG_OFFSET) >> 2;
6826bf215546Sopenharmony_ci   uint32_t ring_entry_reg = ((base_reg + ring_entry_loc->sgpr_idx * 4) - SI_SH_REG_OFFSET) >> 2;
6827bf215546Sopenharmony_ci
6828bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_DISPATCH_TASKMESH_GFX, 2, predicating));
6829bf215546Sopenharmony_ci   radeon_emit(cs, (ring_entry_reg << 16) | (xyz_dim_reg & 0xFFFF));
6830bf215546Sopenharmony_ci   radeon_emit(cs, 0);
6831bf215546Sopenharmony_ci   radeon_emit(cs, V_0287F0_DI_SRC_SEL_AUTO_INDEX);
6832bf215546Sopenharmony_ci}
6833bf215546Sopenharmony_ci
6834bf215546Sopenharmony_cistatic inline void
6835bf215546Sopenharmony_ciradv_emit_userdata_vertex_internal(struct radv_cmd_buffer *cmd_buffer,
6836bf215546Sopenharmony_ci                                   const struct radv_draw_info *info, const uint32_t vertex_offset)
6837bf215546Sopenharmony_ci{
6838bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6839bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
6840bf215546Sopenharmony_ci   const bool uses_baseinstance = state->graphics_pipeline->uses_baseinstance;
6841bf215546Sopenharmony_ci   const bool uses_drawid = state->graphics_pipeline->uses_drawid;
6842bf215546Sopenharmony_ci
6843bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, state->graphics_pipeline->vtx_emit_num);
6844bf215546Sopenharmony_ci
6845bf215546Sopenharmony_ci   radeon_emit(cs, vertex_offset);
6846bf215546Sopenharmony_ci   state->last_vertex_offset = vertex_offset;
6847bf215546Sopenharmony_ci   if (uses_drawid) {
6848bf215546Sopenharmony_ci      radeon_emit(cs, 0);
6849bf215546Sopenharmony_ci      state->last_drawid = 0;
6850bf215546Sopenharmony_ci   }
6851bf215546Sopenharmony_ci   if (uses_baseinstance) {
6852bf215546Sopenharmony_ci      radeon_emit(cs, info->first_instance);
6853bf215546Sopenharmony_ci      state->last_first_instance = info->first_instance;
6854bf215546Sopenharmony_ci   }
6855bf215546Sopenharmony_ci}
6856bf215546Sopenharmony_ci
6857bf215546Sopenharmony_ciALWAYS_INLINE static void
6858bf215546Sopenharmony_ciradv_emit_userdata_vertex(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
6859bf215546Sopenharmony_ci                          const uint32_t vertex_offset)
6860bf215546Sopenharmony_ci{
6861bf215546Sopenharmony_ci   const struct radv_cmd_state *state = &cmd_buffer->state;
6862bf215546Sopenharmony_ci   const bool uses_baseinstance = state->graphics_pipeline->uses_baseinstance;
6863bf215546Sopenharmony_ci   const bool uses_drawid = state->graphics_pipeline->uses_drawid;
6864bf215546Sopenharmony_ci
6865bf215546Sopenharmony_ci   /* this looks very dumb, but it allows the compiler to optimize better and yields
6866bf215546Sopenharmony_ci    * ~3-4% perf increase in drawoverhead
6867bf215546Sopenharmony_ci    */
6868bf215546Sopenharmony_ci   if (vertex_offset != state->last_vertex_offset) {
6869bf215546Sopenharmony_ci      radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
6870bf215546Sopenharmony_ci   } else if (uses_drawid && 0 != state->last_drawid) {
6871bf215546Sopenharmony_ci      radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
6872bf215546Sopenharmony_ci   } else if (uses_baseinstance && info->first_instance != state->last_first_instance) {
6873bf215546Sopenharmony_ci      radv_emit_userdata_vertex_internal(cmd_buffer, info, vertex_offset);
6874bf215546Sopenharmony_ci   }
6875bf215546Sopenharmony_ci}
6876bf215546Sopenharmony_ci
6877bf215546Sopenharmony_ciALWAYS_INLINE static void
6878bf215546Sopenharmony_ciradv_emit_userdata_vertex_drawid(struct radv_cmd_buffer *cmd_buffer, uint32_t vertex_offset, uint32_t drawid)
6879bf215546Sopenharmony_ci{
6880bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6881bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
6882bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, 1 + !!drawid);
6883bf215546Sopenharmony_ci   radeon_emit(cs, vertex_offset);
6884bf215546Sopenharmony_ci   state->last_vertex_offset = vertex_offset;
6885bf215546Sopenharmony_ci   if (drawid)
6886bf215546Sopenharmony_ci      radeon_emit(cs, drawid);
6887bf215546Sopenharmony_ci
6888bf215546Sopenharmony_ci}
6889bf215546Sopenharmony_ci
6890bf215546Sopenharmony_ciALWAYS_INLINE static void
6891bf215546Sopenharmony_ciradv_emit_userdata_mesh(struct radv_cmd_buffer *cmd_buffer,
6892bf215546Sopenharmony_ci                        const uint32_t x, const uint32_t y, const uint32_t z,
6893bf215546Sopenharmony_ci                        const uint32_t first_task)
6894bf215546Sopenharmony_ci{
6895bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6896bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
6897bf215546Sopenharmony_ci   const bool uses_drawid = state->graphics_pipeline->uses_drawid;
6898bf215546Sopenharmony_ci
6899bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, state->graphics_pipeline->vtx_base_sgpr, state->graphics_pipeline->vtx_emit_num);
6900bf215546Sopenharmony_ci   radeon_emit(cs, first_task);
6901bf215546Sopenharmony_ci   radeon_emit(cs, x);
6902bf215546Sopenharmony_ci   radeon_emit(cs, y);
6903bf215546Sopenharmony_ci   radeon_emit(cs, z);
6904bf215546Sopenharmony_ci
6905bf215546Sopenharmony_ci   if (uses_drawid) {
6906bf215546Sopenharmony_ci      radeon_emit(cs, 0);
6907bf215546Sopenharmony_ci      state->last_drawid = 0;
6908bf215546Sopenharmony_ci   }
6909bf215546Sopenharmony_ci}
6910bf215546Sopenharmony_ci
6911bf215546Sopenharmony_ciALWAYS_INLINE static void
6912bf215546Sopenharmony_ciradv_emit_userdata_mesh_first_task_0_draw_id_0(struct radv_cmd_buffer *cmd_buffer)
6913bf215546Sopenharmony_ci{
6914bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6915bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
6916bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = state->graphics_pipeline;
6917bf215546Sopenharmony_ci   const bool uses_drawid = pipeline->uses_drawid;
6918bf215546Sopenharmony_ci
6919bf215546Sopenharmony_ci   radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr, 1);
6920bf215546Sopenharmony_ci   radeon_emit(cs, 0);
6921bf215546Sopenharmony_ci
6922bf215546Sopenharmony_ci   if (uses_drawid) {
6923bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, pipeline->vtx_base_sgpr + (pipeline->vtx_emit_num - 1) * 4, 1);
6924bf215546Sopenharmony_ci      radeon_emit(cs, 0);
6925bf215546Sopenharmony_ci   }
6926bf215546Sopenharmony_ci}
6927bf215546Sopenharmony_ci
6928bf215546Sopenharmony_ciALWAYS_INLINE static void
6929bf215546Sopenharmony_ciradv_emit_userdata_task_ib_only(struct radv_cmd_buffer *cmd_buffer, uint64_t ib_va,
6930bf215546Sopenharmony_ci                                uint32_t ib_stride)
6931bf215546Sopenharmony_ci{
6932bf215546Sopenharmony_ci   struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base;
6933bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
6934bf215546Sopenharmony_ci
6935bf215546Sopenharmony_ci   struct radv_userdata_info *task_ib_loc =
6936bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_TASK_IB);
6937bf215546Sopenharmony_ci
6938bf215546Sopenharmony_ci   if (task_ib_loc->sgpr_idx != -1) {
6939bf215546Sopenharmony_ci      assert(task_ib_loc->num_sgprs == 3);
6940bf215546Sopenharmony_ci      unsigned task_ib_reg = R_00B900_COMPUTE_USER_DATA_0 + task_ib_loc->sgpr_idx * 4;
6941bf215546Sopenharmony_ci
6942bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, task_ib_reg, 3);
6943bf215546Sopenharmony_ci      radeon_emit(cs, ib_va);
6944bf215546Sopenharmony_ci      radeon_emit(cs, ib_va >> 32);
6945bf215546Sopenharmony_ci      radeon_emit(cs, ib_stride);
6946bf215546Sopenharmony_ci   }
6947bf215546Sopenharmony_ci}
6948bf215546Sopenharmony_ci
6949bf215546Sopenharmony_ciALWAYS_INLINE static void
6950bf215546Sopenharmony_ciradv_emit_userdata_task(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z,
6951bf215546Sopenharmony_ci                        uint32_t draw_id, uint32_t first_task, uint64_t ib_va)
6952bf215546Sopenharmony_ci{
6953bf215546Sopenharmony_ci   struct radv_pipeline *pipeline = &cmd_buffer->state.graphics_pipeline->base;
6954bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
6955bf215546Sopenharmony_ci
6956bf215546Sopenharmony_ci   struct radv_userdata_info *xyz_loc =
6957bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_GRID_SIZE);
6958bf215546Sopenharmony_ci   struct radv_userdata_info *draw_id_loc =
6959bf215546Sopenharmony_ci      radv_lookup_user_sgpr(pipeline, MESA_SHADER_TASK, AC_UD_CS_TASK_DRAW_ID);
6960bf215546Sopenharmony_ci
6961bf215546Sopenharmony_ci   if (xyz_loc->sgpr_idx != -1) {
6962bf215546Sopenharmony_ci      assert(xyz_loc->num_sgprs == 3);
6963bf215546Sopenharmony_ci      unsigned xyz_reg = R_00B900_COMPUTE_USER_DATA_0 + xyz_loc->sgpr_idx * 4;
6964bf215546Sopenharmony_ci
6965bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, xyz_reg, 3);
6966bf215546Sopenharmony_ci      radeon_emit(cs, x);
6967bf215546Sopenharmony_ci      radeon_emit(cs, y);
6968bf215546Sopenharmony_ci      radeon_emit(cs, z);
6969bf215546Sopenharmony_ci   }
6970bf215546Sopenharmony_ci
6971bf215546Sopenharmony_ci   if (draw_id_loc->sgpr_idx != -1) {
6972bf215546Sopenharmony_ci      assert(draw_id_loc->num_sgprs == 1);
6973bf215546Sopenharmony_ci      unsigned draw_id_reg = R_00B900_COMPUTE_USER_DATA_0 + draw_id_loc->sgpr_idx * 4;
6974bf215546Sopenharmony_ci
6975bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cs, draw_id_reg, 1);
6976bf215546Sopenharmony_ci      radeon_emit(cs, draw_id);
6977bf215546Sopenharmony_ci   }
6978bf215546Sopenharmony_ci
6979bf215546Sopenharmony_ci   radv_emit_userdata_task_ib_only(cmd_buffer, ib_va, first_task ? 8 : 0);
6980bf215546Sopenharmony_ci}
6981bf215546Sopenharmony_ci
6982bf215546Sopenharmony_ciALWAYS_INLINE static void
6983bf215546Sopenharmony_ciradv_emit_draw_packets_indexed(struct radv_cmd_buffer *cmd_buffer,
6984bf215546Sopenharmony_ci                               const struct radv_draw_info *info,
6985bf215546Sopenharmony_ci                               uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *minfo,
6986bf215546Sopenharmony_ci                               uint32_t stride,
6987bf215546Sopenharmony_ci                               const int32_t *vertexOffset)
6988bf215546Sopenharmony_ci
6989bf215546Sopenharmony_ci{
6990bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
6991bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
6992bf215546Sopenharmony_ci   const int index_size = radv_get_vgt_index_size(state->index_type);
6993bf215546Sopenharmony_ci   unsigned i = 0;
6994bf215546Sopenharmony_ci   const bool uses_drawid = state->graphics_pipeline->uses_drawid;
6995bf215546Sopenharmony_ci   const bool can_eop =
6996bf215546Sopenharmony_ci      !uses_drawid && cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10;
6997bf215546Sopenharmony_ci
6998bf215546Sopenharmony_ci   if (uses_drawid) {
6999bf215546Sopenharmony_ci      if (vertexOffset) {
7000bf215546Sopenharmony_ci         radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset);
7001bf215546Sopenharmony_ci         vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
7002bf215546Sopenharmony_ci            const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
7003bf215546Sopenharmony_ci
7004bf215546Sopenharmony_ci            /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
7005bf215546Sopenharmony_ci            if (!remaining_indexes &&
7006bf215546Sopenharmony_ci                cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
7007bf215546Sopenharmony_ci               continue;
7008bf215546Sopenharmony_ci
7009bf215546Sopenharmony_ci            if (i > 0)
7010bf215546Sopenharmony_ci               radeon_set_sh_reg(cs, state->graphics_pipeline->vtx_base_sgpr + sizeof(uint32_t), i);
7011bf215546Sopenharmony_ci
7012bf215546Sopenharmony_ci            const uint64_t index_va = state->index_va + draw->firstIndex * index_size;
7013bf215546Sopenharmony_ci
7014bf215546Sopenharmony_ci            if (!state->subpass->view_mask) {
7015bf215546Sopenharmony_ci               radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
7016bf215546Sopenharmony_ci            } else {
7017bf215546Sopenharmony_ci               u_foreach_bit(view, state->subpass->view_mask) {
7018bf215546Sopenharmony_ci                  radv_emit_view_index(cmd_buffer, view);
7019bf215546Sopenharmony_ci
7020bf215546Sopenharmony_ci                  radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
7021bf215546Sopenharmony_ci               }
7022bf215546Sopenharmony_ci            }
7023bf215546Sopenharmony_ci         }
7024bf215546Sopenharmony_ci      } else {
7025bf215546Sopenharmony_ci         vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
7026bf215546Sopenharmony_ci            const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
7027bf215546Sopenharmony_ci
7028bf215546Sopenharmony_ci            /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
7029bf215546Sopenharmony_ci            if (!remaining_indexes &&
7030bf215546Sopenharmony_ci                cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
7031bf215546Sopenharmony_ci               continue;
7032bf215546Sopenharmony_ci
7033bf215546Sopenharmony_ci            if (i > 0) {
7034bf215546Sopenharmony_ci               if (state->last_vertex_offset != draw->vertexOffset)
7035bf215546Sopenharmony_ci                  radv_emit_userdata_vertex_drawid(cmd_buffer, draw->vertexOffset, i);
7036bf215546Sopenharmony_ci               else
7037bf215546Sopenharmony_ci                  radeon_set_sh_reg(cs, state->graphics_pipeline->vtx_base_sgpr + sizeof(uint32_t), i);
7038bf215546Sopenharmony_ci            } else
7039bf215546Sopenharmony_ci               radv_emit_userdata_vertex(cmd_buffer, info, draw->vertexOffset);
7040bf215546Sopenharmony_ci
7041bf215546Sopenharmony_ci            const uint64_t index_va = state->index_va + draw->firstIndex * index_size;
7042bf215546Sopenharmony_ci
7043bf215546Sopenharmony_ci            if (!state->subpass->view_mask) {
7044bf215546Sopenharmony_ci               radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
7045bf215546Sopenharmony_ci            } else {
7046bf215546Sopenharmony_ci               u_foreach_bit(view, state->subpass->view_mask) {
7047bf215546Sopenharmony_ci                  radv_emit_view_index(cmd_buffer, view);
7048bf215546Sopenharmony_ci
7049bf215546Sopenharmony_ci                  radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
7050bf215546Sopenharmony_ci               }
7051bf215546Sopenharmony_ci            }
7052bf215546Sopenharmony_ci         }
7053bf215546Sopenharmony_ci      }
7054bf215546Sopenharmony_ci      if (drawCount > 1) {
7055bf215546Sopenharmony_ci         state->last_drawid = drawCount - 1;
7056bf215546Sopenharmony_ci      }
7057bf215546Sopenharmony_ci   } else {
7058bf215546Sopenharmony_ci      if (vertexOffset) {
7059bf215546Sopenharmony_ci         if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX10) {
7060bf215546Sopenharmony_ci            /* GFX10 has a bug that consecutive draw packets with NOT_EOP must not have
7061bf215546Sopenharmony_ci             * count == 0 for the last draw that doesn't have NOT_EOP.
7062bf215546Sopenharmony_ci             */
7063bf215546Sopenharmony_ci            while (drawCount > 1) {
7064bf215546Sopenharmony_ci               const VkMultiDrawIndexedInfoEXT *last = (const VkMultiDrawIndexedInfoEXT*)(((const uint8_t*)minfo) + (drawCount - 1) * stride);
7065bf215546Sopenharmony_ci               if (last->indexCount)
7066bf215546Sopenharmony_ci                  break;
7067bf215546Sopenharmony_ci               drawCount--;
7068bf215546Sopenharmony_ci            }
7069bf215546Sopenharmony_ci         }
7070bf215546Sopenharmony_ci
7071bf215546Sopenharmony_ci         radv_emit_userdata_vertex(cmd_buffer, info, *vertexOffset);
7072bf215546Sopenharmony_ci         vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
7073bf215546Sopenharmony_ci            const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
7074bf215546Sopenharmony_ci
7075bf215546Sopenharmony_ci            /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
7076bf215546Sopenharmony_ci            if (!remaining_indexes &&
7077bf215546Sopenharmony_ci                cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
7078bf215546Sopenharmony_ci               continue;
7079bf215546Sopenharmony_ci
7080bf215546Sopenharmony_ci            const uint64_t index_va = state->index_va + draw->firstIndex * index_size;
7081bf215546Sopenharmony_ci
7082bf215546Sopenharmony_ci            if (!state->subpass->view_mask) {
7083bf215546Sopenharmony_ci               radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && i < drawCount - 1);
7084bf215546Sopenharmony_ci            } else {
7085bf215546Sopenharmony_ci               u_foreach_bit(view, state->subpass->view_mask) {
7086bf215546Sopenharmony_ci                  radv_emit_view_index(cmd_buffer, view);
7087bf215546Sopenharmony_ci
7088bf215546Sopenharmony_ci                  radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
7089bf215546Sopenharmony_ci               }
7090bf215546Sopenharmony_ci            }
7091bf215546Sopenharmony_ci         }
7092bf215546Sopenharmony_ci      } else {
7093bf215546Sopenharmony_ci         vk_foreach_multi_draw_indexed(draw, i, minfo, drawCount, stride) {
7094bf215546Sopenharmony_ci            const uint32_t remaining_indexes = MAX2(state->max_index_count, draw->firstIndex) - draw->firstIndex;
7095bf215546Sopenharmony_ci
7096bf215546Sopenharmony_ci            /* Skip draw calls with 0-sized index buffers if the GPU can't handle them */
7097bf215546Sopenharmony_ci            if (!remaining_indexes &&
7098bf215546Sopenharmony_ci                cmd_buffer->device->physical_device->rad_info.has_zero_index_buffer_bug)
7099bf215546Sopenharmony_ci               continue;
7100bf215546Sopenharmony_ci
7101bf215546Sopenharmony_ci            const VkMultiDrawIndexedInfoEXT *next = (const VkMultiDrawIndexedInfoEXT*)(i < drawCount - 1 ? ((uint8_t*)draw + stride) : NULL);
7102bf215546Sopenharmony_ci            const bool offset_changes = next && next->vertexOffset != draw->vertexOffset;
7103bf215546Sopenharmony_ci            radv_emit_userdata_vertex(cmd_buffer, info, draw->vertexOffset);
7104bf215546Sopenharmony_ci
7105bf215546Sopenharmony_ci            const uint64_t index_va = state->index_va + draw->firstIndex * index_size;
7106bf215546Sopenharmony_ci
7107bf215546Sopenharmony_ci            if (!state->subpass->view_mask) {
7108bf215546Sopenharmony_ci               radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, can_eop && !offset_changes && i < drawCount - 1);
7109bf215546Sopenharmony_ci            } else {
7110bf215546Sopenharmony_ci               u_foreach_bit(view, state->subpass->view_mask) {
7111bf215546Sopenharmony_ci                  radv_emit_view_index(cmd_buffer, view);
7112bf215546Sopenharmony_ci
7113bf215546Sopenharmony_ci                  radv_cs_emit_draw_indexed_packet(cmd_buffer, index_va, remaining_indexes, draw->indexCount, false);
7114bf215546Sopenharmony_ci               }
7115bf215546Sopenharmony_ci            }
7116bf215546Sopenharmony_ci         }
7117bf215546Sopenharmony_ci      }
7118bf215546Sopenharmony_ci      if (drawCount > 1) {
7119bf215546Sopenharmony_ci         state->last_drawid = drawCount - 1;
7120bf215546Sopenharmony_ci      }
7121bf215546Sopenharmony_ci   }
7122bf215546Sopenharmony_ci}
7123bf215546Sopenharmony_ci
7124bf215546Sopenharmony_ciALWAYS_INLINE static void
7125bf215546Sopenharmony_ciradv_emit_direct_draw_packets(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
7126bf215546Sopenharmony_ci                              uint32_t drawCount, const VkMultiDrawInfoEXT *minfo,
7127bf215546Sopenharmony_ci                              uint32_t use_opaque, uint32_t stride)
7128bf215546Sopenharmony_ci{
7129bf215546Sopenharmony_ci   unsigned i = 0;
7130bf215546Sopenharmony_ci   const uint32_t view_mask = cmd_buffer->state.subpass->view_mask;
7131bf215546Sopenharmony_ci   const bool uses_drawid = cmd_buffer->state.graphics_pipeline->uses_drawid;
7132bf215546Sopenharmony_ci   uint32_t last_start = 0;
7133bf215546Sopenharmony_ci
7134bf215546Sopenharmony_ci   vk_foreach_multi_draw(draw, i, minfo, drawCount, stride) {
7135bf215546Sopenharmony_ci      if (!i)
7136bf215546Sopenharmony_ci         radv_emit_userdata_vertex(cmd_buffer, info, draw->firstVertex);
7137bf215546Sopenharmony_ci      else
7138bf215546Sopenharmony_ci         radv_emit_userdata_vertex_drawid(cmd_buffer, draw->firstVertex, uses_drawid ? i : 0);
7139bf215546Sopenharmony_ci
7140bf215546Sopenharmony_ci      if (!view_mask) {
7141bf215546Sopenharmony_ci         radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque);
7142bf215546Sopenharmony_ci      } else {
7143bf215546Sopenharmony_ci         u_foreach_bit(view, view_mask) {
7144bf215546Sopenharmony_ci            radv_emit_view_index(cmd_buffer, view);
7145bf215546Sopenharmony_ci            radv_cs_emit_draw_packet(cmd_buffer, draw->vertexCount, use_opaque);
7146bf215546Sopenharmony_ci         }
7147bf215546Sopenharmony_ci      }
7148bf215546Sopenharmony_ci      last_start = draw->firstVertex;
7149bf215546Sopenharmony_ci   }
7150bf215546Sopenharmony_ci   if (drawCount > 1) {
7151bf215546Sopenharmony_ci       struct radv_cmd_state *state = &cmd_buffer->state;
7152bf215546Sopenharmony_ci       state->last_vertex_offset = last_start;
7153bf215546Sopenharmony_ci       if (uses_drawid)
7154bf215546Sopenharmony_ci           state->last_drawid = drawCount - 1;
7155bf215546Sopenharmony_ci   }
7156bf215546Sopenharmony_ci}
7157bf215546Sopenharmony_ci
7158bf215546Sopenharmony_ciALWAYS_INLINE static void
7159bf215546Sopenharmony_ciradv_emit_direct_mesh_draw_packet(struct radv_cmd_buffer *cmd_buffer,
7160bf215546Sopenharmony_ci                                  uint32_t x, uint32_t y, uint32_t z,
7161bf215546Sopenharmony_ci                                  uint32_t first_task)
7162bf215546Sopenharmony_ci{
7163bf215546Sopenharmony_ci   const uint32_t view_mask = cmd_buffer->state.subpass->view_mask;
7164bf215546Sopenharmony_ci   const uint32_t count = x * y * z;
7165bf215546Sopenharmony_ci
7166bf215546Sopenharmony_ci   radv_emit_userdata_mesh(cmd_buffer, x, y, z, first_task);
7167bf215546Sopenharmony_ci
7168bf215546Sopenharmony_ci   if (!view_mask) {
7169bf215546Sopenharmony_ci      radv_cs_emit_draw_packet(cmd_buffer, count, 0);
7170bf215546Sopenharmony_ci   } else {
7171bf215546Sopenharmony_ci      u_foreach_bit(view, view_mask) {
7172bf215546Sopenharmony_ci         radv_emit_view_index(cmd_buffer, view);
7173bf215546Sopenharmony_ci         radv_cs_emit_draw_packet(cmd_buffer, count, 0);
7174bf215546Sopenharmony_ci      }
7175bf215546Sopenharmony_ci   }
7176bf215546Sopenharmony_ci}
7177bf215546Sopenharmony_ci
7178bf215546Sopenharmony_ciALWAYS_INLINE static void
7179bf215546Sopenharmony_ciradv_emit_direct_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y,
7180bf215546Sopenharmony_ci                                       uint32_t z, uint32_t first_task)
7181bf215546Sopenharmony_ci{
7182bf215546Sopenharmony_ci   uint64_t fake_ib_va = 0;
7183bf215546Sopenharmony_ci   const uint32_t view_mask = cmd_buffer->state.subpass->view_mask;
7184bf215546Sopenharmony_ci   const unsigned num_views = MAX2(1, util_bitcount(view_mask));
7185bf215546Sopenharmony_ci   unsigned ace_predication_size = num_views * 6; /* DISPATCH_TASKMESH_DIRECT_ACE size */
7186bf215546Sopenharmony_ci
7187bf215546Sopenharmony_ci   if (first_task) {
7188bf215546Sopenharmony_ci      /* Pass this as the IB to the shader for emulating firstTask in task shaders. */
7189bf215546Sopenharmony_ci      uint32_t fake_ib_dwords[2] = {x, first_task};
7190bf215546Sopenharmony_ci      unsigned fake_ib_offset;
7191bf215546Sopenharmony_ci      radv_cmd_buffer_upload_data(cmd_buffer, 8, fake_ib_dwords, &fake_ib_offset);
7192bf215546Sopenharmony_ci      fake_ib_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + fake_ib_offset;
7193bf215546Sopenharmony_ci   }
7194bf215546Sopenharmony_ci
7195bf215546Sopenharmony_ci   radv_emit_userdata_task(cmd_buffer, x, y, z, 0, first_task, fake_ib_va);
7196bf215546Sopenharmony_ci   radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
7197bf215546Sopenharmony_ci   radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
7198bf215546Sopenharmony_ci                                    cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
7199bf215546Sopenharmony_ci                                    ace_predication_size);
7200bf215546Sopenharmony_ci
7201bf215546Sopenharmony_ci   if (!view_mask) {
7202bf215546Sopenharmony_ci      radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, x, y, z);
7203bf215546Sopenharmony_ci      radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer);
7204bf215546Sopenharmony_ci   } else {
7205bf215546Sopenharmony_ci      u_foreach_bit (view, view_mask) {
7206bf215546Sopenharmony_ci         radv_emit_view_index(cmd_buffer, view);
7207bf215546Sopenharmony_ci         radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, x, y, z);
7208bf215546Sopenharmony_ci         radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer);
7209bf215546Sopenharmony_ci      }
7210bf215546Sopenharmony_ci   }
7211bf215546Sopenharmony_ci}
7212bf215546Sopenharmony_ci
7213bf215546Sopenharmony_cistatic void
7214bf215546Sopenharmony_ciradv_emit_indirect_taskmesh_draw_packets(struct radv_cmd_buffer *cmd_buffer,
7215bf215546Sopenharmony_ci                                         const struct radv_draw_info *info, uint64_t nv_ib_va,
7216bf215546Sopenharmony_ci                                         uint32_t nv_ib_stride)
7217bf215546Sopenharmony_ci{
7218bf215546Sopenharmony_ci   const uint32_t view_mask = cmd_buffer->state.subpass->view_mask;
7219bf215546Sopenharmony_ci   struct radeon_winsys *ws = cmd_buffer->device->ws;
7220bf215546Sopenharmony_ci   const unsigned num_views = MAX2(1, util_bitcount(view_mask));
7221bf215546Sopenharmony_ci   unsigned ace_predication_size = num_views * 11; /* DISPATCH_TASKMESH_INDIRECT_MULTI_ACE size */
7222bf215546Sopenharmony_ci   struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs;
7223bf215546Sopenharmony_ci
7224bf215546Sopenharmony_ci   const uint64_t va =
7225bf215546Sopenharmony_ci      radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
7226bf215546Sopenharmony_ci   const uint64_t count_va = !info->count_buffer
7227bf215546Sopenharmony_ci                                ? 0
7228bf215546Sopenharmony_ci                                : radv_buffer_get_va(info->count_buffer->bo) +
7229bf215546Sopenharmony_ci                                     info->count_buffer->offset + info->count_buffer_offset;
7230bf215546Sopenharmony_ci   uint64_t workaround_cond_va = 0;
7231bf215546Sopenharmony_ci
7232bf215546Sopenharmony_ci   if (count_va) {
7233bf215546Sopenharmony_ci      radv_cs_add_buffer(ws, cmd_buffer->ace_internal.cs, info->count_buffer->bo);
7234bf215546Sopenharmony_ci
7235bf215546Sopenharmony_ci      /* MEC firmware bug workaround.
7236bf215546Sopenharmony_ci       * When the count buffer contains zero, DISPATCH_TASKMESH_INDIRECT_MULTI_ACE hangs.
7237bf215546Sopenharmony_ci       * - We must ensure that DISPATCH_TASKMESH_INDIRECT_MULTI_ACE
7238bf215546Sopenharmony_ci       *   is only executed when the count buffer contains non-zero.
7239bf215546Sopenharmony_ci       * - Furthermore, we must also ensure that each DISPATCH_TASKMESH_GFX packet
7240bf215546Sopenharmony_ci       *   has a matching ACE packet.
7241bf215546Sopenharmony_ci       *
7242bf215546Sopenharmony_ci       * As a workaround:
7243bf215546Sopenharmony_ci       * - Reserve a dword in the upload buffer and initialize it to 1 for the workaround
7244bf215546Sopenharmony_ci       * - When count != 0, write 0 to the workaround BO and execute the indirect dispatch
7245bf215546Sopenharmony_ci       * - When workaround BO != 0 (count was 0), execute an empty direct dispatch
7246bf215546Sopenharmony_ci       */
7247bf215546Sopenharmony_ci
7248bf215546Sopenharmony_ci      uint32_t workaround_cond_init = 0;
7249bf215546Sopenharmony_ci      uint32_t workaround_cond_off;
7250bf215546Sopenharmony_ci      if (!radv_cmd_buffer_upload_data(cmd_buffer, 4, &workaround_cond_init, &workaround_cond_off))
7251bf215546Sopenharmony_ci         cmd_buffer->record_result = VK_ERROR_OUT_OF_HOST_MEMORY;
7252bf215546Sopenharmony_ci
7253bf215546Sopenharmony_ci      workaround_cond_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + workaround_cond_off;
7254bf215546Sopenharmony_ci
7255bf215546Sopenharmony_ci      radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0));
7256bf215546Sopenharmony_ci      radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
7257bf215546Sopenharmony_ci                             COPY_DATA_WR_CONFIRM);
7258bf215546Sopenharmony_ci      radeon_emit(ace_cs, 1);
7259bf215546Sopenharmony_ci      radeon_emit(ace_cs, 0);
7260bf215546Sopenharmony_ci      radeon_emit(ace_cs, workaround_cond_va);
7261bf215546Sopenharmony_ci      radeon_emit(ace_cs, workaround_cond_va >> 32);
7262bf215546Sopenharmony_ci
7263bf215546Sopenharmony_ci      /* 2x COND_EXEC + 1x COPY_DATA + Nx DISPATCH_TASKMESH_DIRECT_ACE */
7264bf215546Sopenharmony_ci      ace_predication_size += 2 * 5 + 6 + 6 * num_views;
7265bf215546Sopenharmony_ci   }
7266bf215546Sopenharmony_ci
7267bf215546Sopenharmony_ci   radv_cs_add_buffer(ws, cmd_buffer->ace_internal.cs, info->indirect->bo);
7268bf215546Sopenharmony_ci   radv_emit_userdata_task_ib_only(cmd_buffer, nv_ib_va, nv_ib_stride);
7269bf215546Sopenharmony_ci   radv_emit_userdata_mesh_first_task_0_draw_id_0(cmd_buffer);
7270bf215546Sopenharmony_ci   radv_cs_emit_compute_predication(&cmd_buffer->state, cmd_buffer->ace_internal.cs,
7271bf215546Sopenharmony_ci                                    cmd_buffer->mec_inv_pred_va, &cmd_buffer->mec_inv_pred_emitted,
7272bf215546Sopenharmony_ci                                    ace_predication_size);
7273bf215546Sopenharmony_ci
7274bf215546Sopenharmony_ci   if (workaround_cond_va) {
7275bf215546Sopenharmony_ci      radeon_emit(ace_cs, PKT3(PKT3_COND_EXEC, 3, 0));
7276bf215546Sopenharmony_ci      radeon_emit(ace_cs, count_va);
7277bf215546Sopenharmony_ci      radeon_emit(ace_cs, count_va >> 32);
7278bf215546Sopenharmony_ci      radeon_emit(ace_cs, 0);
7279bf215546Sopenharmony_ci      radeon_emit(ace_cs,
7280bf215546Sopenharmony_ci                  6 + 11 * num_views); /* 1x COPY_DATA + Nx DISPATCH_TASKMESH_INDIRECT_MULTI_ACE */
7281bf215546Sopenharmony_ci
7282bf215546Sopenharmony_ci      radeon_emit(ace_cs, PKT3(PKT3_COPY_DATA, 4, 0));
7283bf215546Sopenharmony_ci      radeon_emit(ace_cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
7284bf215546Sopenharmony_ci                             COPY_DATA_WR_CONFIRM);
7285bf215546Sopenharmony_ci      radeon_emit(ace_cs, 0);
7286bf215546Sopenharmony_ci      radeon_emit(ace_cs, 0);
7287bf215546Sopenharmony_ci      radeon_emit(ace_cs, workaround_cond_va);
7288bf215546Sopenharmony_ci      radeon_emit(ace_cs, workaround_cond_va >> 32);
7289bf215546Sopenharmony_ci   }
7290bf215546Sopenharmony_ci
7291bf215546Sopenharmony_ci   if (!view_mask) {
7292bf215546Sopenharmony_ci      radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count,
7293bf215546Sopenharmony_ci                                                               count_va, info->stride);
7294bf215546Sopenharmony_ci      radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer);
7295bf215546Sopenharmony_ci   } else {
7296bf215546Sopenharmony_ci      u_foreach_bit (view, view_mask) {
7297bf215546Sopenharmony_ci         radv_emit_view_index(cmd_buffer, view);
7298bf215546Sopenharmony_ci         radv_cs_emit_dispatch_taskmesh_indirect_multi_ace_packet(cmd_buffer, va, info->count,
7299bf215546Sopenharmony_ci                                                                  count_va, info->stride);
7300bf215546Sopenharmony_ci         radv_cs_emit_dispatch_taskmesh_gfx_packet(cmd_buffer);
7301bf215546Sopenharmony_ci      }
7302bf215546Sopenharmony_ci   }
7303bf215546Sopenharmony_ci
7304bf215546Sopenharmony_ci   if (workaround_cond_va) {
7305bf215546Sopenharmony_ci      radeon_emit(ace_cs, PKT3(PKT3_COND_EXEC, 3, 0));
7306bf215546Sopenharmony_ci      radeon_emit(ace_cs, workaround_cond_va);
7307bf215546Sopenharmony_ci      radeon_emit(ace_cs, workaround_cond_va >> 32);
7308bf215546Sopenharmony_ci      radeon_emit(ace_cs, 0);
7309bf215546Sopenharmony_ci      radeon_emit(ace_cs, 6 * num_views); /* Nx DISPATCH_TASKMESH_DIRECT_ACE */
7310bf215546Sopenharmony_ci
7311bf215546Sopenharmony_ci      for (unsigned v = 0; v < num_views; ++v) {
7312bf215546Sopenharmony_ci         radv_cs_emit_dispatch_taskmesh_direct_ace_packet(cmd_buffer, 0, 0, 0);
7313bf215546Sopenharmony_ci      }
7314bf215546Sopenharmony_ci   }
7315bf215546Sopenharmony_ci}
7316bf215546Sopenharmony_ci
7317bf215546Sopenharmony_cistatic void
7318bf215546Sopenharmony_ciradv_emit_indirect_draw_packets(struct radv_cmd_buffer *cmd_buffer,
7319bf215546Sopenharmony_ci                                const struct radv_draw_info *info)
7320bf215546Sopenharmony_ci{
7321bf215546Sopenharmony_ci   const struct radv_cmd_state *state = &cmd_buffer->state;
7322bf215546Sopenharmony_ci   struct radeon_winsys *ws = cmd_buffer->device->ws;
7323bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
7324bf215546Sopenharmony_ci   const uint64_t va =
7325bf215546Sopenharmony_ci      radv_buffer_get_va(info->indirect->bo) + info->indirect->offset + info->indirect_offset;
7326bf215546Sopenharmony_ci   const uint64_t count_va = info->count_buffer
7327bf215546Sopenharmony_ci                                ? radv_buffer_get_va(info->count_buffer->bo) +
7328bf215546Sopenharmony_ci                                     info->count_buffer->offset + info->count_buffer_offset
7329bf215546Sopenharmony_ci                                : 0;
7330bf215546Sopenharmony_ci
7331bf215546Sopenharmony_ci   radv_cs_add_buffer(ws, cs, info->indirect->bo);
7332bf215546Sopenharmony_ci
7333bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0));
7334bf215546Sopenharmony_ci   radeon_emit(cs, 1);
7335bf215546Sopenharmony_ci   radeon_emit(cs, va);
7336bf215546Sopenharmony_ci   radeon_emit(cs, va >> 32);
7337bf215546Sopenharmony_ci
7338bf215546Sopenharmony_ci   if (info->count_buffer) {
7339bf215546Sopenharmony_ci      radv_cs_add_buffer(ws, cs, info->count_buffer->bo);
7340bf215546Sopenharmony_ci   }
7341bf215546Sopenharmony_ci
7342bf215546Sopenharmony_ci   if (!state->subpass->view_mask) {
7343bf215546Sopenharmony_ci      radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
7344bf215546Sopenharmony_ci                                        info->stride);
7345bf215546Sopenharmony_ci   } else {
7346bf215546Sopenharmony_ci      u_foreach_bit(i, state->subpass->view_mask)
7347bf215546Sopenharmony_ci      {
7348bf215546Sopenharmony_ci         radv_emit_view_index(cmd_buffer, i);
7349bf215546Sopenharmony_ci
7350bf215546Sopenharmony_ci         radv_cs_emit_indirect_draw_packet(cmd_buffer, info->indexed, info->count, count_va,
7351bf215546Sopenharmony_ci                                           info->stride);
7352bf215546Sopenharmony_ci      }
7353bf215546Sopenharmony_ci   }
7354bf215546Sopenharmony_ci}
7355bf215546Sopenharmony_ci
/*
 * Vega and Raven have a bug which triggers if multiple sets of context
 * registers are active at the same time with different scissor values.
 *
 * There are two possible workarounds:
 * 1) Wait for PS_PARTIAL_FLUSH every time the scissor is changed. That way
 *    there is only ever 1 active set of scissor values at the same time.
 *
 * 2) Whenever the hardware switches contexts we have to set the scissor
 *    registers again even if it is a noop. That way the new context gets
 *    the correct scissor values.
 *
 * This implements option 2. radv_need_late_scissor_emission needs to
 * return true on affected HW if radv_emit_all_graphics_states sets
 * any context registers.
 */
7372bf215546Sopenharmony_cistatic bool
7373bf215546Sopenharmony_ciradv_need_late_scissor_emission(struct radv_cmd_buffer *cmd_buffer,
7374bf215546Sopenharmony_ci                                const struct radv_draw_info *info)
7375bf215546Sopenharmony_ci{
7376bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
7377bf215546Sopenharmony_ci
7378bf215546Sopenharmony_ci   if (!cmd_buffer->device->physical_device->rad_info.has_gfx9_scissor_bug)
7379bf215546Sopenharmony_ci      return false;
7380bf215546Sopenharmony_ci
7381bf215546Sopenharmony_ci   if (cmd_buffer->state.context_roll_without_scissor_emitted || info->strmout_buffer)
7382bf215546Sopenharmony_ci      return true;
7383bf215546Sopenharmony_ci
7384bf215546Sopenharmony_ci   uint64_t used_states =
7385bf215546Sopenharmony_ci      cmd_buffer->state.graphics_pipeline->needed_dynamic_state | ~RADV_CMD_DIRTY_DYNAMIC_ALL;
7386bf215546Sopenharmony_ci
7387bf215546Sopenharmony_ci   /* Index, vertex and streamout buffers don't change context regs, and
7388bf215546Sopenharmony_ci    * pipeline is already handled.
7389bf215546Sopenharmony_ci    */
7390bf215546Sopenharmony_ci   used_states &= ~(RADV_CMD_DIRTY_INDEX_BUFFER | RADV_CMD_DIRTY_VERTEX_BUFFER |
7391bf215546Sopenharmony_ci                    RADV_CMD_DIRTY_DYNAMIC_VERTEX_INPUT | RADV_CMD_DIRTY_STREAMOUT_BUFFER |
7392bf215546Sopenharmony_ci                    RADV_CMD_DIRTY_PIPELINE);
7393bf215546Sopenharmony_ci
7394bf215546Sopenharmony_ci   if (cmd_buffer->state.dirty & used_states)
7395bf215546Sopenharmony_ci      return true;
7396bf215546Sopenharmony_ci
7397bf215546Sopenharmony_ci   uint32_t primitive_reset_index = radv_get_primitive_reset_index(cmd_buffer);
7398bf215546Sopenharmony_ci
7399bf215546Sopenharmony_ci   if (info->indexed && state->dynamic.primitive_restart_enable &&
7400bf215546Sopenharmony_ci       primitive_reset_index != state->last_primitive_reset_index)
7401bf215546Sopenharmony_ci      return true;
7402bf215546Sopenharmony_ci
7403bf215546Sopenharmony_ci   return false;
7404bf215546Sopenharmony_ci}
7405bf215546Sopenharmony_ci
/**
 * Tell whether NGG culling should be skipped for a draw.
 *
 * Returns true only for direct, non-tessellated draws of fewer than 128
 * vertices.
 */
ALWAYS_INLINE static bool
radv_skip_ngg_culling(bool has_tess, const unsigned vtx_cnt,
                      bool indirect)
{
   /* When tessellation is used, what matters is the number of tessellated
    * vertices, so always assume it's not a small draw. Indirect draws are
    * never treated as small either.
    */
   if (has_tess || indirect)
      return false;

   /* If we have to draw only a few vertices, we get better latency if
    * we disable NGG culling.
    */
   return vtx_cnt < 128;
}
7418bf215546Sopenharmony_ci
7419bf215546Sopenharmony_ciALWAYS_INLINE static uint32_t
7420bf215546Sopenharmony_ciradv_get_ngg_culling_settings(struct radv_cmd_buffer *cmd_buffer, bool vp_y_inverted)
7421bf215546Sopenharmony_ci{
7422bf215546Sopenharmony_ci   const struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
7423bf215546Sopenharmony_ci   const struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
7424bf215546Sopenharmony_ci
7425bf215546Sopenharmony_ci   /* Cull every triangle when rasterizer discard is enabled. */
7426bf215546Sopenharmony_ci   if (d->rasterizer_discard_enable ||
7427bf215546Sopenharmony_ci       G_028810_DX_RASTERIZATION_KILL(cmd_buffer->state.graphics_pipeline->pa_cl_clip_cntl))
7428bf215546Sopenharmony_ci      return radv_nggc_front_face | radv_nggc_back_face;
7429bf215546Sopenharmony_ci
7430bf215546Sopenharmony_ci   uint32_t pa_su_sc_mode_cntl = cmd_buffer->state.graphics_pipeline->pa_su_sc_mode_cntl;
7431bf215546Sopenharmony_ci   uint32_t nggc_settings = radv_nggc_none;
7432bf215546Sopenharmony_ci
7433bf215546Sopenharmony_ci   /* The culling code needs to know whether face is CW or CCW. */
7434bf215546Sopenharmony_ci   bool ccw = (pipeline->needed_dynamic_state & RADV_DYNAMIC_FRONT_FACE)
7435bf215546Sopenharmony_ci              ? d->front_face == VK_FRONT_FACE_COUNTER_CLOCKWISE
7436bf215546Sopenharmony_ci              : G_028814_FACE(pa_su_sc_mode_cntl) == 0;
7437bf215546Sopenharmony_ci
7438bf215546Sopenharmony_ci   /* Take inverted viewport into account. */
7439bf215546Sopenharmony_ci   ccw ^= vp_y_inverted;
7440bf215546Sopenharmony_ci
7441bf215546Sopenharmony_ci   if (ccw)
7442bf215546Sopenharmony_ci      nggc_settings |= radv_nggc_face_is_ccw;
7443bf215546Sopenharmony_ci
7444bf215546Sopenharmony_ci   /* Face culling settings. */
7445bf215546Sopenharmony_ci   if ((pipeline->needed_dynamic_state & RADV_DYNAMIC_CULL_MODE)
7446bf215546Sopenharmony_ci         ? (d->cull_mode & VK_CULL_MODE_FRONT_BIT)
7447bf215546Sopenharmony_ci         : G_028814_CULL_FRONT(pa_su_sc_mode_cntl))
7448bf215546Sopenharmony_ci      nggc_settings |= radv_nggc_front_face;
7449bf215546Sopenharmony_ci   if ((pipeline->needed_dynamic_state & RADV_DYNAMIC_CULL_MODE)
7450bf215546Sopenharmony_ci         ? (d->cull_mode & VK_CULL_MODE_BACK_BIT)
7451bf215546Sopenharmony_ci         : G_028814_CULL_BACK(pa_su_sc_mode_cntl))
7452bf215546Sopenharmony_ci      nggc_settings |= radv_nggc_back_face;
7453bf215546Sopenharmony_ci
7454bf215546Sopenharmony_ci   /* Small primitive culling is only valid when conservative overestimation is not used. It's also
7455bf215546Sopenharmony_ci    * disabled for user sample locations because small primitive culling assumes a sample
7456bf215546Sopenharmony_ci    * position at (0.5, 0.5). */
7457bf215546Sopenharmony_ci   if (!pipeline->uses_conservative_overestimate && !pipeline->uses_user_sample_locations) {
7458bf215546Sopenharmony_ci      nggc_settings |= radv_nggc_small_primitives;
7459bf215546Sopenharmony_ci
7460bf215546Sopenharmony_ci      /* small_prim_precision = num_samples / 2^subpixel_bits
7461bf215546Sopenharmony_ci       * num_samples is also always a power of two, so the small prim precision can only be
7462bf215546Sopenharmony_ci       * a power of two between 2^-2 and 2^-6, therefore it's enough to remember the exponent.
7463bf215546Sopenharmony_ci       */
7464bf215546Sopenharmony_ci      unsigned subpixel_bits = 256;
7465bf215546Sopenharmony_ci      int32_t small_prim_precision_log2 = util_logbase2(pipeline->ms.num_samples) - util_logbase2(subpixel_bits);
7466bf215546Sopenharmony_ci      nggc_settings |= ((uint32_t) small_prim_precision_log2 << 24u);
7467bf215546Sopenharmony_ci   }
7468bf215546Sopenharmony_ci
7469bf215546Sopenharmony_ci   return nggc_settings;
7470bf215546Sopenharmony_ci}
7471bf215546Sopenharmony_ci
7472bf215546Sopenharmony_cistatic void
7473bf215546Sopenharmony_ciradv_emit_ngg_culling_state(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *draw_info)
7474bf215546Sopenharmony_ci{
7475bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
7476bf215546Sopenharmony_ci   const unsigned stage = pipeline->last_vgt_api_stage;
7477bf215546Sopenharmony_ci   const bool nggc_supported = pipeline->has_ngg_culling;
7478bf215546Sopenharmony_ci
7479bf215546Sopenharmony_ci   if (!nggc_supported && !cmd_buffer->state.last_nggc_settings) {
7480bf215546Sopenharmony_ci      /* Current shader doesn't support culling and culling was already disabled:
7481bf215546Sopenharmony_ci       * No further steps needed, just remember the SGPR's location is not set.
7482bf215546Sopenharmony_ci       */
7483bf215546Sopenharmony_ci      cmd_buffer->state.last_nggc_settings_sgpr_idx = -1;
7484bf215546Sopenharmony_ci      return;
7485bf215546Sopenharmony_ci   }
7486bf215546Sopenharmony_ci
7487bf215546Sopenharmony_ci   /* Check dirty flags:
7488bf215546Sopenharmony_ci    * - Dirty pipeline: SGPR index may have changed (we have to re-emit if changed).
7489bf215546Sopenharmony_ci    * - Dirty dynamic flags: culling settings may have changed.
7490bf215546Sopenharmony_ci    */
7491bf215546Sopenharmony_ci   const bool dirty =
7492bf215546Sopenharmony_ci      cmd_buffer->state.dirty &
7493bf215546Sopenharmony_ci      (RADV_CMD_DIRTY_PIPELINE |
7494bf215546Sopenharmony_ci       RADV_CMD_DIRTY_DYNAMIC_CULL_MODE | RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE |
7495bf215546Sopenharmony_ci       RADV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE | RADV_CMD_DIRTY_DYNAMIC_VIEWPORT);
7496bf215546Sopenharmony_ci
7497bf215546Sopenharmony_ci   /* Check small draw status:
7498bf215546Sopenharmony_ci    * For small draw calls, we disable culling by setting the SGPR to 0.
7499bf215546Sopenharmony_ci    */
7500bf215546Sopenharmony_ci   const bool skip =
7501bf215546Sopenharmony_ci      radv_skip_ngg_culling(stage == MESA_SHADER_TESS_EVAL, draw_info->count, draw_info->indirect);
7502bf215546Sopenharmony_ci
7503bf215546Sopenharmony_ci   /* See if anything changed. */
7504bf215546Sopenharmony_ci   if (!dirty && skip == cmd_buffer->state.last_nggc_skip)
7505bf215546Sopenharmony_ci      return;
7506bf215546Sopenharmony_ci
7507bf215546Sopenharmony_ci   /* Remember small draw state. */
7508bf215546Sopenharmony_ci   cmd_buffer->state.last_nggc_skip = skip;
7509bf215546Sopenharmony_ci   const struct radv_shader *v = pipeline->base.shaders[stage];
7510bf215546Sopenharmony_ci   assert(v->info.has_ngg_culling == nggc_supported);
7511bf215546Sopenharmony_ci
7512bf215546Sopenharmony_ci   /* Find the user SGPR. */
7513bf215546Sopenharmony_ci   const uint32_t base_reg = pipeline->base.user_data_0[stage];
7514bf215546Sopenharmony_ci   const int8_t nggc_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_CULLING_SETTINGS].sgpr_idx;
7515bf215546Sopenharmony_ci   assert(!nggc_supported || nggc_sgpr_idx != -1);
7516bf215546Sopenharmony_ci
7517bf215546Sopenharmony_ci   /* Get viewport transform. */
7518bf215546Sopenharmony_ci   float vp_scale[2], vp_translate[2];
7519bf215546Sopenharmony_ci   memcpy(vp_scale, cmd_buffer->state.dynamic.viewport.xform[0].scale, 2 * sizeof(float));
7520bf215546Sopenharmony_ci   memcpy(vp_translate, cmd_buffer->state.dynamic.viewport.xform[0].translate, 2 * sizeof(float));
7521bf215546Sopenharmony_ci   bool vp_y_inverted = (-vp_scale[1] + vp_translate[1]) > (vp_scale[1] + vp_translate[1]);
7522bf215546Sopenharmony_ci
7523bf215546Sopenharmony_ci   /* Get current culling settings. */
7524bf215546Sopenharmony_ci   uint32_t nggc_settings = nggc_supported && !skip
7525bf215546Sopenharmony_ci                            ? radv_get_ngg_culling_settings(cmd_buffer, vp_y_inverted)
7526bf215546Sopenharmony_ci                            : radv_nggc_none;
7527bf215546Sopenharmony_ci
7528bf215546Sopenharmony_ci   bool emit_viewport = nggc_settings &&
7529bf215546Sopenharmony_ci                        (cmd_buffer->state.dirty & RADV_CMD_DIRTY_DYNAMIC_VIEWPORT ||
7530bf215546Sopenharmony_ci                         cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx ||
7531bf215546Sopenharmony_ci                         !cmd_buffer->state.last_nggc_settings);
7532bf215546Sopenharmony_ci
7533bf215546Sopenharmony_ci   if (emit_viewport) {
7534bf215546Sopenharmony_ci      /* Correction for inverted Y */
7535bf215546Sopenharmony_ci      if (vp_y_inverted) {
7536bf215546Sopenharmony_ci         vp_scale[1] = -vp_scale[1];
7537bf215546Sopenharmony_ci         vp_translate[1] = -vp_translate[1];
7538bf215546Sopenharmony_ci      }
7539bf215546Sopenharmony_ci
7540bf215546Sopenharmony_ci      /* Correction for number of samples per pixel. */
7541bf215546Sopenharmony_ci      for (unsigned i = 0; i < 2; ++i) {
7542bf215546Sopenharmony_ci         vp_scale[i] *= (float) pipeline->ms.num_samples;
7543bf215546Sopenharmony_ci         vp_translate[i] *= (float) pipeline->ms.num_samples;
7544bf215546Sopenharmony_ci      }
7545bf215546Sopenharmony_ci
7546bf215546Sopenharmony_ci      uint32_t vp_reg_values[4] = {fui(vp_scale[0]), fui(vp_scale[1]), fui(vp_translate[0]), fui(vp_translate[1])};
7547bf215546Sopenharmony_ci      const int8_t vp_sgpr_idx = v->info.user_sgprs_locs.shader_data[AC_UD_NGG_VIEWPORT].sgpr_idx;
7548bf215546Sopenharmony_ci      assert(vp_sgpr_idx != -1);
7549bf215546Sopenharmony_ci      radeon_set_sh_reg_seq(cmd_buffer->cs, base_reg + vp_sgpr_idx * 4, 4);
7550bf215546Sopenharmony_ci      radeon_emit_array(cmd_buffer->cs, vp_reg_values, 4);
7551bf215546Sopenharmony_ci   }
7552bf215546Sopenharmony_ci
7553bf215546Sopenharmony_ci   bool emit_settings = nggc_supported &&
7554bf215546Sopenharmony_ci                        (cmd_buffer->state.last_nggc_settings != nggc_settings ||
7555bf215546Sopenharmony_ci                         cmd_buffer->state.last_nggc_settings_sgpr_idx != nggc_sgpr_idx);
7556bf215546Sopenharmony_ci
7557bf215546Sopenharmony_ci   /* This needs to be emitted when culling is turned on
7558bf215546Sopenharmony_ci    * and when it's already on but some settings change.
7559bf215546Sopenharmony_ci    */
7560bf215546Sopenharmony_ci   if (emit_settings) {
7561bf215546Sopenharmony_ci      assert(nggc_sgpr_idx >= 0);
7562bf215546Sopenharmony_ci      radeon_set_sh_reg(cmd_buffer->cs, base_reg + nggc_sgpr_idx * 4, nggc_settings);
7563bf215546Sopenharmony_ci   }
7564bf215546Sopenharmony_ci
7565bf215546Sopenharmony_ci   /* These only need to be emitted when culling is turned on or off,
7566bf215546Sopenharmony_ci    * but not when it stays on and just some settings change.
7567bf215546Sopenharmony_ci    */
7568bf215546Sopenharmony_ci   if (!!cmd_buffer->state.last_nggc_settings != !!nggc_settings) {
7569bf215546Sopenharmony_ci      uint32_t rsrc2 = v->config.rsrc2;
7570bf215546Sopenharmony_ci
7571bf215546Sopenharmony_ci      if (!nggc_settings) {
7572bf215546Sopenharmony_ci         /* Allocate less LDS when culling is disabled. (But GS always needs it.) */
7573bf215546Sopenharmony_ci         if (stage != MESA_SHADER_GEOMETRY)
7574bf215546Sopenharmony_ci            rsrc2 = (rsrc2 & C_00B22C_LDS_SIZE) | S_00B22C_LDS_SIZE(v->info.num_lds_blocks_when_not_culling);
7575bf215546Sopenharmony_ci      }
7576bf215546Sopenharmony_ci
7577bf215546Sopenharmony_ci      /* When the pipeline is dirty and not yet emitted, don't write it here
7578bf215546Sopenharmony_ci       * because radv_emit_graphics_pipeline will overwrite this register.
7579bf215546Sopenharmony_ci       */
7580bf215546Sopenharmony_ci      if (!(cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) ||
7581bf215546Sopenharmony_ci          cmd_buffer->state.emitted_graphics_pipeline == pipeline) {
7582bf215546Sopenharmony_ci         radeon_set_sh_reg(cmd_buffer->cs, R_00B22C_SPI_SHADER_PGM_RSRC2_GS, rsrc2);
7583bf215546Sopenharmony_ci      }
7584bf215546Sopenharmony_ci   }
7585bf215546Sopenharmony_ci
7586bf215546Sopenharmony_ci   cmd_buffer->state.last_nggc_settings = nggc_settings;
7587bf215546Sopenharmony_ci   cmd_buffer->state.last_nggc_settings_sgpr_idx = nggc_sgpr_idx;
7588bf215546Sopenharmony_ci}
7589bf215546Sopenharmony_ci
7590bf215546Sopenharmony_cistatic void
7591bf215546Sopenharmony_ciradv_emit_all_graphics_states(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
7592bf215546Sopenharmony_ci                              bool pipeline_is_dirty)
7593bf215546Sopenharmony_ci{
7594bf215546Sopenharmony_ci   bool late_scissor_emission;
7595bf215546Sopenharmony_ci
7596bf215546Sopenharmony_ci   if ((cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER) ||
7597bf215546Sopenharmony_ci       cmd_buffer->state.emitted_graphics_pipeline != cmd_buffer->state.graphics_pipeline)
7598bf215546Sopenharmony_ci      radv_emit_rbplus_state(cmd_buffer);
7599bf215546Sopenharmony_ci
7600bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->use_ngg_culling &&
7601bf215546Sopenharmony_ci       cmd_buffer->state.graphics_pipeline->is_ngg)
7602bf215546Sopenharmony_ci      radv_emit_ngg_culling_state(cmd_buffer, info);
7603bf215546Sopenharmony_ci
7604bf215546Sopenharmony_ci   if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE)
7605bf215546Sopenharmony_ci      radv_emit_graphics_pipeline(cmd_buffer);
7606bf215546Sopenharmony_ci
7607bf215546Sopenharmony_ci   /* This should be before the cmd_buffer->state.dirty is cleared
7608bf215546Sopenharmony_ci    * (excluding RADV_CMD_DIRTY_PIPELINE) and after
7609bf215546Sopenharmony_ci    * cmd_buffer->state.context_roll_without_scissor_emitted is set. */
7610bf215546Sopenharmony_ci   late_scissor_emission = radv_need_late_scissor_emission(cmd_buffer, info);
7611bf215546Sopenharmony_ci
7612bf215546Sopenharmony_ci   if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
7613bf215546Sopenharmony_ci      radv_emit_framebuffer_state(cmd_buffer);
7614bf215546Sopenharmony_ci
7615bf215546Sopenharmony_ci   if (info->indexed) {
7616bf215546Sopenharmony_ci      if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_INDEX_BUFFER)
7617bf215546Sopenharmony_ci         radv_emit_index_buffer(cmd_buffer, info->indirect);
7618bf215546Sopenharmony_ci   } else {
7619bf215546Sopenharmony_ci      /* On GFX7 and later, non-indexed draws overwrite VGT_INDEX_TYPE,
7620bf215546Sopenharmony_ci       * so the state must be re-emitted before the next indexed
7621bf215546Sopenharmony_ci       * draw.
7622bf215546Sopenharmony_ci       */
7623bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
7624bf215546Sopenharmony_ci         cmd_buffer->state.last_index_type = -1;
7625bf215546Sopenharmony_ci         cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
7626bf215546Sopenharmony_ci      }
7627bf215546Sopenharmony_ci   }
7628bf215546Sopenharmony_ci
7629bf215546Sopenharmony_ci   if (cmd_buffer->device->force_vrs != RADV_FORCE_VRS_1x1) {
7630bf215546Sopenharmony_ci      struct radv_dynamic_state *d = &cmd_buffer->state.dynamic;
7631bf215546Sopenharmony_ci      uint64_t dynamic_states =
7632bf215546Sopenharmony_ci         cmd_buffer->state.dirty & cmd_buffer->state.emitted_graphics_pipeline->needed_dynamic_state;
7633bf215546Sopenharmony_ci
7634bf215546Sopenharmony_ci      if ((dynamic_states & RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE) &&
7635bf215546Sopenharmony_ci          d->fragment_shading_rate.size.width == 1 &&
7636bf215546Sopenharmony_ci          d->fragment_shading_rate.size.height == 1 &&
7637bf215546Sopenharmony_ci          d->fragment_shading_rate.combiner_ops[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
7638bf215546Sopenharmony_ci          d->fragment_shading_rate.combiner_ops[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR) {
7639bf215546Sopenharmony_ci         /* When per-vertex VRS is forced and the dynamic fragment shading rate is a no-op, ignore
7640bf215546Sopenharmony_ci          * it. This is needed for vkd3d-proton because it always declares per-draw VRS as dynamic.
7641bf215546Sopenharmony_ci          */
7642bf215546Sopenharmony_ci         cmd_buffer->state.dirty &= ~RADV_CMD_DIRTY_DYNAMIC_FRAGMENT_SHADING_RATE;
7643bf215546Sopenharmony_ci      }
7644bf215546Sopenharmony_ci   }
7645bf215546Sopenharmony_ci
7646bf215546Sopenharmony_ci   radv_cmd_buffer_flush_dynamic_state(cmd_buffer, pipeline_is_dirty);
7647bf215546Sopenharmony_ci
7648bf215546Sopenharmony_ci   radv_emit_draw_registers(cmd_buffer, info);
7649bf215546Sopenharmony_ci
7650bf215546Sopenharmony_ci   if (late_scissor_emission)
7651bf215546Sopenharmony_ci      radv_emit_scissor(cmd_buffer);
7652bf215546Sopenharmony_ci}
7653bf215546Sopenharmony_ci
7654bf215546Sopenharmony_ci/* MUST inline this function to avoid massive perf loss in drawoverhead */
7655bf215546Sopenharmony_ciALWAYS_INLINE static bool
7656bf215546Sopenharmony_ciradv_before_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info, uint32_t drawCount)
7657bf215546Sopenharmony_ci{
7658bf215546Sopenharmony_ci   const bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
7659bf215546Sopenharmony_ci   const bool pipeline_is_dirty = (cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE) &&
7660bf215546Sopenharmony_ci                                  cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline;
7661bf215546Sopenharmony_ci
7662bf215546Sopenharmony_ci   ASSERTED const unsigned cdw_max =
7663bf215546Sopenharmony_ci      radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 4096 + 128 * (drawCount - 1));
7664bf215546Sopenharmony_ci
7665bf215546Sopenharmony_ci   if (likely(!info->indirect)) {
7666bf215546Sopenharmony_ci      /* GFX6-GFX7 treat instance_count==0 as instance_count==1. There is
7667bf215546Sopenharmony_ci       * no workaround for indirect draws, but we can at least skip
7668bf215546Sopenharmony_ci       * direct draws.
7669bf215546Sopenharmony_ci       */
7670bf215546Sopenharmony_ci      if (unlikely(!info->instance_count))
7671bf215546Sopenharmony_ci         return false;
7672bf215546Sopenharmony_ci
7673bf215546Sopenharmony_ci      /* Handle count == 0. */
7674bf215546Sopenharmony_ci      if (unlikely(!info->count && !info->strmout_buffer))
7675bf215546Sopenharmony_ci         return false;
7676bf215546Sopenharmony_ci   }
7677bf215546Sopenharmony_ci
7678bf215546Sopenharmony_ci   /* Need to apply this workaround early as it can set flush flags. */
7679bf215546Sopenharmony_ci   if (cmd_buffer->state.dirty & RADV_CMD_DIRTY_FRAMEBUFFER)
7680bf215546Sopenharmony_ci      radv_emit_fb_mip_change_flush(cmd_buffer);
7681bf215546Sopenharmony_ci
7682bf215546Sopenharmony_ci   /* Use optimal packet order based on whether we need to sync the
7683bf215546Sopenharmony_ci    * pipeline.
7684bf215546Sopenharmony_ci    */
7685bf215546Sopenharmony_ci   if (cmd_buffer->state.flush_bits &
7686bf215546Sopenharmony_ci       (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
7687bf215546Sopenharmony_ci        RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
7688bf215546Sopenharmony_ci      /* If we have to wait for idle, set all states first, so that
7689bf215546Sopenharmony_ci       * all SET packets are processed in parallel with previous draw
7690bf215546Sopenharmony_ci       * calls. Then upload descriptors, set shader pointers, and
7691bf215546Sopenharmony_ci       * draw, and prefetch at the end. This ensures that the time
7692bf215546Sopenharmony_ci       * the CUs are idle is very short. (there are only SET_SH
7693bf215546Sopenharmony_ci       * packets between the wait and the draw)
7694bf215546Sopenharmony_ci       */
7695bf215546Sopenharmony_ci      radv_emit_all_graphics_states(cmd_buffer, info, pipeline_is_dirty);
7696bf215546Sopenharmony_ci      si_emit_cache_flush(cmd_buffer);
7697bf215546Sopenharmony_ci      /* <-- CUs are idle here --> */
7698bf215546Sopenharmony_ci
7699bf215546Sopenharmony_ci      radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
7700bf215546Sopenharmony_ci   } else {
7701bf215546Sopenharmony_ci      /* If we don't wait for idle, start prefetches first, then set
7702bf215546Sopenharmony_ci       * states, and draw at the end.
7703bf215546Sopenharmony_ci       */
7704bf215546Sopenharmony_ci      si_emit_cache_flush(cmd_buffer);
7705bf215546Sopenharmony_ci
7706bf215546Sopenharmony_ci      if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
7707bf215546Sopenharmony_ci         /* Only prefetch the vertex shader and VBO descriptors
7708bf215546Sopenharmony_ci          * in order to start the draw as soon as possible.
7709bf215546Sopenharmony_ci          */
7710bf215546Sopenharmony_ci         radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.graphics_pipeline, true);
7711bf215546Sopenharmony_ci      }
7712bf215546Sopenharmony_ci
7713bf215546Sopenharmony_ci      radv_upload_graphics_shader_descriptors(cmd_buffer, pipeline_is_dirty);
7714bf215546Sopenharmony_ci
7715bf215546Sopenharmony_ci      radv_emit_all_graphics_states(cmd_buffer, info, pipeline_is_dirty);
7716bf215546Sopenharmony_ci   }
7717bf215546Sopenharmony_ci
7718bf215546Sopenharmony_ci   radv_describe_draw(cmd_buffer);
7719bf215546Sopenharmony_ci   if (likely(!info->indirect)) {
7720bf215546Sopenharmony_ci      struct radv_cmd_state *state = &cmd_buffer->state;
7721bf215546Sopenharmony_ci      struct radeon_cmdbuf *cs = cmd_buffer->cs;
7722bf215546Sopenharmony_ci      assert(state->graphics_pipeline->vtx_base_sgpr);
7723bf215546Sopenharmony_ci      if (state->last_num_instances != info->instance_count) {
7724bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_NUM_INSTANCES, 0, false));
7725bf215546Sopenharmony_ci         radeon_emit(cs, info->instance_count);
7726bf215546Sopenharmony_ci         state->last_num_instances = info->instance_count;
7727bf215546Sopenharmony_ci      }
7728bf215546Sopenharmony_ci   }
7729bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
7730bf215546Sopenharmony_ci
7731bf215546Sopenharmony_ci   return true;
7732bf215546Sopenharmony_ci}
7733bf215546Sopenharmony_ci
7734bf215546Sopenharmony_ciALWAYS_INLINE static bool
7735bf215546Sopenharmony_ciradv_before_taskmesh_draw(struct radv_cmd_buffer *cmd_buffer, const struct radv_draw_info *info,
7736bf215546Sopenharmony_ci                          uint32_t drawCount)
7737bf215546Sopenharmony_ci{
7738bf215546Sopenharmony_ci   struct radv_descriptor_state *descriptors_state =
7739bf215546Sopenharmony_ci      radv_get_descriptors_state(cmd_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS);
7740bf215546Sopenharmony_ci   const bool pipeline_is_dirty =
7741bf215546Sopenharmony_ci      cmd_buffer->state.dirty & RADV_CMD_DIRTY_PIPELINE &&
7742bf215546Sopenharmony_ci      cmd_buffer->state.graphics_pipeline != cmd_buffer->state.emitted_graphics_pipeline;
7743bf215546Sopenharmony_ci   const bool push_dirty = descriptors_state->push_dirty;
7744bf215546Sopenharmony_ci   const uint32_t desc_dirty = descriptors_state->dirty;
7745bf215546Sopenharmony_ci
7746bf215546Sopenharmony_ci   const bool gfx_result = radv_before_draw(cmd_buffer, info, drawCount);
7747bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
7748bf215546Sopenharmony_ci   struct radv_shader *task_shader = radv_get_shader(&pipeline->base, MESA_SHADER_TASK);
7749bf215546Sopenharmony_ci
7750bf215546Sopenharmony_ci   /* If there is no task shader, no need to do anything special. */
7751bf215546Sopenharmony_ci   if (!task_shader)
7752bf215546Sopenharmony_ci      return gfx_result;
7753bf215546Sopenharmony_ci
7754bf215546Sopenharmony_ci   /* Need to check the count even for indirect draws to work around
7755bf215546Sopenharmony_ci    * an issue with DISPATCH_TASKMESH_INDIRECT_MULTI_ACE.
7756bf215546Sopenharmony_ci    */
7757bf215546Sopenharmony_ci   if (!info->count || !gfx_result)
7758bf215546Sopenharmony_ci      return false;
7759bf215546Sopenharmony_ci
7760bf215546Sopenharmony_ci   const bool need_task_semaphore = radv_flush_gfx2ace_semaphore(cmd_buffer);
7761bf215546Sopenharmony_ci   struct radv_physical_device *pdevice = cmd_buffer->device->physical_device;
7762bf215546Sopenharmony_ci   struct radeon_cmdbuf *ace_cs = cmd_buffer->ace_internal.cs;
7763bf215546Sopenharmony_ci   struct radeon_winsys *ws = cmd_buffer->device->ws;
7764bf215546Sopenharmony_ci
7765bf215546Sopenharmony_ci   assert(ace_cs);
7766bf215546Sopenharmony_ci   ASSERTED const unsigned ace_cdw_max =
7767bf215546Sopenharmony_ci      radeon_check_space(ws, ace_cs, 4096 + 128 * (drawCount - 1));
7768bf215546Sopenharmony_ci
7769bf215546Sopenharmony_ci   if (need_task_semaphore)
7770bf215546Sopenharmony_ci      radv_wait_gfx2ace_semaphore(cmd_buffer);
7771bf215546Sopenharmony_ci
7772bf215546Sopenharmony_ci   if (pipeline_is_dirty) {
7773bf215546Sopenharmony_ci      radv_pipeline_emit_hw_cs(pdevice, ace_cs, task_shader);
7774bf215546Sopenharmony_ci      radv_pipeline_emit_compute_state(pdevice, ace_cs, task_shader);
7775bf215546Sopenharmony_ci   }
7776bf215546Sopenharmony_ci
7777bf215546Sopenharmony_ci   radv_ace_internal_cache_flush(cmd_buffer);
7778bf215546Sopenharmony_ci
7779bf215546Sopenharmony_ci   /* Restore dirty state of descriptors
7780bf215546Sopenharmony_ci    * They were marked non-dirty in radv_before_draw,
7781bf215546Sopenharmony_ci    * but they need to be re-emitted now to the ACE cmdbuf.
7782bf215546Sopenharmony_ci    */
7783bf215546Sopenharmony_ci   descriptors_state->push_dirty = push_dirty;
7784bf215546Sopenharmony_ci   descriptors_state->dirty = desc_dirty;
7785bf215546Sopenharmony_ci
7786bf215546Sopenharmony_ci   /* Flush descriptors and push constants for task shaders. */
7787bf215546Sopenharmony_ci   radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_TASK_BIT_NV, &pipeline->base,
7788bf215546Sopenharmony_ci                          VK_PIPELINE_BIND_POINT_GRAPHICS);
7789bf215546Sopenharmony_ci   radv_flush_constants(cmd_buffer, VK_SHADER_STAGE_TASK_BIT_NV, &pipeline->base,
7790bf215546Sopenharmony_ci                        VK_PIPELINE_BIND_POINT_GRAPHICS);
7791bf215546Sopenharmony_ci
7792bf215546Sopenharmony_ci   assert(ace_cs->cdw <= ace_cdw_max);
7793bf215546Sopenharmony_ci   return true;
7794bf215546Sopenharmony_ci}
7795bf215546Sopenharmony_ci
7796bf215546Sopenharmony_cistatic void
7797bf215546Sopenharmony_ciradv_after_draw(struct radv_cmd_buffer *cmd_buffer)
7798bf215546Sopenharmony_ci{
7799bf215546Sopenharmony_ci   const struct radeon_info *rad_info = &cmd_buffer->device->physical_device->rad_info;
7800bf215546Sopenharmony_ci   bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
7801bf215546Sopenharmony_ci   /* Start prefetches after the draw has been started. Both will
7802bf215546Sopenharmony_ci    * run in parallel, but starting the draw first is more
7803bf215546Sopenharmony_ci    * important.
7804bf215546Sopenharmony_ci    */
7805bf215546Sopenharmony_ci   if (has_prefetch && cmd_buffer->state.prefetch_L2_mask) {
7806bf215546Sopenharmony_ci      radv_emit_prefetch_L2(cmd_buffer, cmd_buffer->state.graphics_pipeline, false);
7807bf215546Sopenharmony_ci   }
7808bf215546Sopenharmony_ci
7809bf215546Sopenharmony_ci   /* Workaround for a VGT hang when streamout is enabled.
7810bf215546Sopenharmony_ci    * It must be done after drawing.
7811bf215546Sopenharmony_ci    */
7812bf215546Sopenharmony_ci   if (radv_is_streamout_enabled(cmd_buffer) &&
7813bf215546Sopenharmony_ci       (rad_info->family == CHIP_HAWAII || rad_info->family == CHIP_TONGA ||
7814bf215546Sopenharmony_ci        rad_info->family == CHIP_FIJI)) {
7815bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VGT_STREAMOUT_SYNC;
7816bf215546Sopenharmony_ci   }
7817bf215546Sopenharmony_ci
7818bf215546Sopenharmony_ci   radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_PS_PARTIAL_FLUSH);
7819bf215546Sopenharmony_ci}
7820bf215546Sopenharmony_ci
7821bf215546Sopenharmony_cistatic struct radv_buffer
7822bf215546Sopenharmony_ciradv_nv_mesh_indirect_bo(struct radv_cmd_buffer *cmd_buffer,
7823bf215546Sopenharmony_ci                         struct radv_buffer *buffer, VkDeviceSize offset,
7824bf215546Sopenharmony_ci                         uint32_t draw_count, uint32_t stride)
7825bf215546Sopenharmony_ci{
7826bf215546Sopenharmony_ci   /* Translates the indirect BO format used by NV_mesh_shader API
7827bf215546Sopenharmony_ci    * to the BO format used by DRAW_INDIRECT / DRAW_INDIRECT_MULTI.
7828bf215546Sopenharmony_ci    */
7829bf215546Sopenharmony_ci
7830bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
7831bf215546Sopenharmony_ci   struct radeon_winsys *ws = cmd_buffer->device->ws;
7832bf215546Sopenharmony_ci
7833bf215546Sopenharmony_ci   const size_t src_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
7834bf215546Sopenharmony_ci   const size_t dst_stride = sizeof(VkDrawIndirectCommand);
7835bf215546Sopenharmony_ci   const size_t src_off_task_count = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount);
7836bf215546Sopenharmony_ci   const size_t src_off_first_task = offsetof(VkDrawMeshTasksIndirectCommandNV, firstTask);
7837bf215546Sopenharmony_ci   const size_t dst_off_vertex_count = offsetof(VkDrawIndirectCommand, vertexCount);
7838bf215546Sopenharmony_ci   const size_t dst_off_first_vertex = offsetof(VkDrawIndirectCommand, firstVertex);
7839bf215546Sopenharmony_ci
7840bf215546Sopenharmony_ci   /* Fill the buffer with all zeroes except instanceCount = 1.
7841bf215546Sopenharmony_ci    * This helps emit fewer copy packets below.
7842bf215546Sopenharmony_ci    */
7843bf215546Sopenharmony_ci   VkDrawIndirectCommand *fill_data = (VkDrawIndirectCommand *) alloca(dst_stride * draw_count);
7844bf215546Sopenharmony_ci   const VkDrawIndirectCommand filler = { .instanceCount = 1 };
7845bf215546Sopenharmony_ci   for (unsigned i = 0; i < draw_count; ++i)
7846bf215546Sopenharmony_ci      fill_data[i] = filler;
7847bf215546Sopenharmony_ci
7848bf215546Sopenharmony_ci   /* We'll have to copy data from the API BO. */
7849bf215546Sopenharmony_ci   uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
7850bf215546Sopenharmony_ci   radv_cs_add_buffer(ws, cs, buffer->bo);
7851bf215546Sopenharmony_ci
7852bf215546Sopenharmony_ci   /* Allocate some space in the upload BO. */
7853bf215546Sopenharmony_ci   unsigned out_offset;
7854bf215546Sopenharmony_ci   radv_cmd_buffer_upload_data(cmd_buffer, dst_stride * draw_count, fill_data, &out_offset);
7855bf215546Sopenharmony_ci   const uint64_t new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + out_offset;
7856bf215546Sopenharmony_ci
7857bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 12 * draw_count + 2);
7858bf215546Sopenharmony_ci
7859bf215546Sopenharmony_ci   /* Copy data from the API BO so that the format is suitable for the
7860bf215546Sopenharmony_ci    * indirect draw packet:
7861bf215546Sopenharmony_ci    * - vertexCount = taskCount (copied here)
7862bf215546Sopenharmony_ci    * - instanceCount = 1 (filled by CPU above)
7863bf215546Sopenharmony_ci    * - firstVertex = firstTask (copied here)
7864bf215546Sopenharmony_ci    * - firstInstance = 0 (filled by CPU above)
7865bf215546Sopenharmony_ci    */
7866bf215546Sopenharmony_ci   for (unsigned i = 0; i < draw_count; ++i) {
7867bf215546Sopenharmony_ci      const uint64_t src_task_count = va + i * src_stride + src_off_task_count;
7868bf215546Sopenharmony_ci      const uint64_t src_first_task = va + i * src_stride + src_off_first_task;
7869bf215546Sopenharmony_ci      const uint64_t dst_vertex_count = new_va + i * dst_stride + dst_off_vertex_count;
7870bf215546Sopenharmony_ci      const uint64_t dst_first_vertex = new_va + i * dst_stride + dst_off_first_vertex;
7871bf215546Sopenharmony_ci
7872bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
7873bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
7874bf215546Sopenharmony_ci                      COPY_DATA_WR_CONFIRM);
7875bf215546Sopenharmony_ci      radeon_emit(cs, src_task_count);
7876bf215546Sopenharmony_ci      radeon_emit(cs, src_task_count >> 32);
7877bf215546Sopenharmony_ci      radeon_emit(cs, dst_vertex_count);
7878bf215546Sopenharmony_ci      radeon_emit(cs, dst_vertex_count >> 32);
7879bf215546Sopenharmony_ci
7880bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
7881bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
7882bf215546Sopenharmony_ci                      COPY_DATA_WR_CONFIRM);
7883bf215546Sopenharmony_ci      radeon_emit(cs, src_first_task);
7884bf215546Sopenharmony_ci      radeon_emit(cs, src_first_task >> 32);
7885bf215546Sopenharmony_ci      radeon_emit(cs, dst_first_vertex);
7886bf215546Sopenharmony_ci      radeon_emit(cs, dst_first_vertex >> 32);
7887bf215546Sopenharmony_ci   }
7888bf215546Sopenharmony_ci
7889bf215546Sopenharmony_ci   /* Wait for the copies to finish */
7890bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
7891bf215546Sopenharmony_ci   radeon_emit(cs, 0);
7892bf215546Sopenharmony_ci
7893bf215546Sopenharmony_ci   /* The draw packet can now use this buffer: */
7894bf215546Sopenharmony_ci   struct radv_buffer buf = *buffer;
7895bf215546Sopenharmony_ci   buf.bo = cmd_buffer->upload.upload_bo;
7896bf215546Sopenharmony_ci   buf.offset = out_offset;
7897bf215546Sopenharmony_ci
7898bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
7899bf215546Sopenharmony_ci
7900bf215546Sopenharmony_ci   return buf;
7901bf215546Sopenharmony_ci}
7902bf215546Sopenharmony_ci
7903bf215546Sopenharmony_cistatic struct radv_buffer
7904bf215546Sopenharmony_ciradv_nv_task_indirect_bo(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
7905bf215546Sopenharmony_ci                         VkDeviceSize offset, uint32_t draw_count, uint32_t stride)
7906bf215546Sopenharmony_ci{
7907bf215546Sopenharmony_ci   /* Translates the indirect BO format used by NV_mesh_shader API
7908bf215546Sopenharmony_ci    * to the BO format used by DISPATCH_TASKMESH_INDIRECT_MULTI_ACE.
7909bf215546Sopenharmony_ci    */
7910bf215546Sopenharmony_ci
7911bf215546Sopenharmony_ci   assert(draw_count);
7912bf215546Sopenharmony_ci   static_assert(sizeof(VkDispatchIndirectCommand) == 12, "Incorrect size of taskmesh command.");
7913bf215546Sopenharmony_ci
7914bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->ace_internal.cs;
7915bf215546Sopenharmony_ci   struct radeon_winsys *ws = cmd_buffer->device->ws;
7916bf215546Sopenharmony_ci
7917bf215546Sopenharmony_ci   const size_t src_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
7918bf215546Sopenharmony_ci   const size_t dst_stride = sizeof(VkDispatchIndirectCommand);
7919bf215546Sopenharmony_ci   const size_t src_off_task_count = offsetof(VkDrawMeshTasksIndirectCommandNV, taskCount);
7920bf215546Sopenharmony_ci   const size_t dst_off_x = offsetof(VkDispatchIndirectCommand, x);
7921bf215546Sopenharmony_ci
7922bf215546Sopenharmony_ci   const unsigned new_disp_size = dst_stride * draw_count;
7923bf215546Sopenharmony_ci
7924bf215546Sopenharmony_ci   const uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
7925bf215546Sopenharmony_ci   radv_cs_add_buffer(ws, cs, buffer->bo);
7926bf215546Sopenharmony_ci
7927bf215546Sopenharmony_ci   /* Fill the buffer with X=0, Y=1, Z=1. */
7928bf215546Sopenharmony_ci   VkDispatchIndirectCommand *fill_data = (VkDispatchIndirectCommand *)alloca(new_disp_size);
7929bf215546Sopenharmony_ci   for (unsigned i = 0; i < draw_count; ++i) {
7930bf215546Sopenharmony_ci      fill_data[i].x = 0;
7931bf215546Sopenharmony_ci      fill_data[i].y = 1;
7932bf215546Sopenharmony_ci      fill_data[i].z = 1;
7933bf215546Sopenharmony_ci   }
7934bf215546Sopenharmony_ci
7935bf215546Sopenharmony_ci   /* Allocate space in the upload BO. */
7936bf215546Sopenharmony_ci   unsigned out_offset;
7937bf215546Sopenharmony_ci   ASSERTED bool uploaded =
7938bf215546Sopenharmony_ci      radv_cmd_buffer_upload_data(cmd_buffer, new_disp_size, fill_data, &out_offset);
7939bf215546Sopenharmony_ci   const uint64_t new_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + out_offset;
7940bf215546Sopenharmony_ci   assert(uploaded);
7941bf215546Sopenharmony_ci
7942bf215546Sopenharmony_ci   /* Clamp draw count to fit the actual size of the buffer.
7943bf215546Sopenharmony_ci    * This is to avoid potential out of bounds copies (eg. for draws with an indirect count buffer).
7944bf215546Sopenharmony_ci    * The remaining indirect draws will stay filled with X=0, Y=1, Z=1 which is harmless.
7945bf215546Sopenharmony_ci    */
7946bf215546Sopenharmony_ci   draw_count = MIN2(draw_count, (buffer->vk.size - buffer->offset - offset) / src_stride);
7947bf215546Sopenharmony_ci
7948bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 6 * draw_count + 2);
7949bf215546Sopenharmony_ci
7950bf215546Sopenharmony_ci   /* Copy taskCount from the NV API BO to the X dispatch size of the compatible BO. */
7951bf215546Sopenharmony_ci   for (unsigned i = 0; i < draw_count; ++i) {
7952bf215546Sopenharmony_ci      const uint64_t src_task_count = va + i * src_stride + src_off_task_count;
7953bf215546Sopenharmony_ci      const uint64_t dst_x = new_va + i * dst_stride + dst_off_x;
7954bf215546Sopenharmony_ci
7955bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, cmd_buffer->state.predicating));
7956bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
7957bf215546Sopenharmony_ci                         COPY_DATA_WR_CONFIRM);
7958bf215546Sopenharmony_ci      radeon_emit(cs, src_task_count);
7959bf215546Sopenharmony_ci      radeon_emit(cs, src_task_count >> 32);
7960bf215546Sopenharmony_ci      radeon_emit(cs, dst_x);
7961bf215546Sopenharmony_ci      radeon_emit(cs, dst_x >> 32);
7962bf215546Sopenharmony_ci   }
7963bf215546Sopenharmony_ci
7964bf215546Sopenharmony_ci   assert(cs->cdw <= cdw_max);
7965bf215546Sopenharmony_ci
7966bf215546Sopenharmony_ci   /* The draw packet can now use this buffer: */
7967bf215546Sopenharmony_ci   struct radv_buffer buf = *buffer;
7968bf215546Sopenharmony_ci   buf.bo = cmd_buffer->upload.upload_bo;
7969bf215546Sopenharmony_ci   buf.offset = out_offset;
7970bf215546Sopenharmony_ci
7971bf215546Sopenharmony_ci   return buf;
7972bf215546Sopenharmony_ci}
7973bf215546Sopenharmony_ci
7974bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
7975bf215546Sopenharmony_ciradv_CmdDraw(VkCommandBuffer commandBuffer, uint32_t vertexCount, uint32_t instanceCount,
7976bf215546Sopenharmony_ci             uint32_t firstVertex, uint32_t firstInstance)
7977bf215546Sopenharmony_ci{
7978bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
7979bf215546Sopenharmony_ci   struct radv_draw_info info;
7980bf215546Sopenharmony_ci
7981bf215546Sopenharmony_ci   info.count = vertexCount;
7982bf215546Sopenharmony_ci   info.instance_count = instanceCount;
7983bf215546Sopenharmony_ci   info.first_instance = firstInstance;
7984bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
7985bf215546Sopenharmony_ci   info.indirect = NULL;
7986bf215546Sopenharmony_ci   info.indexed = false;
7987bf215546Sopenharmony_ci
7988bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
7989bf215546Sopenharmony_ci      return;
7990bf215546Sopenharmony_ci   const VkMultiDrawInfoEXT minfo = { firstVertex, vertexCount };
7991bf215546Sopenharmony_ci   radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, 0, 0);
7992bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
7993bf215546Sopenharmony_ci}
7994bf215546Sopenharmony_ci
7995bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
7996bf215546Sopenharmony_ciradv_CmdDrawMultiEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawInfoEXT *pVertexInfo,
7997bf215546Sopenharmony_ci                          uint32_t instanceCount, uint32_t firstInstance, uint32_t stride)
7998bf215546Sopenharmony_ci{
7999bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8000bf215546Sopenharmony_ci   struct radv_draw_info info;
8001bf215546Sopenharmony_ci
8002bf215546Sopenharmony_ci   if (!drawCount)
8003bf215546Sopenharmony_ci      return;
8004bf215546Sopenharmony_ci
8005bf215546Sopenharmony_ci   info.count = pVertexInfo->vertexCount;
8006bf215546Sopenharmony_ci   info.instance_count = instanceCount;
8007bf215546Sopenharmony_ci   info.first_instance = firstInstance;
8008bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8009bf215546Sopenharmony_ci   info.indirect = NULL;
8010bf215546Sopenharmony_ci   info.indexed = false;
8011bf215546Sopenharmony_ci
8012bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, drawCount))
8013bf215546Sopenharmony_ci      return;
8014bf215546Sopenharmony_ci   radv_emit_direct_draw_packets(cmd_buffer, &info, drawCount, pVertexInfo, 0, stride);
8015bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8016bf215546Sopenharmony_ci}
8017bf215546Sopenharmony_ci
8018bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8019bf215546Sopenharmony_ciradv_CmdDrawIndexed(VkCommandBuffer commandBuffer, uint32_t indexCount, uint32_t instanceCount,
8020bf215546Sopenharmony_ci                    uint32_t firstIndex, int32_t vertexOffset, uint32_t firstInstance)
8021bf215546Sopenharmony_ci{
8022bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8023bf215546Sopenharmony_ci   struct radv_draw_info info;
8024bf215546Sopenharmony_ci
8025bf215546Sopenharmony_ci   info.indexed = true;
8026bf215546Sopenharmony_ci   info.count = indexCount;
8027bf215546Sopenharmony_ci   info.instance_count = instanceCount;
8028bf215546Sopenharmony_ci   info.first_instance = firstInstance;
8029bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8030bf215546Sopenharmony_ci   info.indirect = NULL;
8031bf215546Sopenharmony_ci
8032bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
8033bf215546Sopenharmony_ci      return;
8034bf215546Sopenharmony_ci   const VkMultiDrawIndexedInfoEXT minfo = { firstIndex, indexCount, vertexOffset };
8035bf215546Sopenharmony_ci   radv_emit_draw_packets_indexed(cmd_buffer, &info, 1, &minfo, 0, NULL);
8036bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8037bf215546Sopenharmony_ci}
8038bf215546Sopenharmony_ci
8039bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8040bf215546Sopenharmony_ciradv_CmdDrawMultiIndexedEXT(VkCommandBuffer commandBuffer, uint32_t drawCount, const VkMultiDrawIndexedInfoEXT *pIndexInfo,
8041bf215546Sopenharmony_ci                            uint32_t instanceCount, uint32_t firstInstance, uint32_t stride, const int32_t *pVertexOffset)
8042bf215546Sopenharmony_ci{
8043bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8044bf215546Sopenharmony_ci   struct radv_draw_info info;
8045bf215546Sopenharmony_ci
8046bf215546Sopenharmony_ci   if (!drawCount)
8047bf215546Sopenharmony_ci      return;
8048bf215546Sopenharmony_ci
8049bf215546Sopenharmony_ci   const VkMultiDrawIndexedInfoEXT *minfo = pIndexInfo;
8050bf215546Sopenharmony_ci   info.indexed = true;
8051bf215546Sopenharmony_ci   info.count = minfo->indexCount;
8052bf215546Sopenharmony_ci   info.instance_count = instanceCount;
8053bf215546Sopenharmony_ci   info.first_instance = firstInstance;
8054bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8055bf215546Sopenharmony_ci   info.indirect = NULL;
8056bf215546Sopenharmony_ci
8057bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, drawCount))
8058bf215546Sopenharmony_ci      return;
8059bf215546Sopenharmony_ci   radv_emit_draw_packets_indexed(cmd_buffer, &info, drawCount, pIndexInfo, stride, pVertexOffset);
8060bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8061bf215546Sopenharmony_ci}
8062bf215546Sopenharmony_ci
8063bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8064bf215546Sopenharmony_ciradv_CmdDrawIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
8065bf215546Sopenharmony_ci                     uint32_t drawCount, uint32_t stride)
8066bf215546Sopenharmony_ci{
8067bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8068bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
8069bf215546Sopenharmony_ci   struct radv_draw_info info;
8070bf215546Sopenharmony_ci
8071bf215546Sopenharmony_ci   info.count = drawCount;
8072bf215546Sopenharmony_ci   info.indirect = buffer;
8073bf215546Sopenharmony_ci   info.indirect_offset = offset;
8074bf215546Sopenharmony_ci   info.stride = stride;
8075bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8076bf215546Sopenharmony_ci   info.count_buffer = NULL;
8077bf215546Sopenharmony_ci   info.indexed = false;
8078bf215546Sopenharmony_ci   info.instance_count = 0;
8079bf215546Sopenharmony_ci
8080bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
8081bf215546Sopenharmony_ci      return;
8082bf215546Sopenharmony_ci   radv_emit_indirect_draw_packets(cmd_buffer, &info);
8083bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8084bf215546Sopenharmony_ci}
8085bf215546Sopenharmony_ci
8086bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8087bf215546Sopenharmony_ciradv_CmdDrawIndexedIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
8088bf215546Sopenharmony_ci                            uint32_t drawCount, uint32_t stride)
8089bf215546Sopenharmony_ci{
8090bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8091bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
8092bf215546Sopenharmony_ci   struct radv_draw_info info;
8093bf215546Sopenharmony_ci
8094bf215546Sopenharmony_ci   info.indexed = true;
8095bf215546Sopenharmony_ci   info.count = drawCount;
8096bf215546Sopenharmony_ci   info.indirect = buffer;
8097bf215546Sopenharmony_ci   info.indirect_offset = offset;
8098bf215546Sopenharmony_ci   info.stride = stride;
8099bf215546Sopenharmony_ci   info.count_buffer = NULL;
8100bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8101bf215546Sopenharmony_ci   info.instance_count = 0;
8102bf215546Sopenharmony_ci
8103bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
8104bf215546Sopenharmony_ci      return;
8105bf215546Sopenharmony_ci   radv_emit_indirect_draw_packets(cmd_buffer, &info);
8106bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8107bf215546Sopenharmony_ci}
8108bf215546Sopenharmony_ci
8109bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8110bf215546Sopenharmony_ciradv_CmdDrawIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset,
8111bf215546Sopenharmony_ci                          VkBuffer _countBuffer, VkDeviceSize countBufferOffset,
8112bf215546Sopenharmony_ci                          uint32_t maxDrawCount, uint32_t stride)
8113bf215546Sopenharmony_ci{
8114bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8115bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
8116bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
8117bf215546Sopenharmony_ci   struct radv_draw_info info;
8118bf215546Sopenharmony_ci
8119bf215546Sopenharmony_ci   info.count = maxDrawCount;
8120bf215546Sopenharmony_ci   info.indirect = buffer;
8121bf215546Sopenharmony_ci   info.indirect_offset = offset;
8122bf215546Sopenharmony_ci   info.count_buffer = count_buffer;
8123bf215546Sopenharmony_ci   info.count_buffer_offset = countBufferOffset;
8124bf215546Sopenharmony_ci   info.stride = stride;
8125bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8126bf215546Sopenharmony_ci   info.indexed = false;
8127bf215546Sopenharmony_ci   info.instance_count = 0;
8128bf215546Sopenharmony_ci
8129bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
8130bf215546Sopenharmony_ci      return;
8131bf215546Sopenharmony_ci   radv_emit_indirect_draw_packets(cmd_buffer, &info);
8132bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8133bf215546Sopenharmony_ci}
8134bf215546Sopenharmony_ci
8135bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8136bf215546Sopenharmony_ciradv_CmdDrawIndexedIndirectCount(VkCommandBuffer commandBuffer, VkBuffer _buffer,
8137bf215546Sopenharmony_ci                                 VkDeviceSize offset, VkBuffer _countBuffer,
8138bf215546Sopenharmony_ci                                 VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
8139bf215546Sopenharmony_ci                                 uint32_t stride)
8140bf215546Sopenharmony_ci{
8141bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8142bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
8143bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
8144bf215546Sopenharmony_ci   struct radv_draw_info info;
8145bf215546Sopenharmony_ci
8146bf215546Sopenharmony_ci   info.indexed = true;
8147bf215546Sopenharmony_ci   info.count = maxDrawCount;
8148bf215546Sopenharmony_ci   info.indirect = buffer;
8149bf215546Sopenharmony_ci   info.indirect_offset = offset;
8150bf215546Sopenharmony_ci   info.count_buffer = count_buffer;
8151bf215546Sopenharmony_ci   info.count_buffer_offset = countBufferOffset;
8152bf215546Sopenharmony_ci   info.stride = stride;
8153bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8154bf215546Sopenharmony_ci   info.instance_count = 0;
8155bf215546Sopenharmony_ci
8156bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
8157bf215546Sopenharmony_ci      return;
8158bf215546Sopenharmony_ci   radv_emit_indirect_draw_packets(cmd_buffer, &info);
8159bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8160bf215546Sopenharmony_ci}
8161bf215546Sopenharmony_ci
8162bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8163bf215546Sopenharmony_ciradv_CmdDrawMeshTasksNV(VkCommandBuffer commandBuffer, uint32_t taskCount, uint32_t firstTask)
8164bf215546Sopenharmony_ci{
8165bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8166bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
8167bf215546Sopenharmony_ci   struct radv_draw_info info;
8168bf215546Sopenharmony_ci
8169bf215546Sopenharmony_ci   info.count = taskCount;
8170bf215546Sopenharmony_ci   info.instance_count = 1;
8171bf215546Sopenharmony_ci   info.first_instance = 0;
8172bf215546Sopenharmony_ci   info.stride = 0;
8173bf215546Sopenharmony_ci   info.indexed = false;
8174bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8175bf215546Sopenharmony_ci   info.count_buffer = NULL;
8176bf215546Sopenharmony_ci   info.indirect = NULL;
8177bf215546Sopenharmony_ci
8178bf215546Sopenharmony_ci   if (!radv_before_taskmesh_draw(cmd_buffer, &info, 1))
8179bf215546Sopenharmony_ci      return;
8180bf215546Sopenharmony_ci
8181bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
8182bf215546Sopenharmony_ci      radv_emit_direct_taskmesh_draw_packets(cmd_buffer, taskCount, 1, 1, firstTask);
8183bf215546Sopenharmony_ci   } else {
8184bf215546Sopenharmony_ci      radv_emit_direct_mesh_draw_packet(cmd_buffer, taskCount, 1, 1, firstTask);
8185bf215546Sopenharmony_ci   }
8186bf215546Sopenharmony_ci
8187bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8188bf215546Sopenharmony_ci}
8189bf215546Sopenharmony_ci
8190bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8191bf215546Sopenharmony_ciradv_CmdDrawMeshTasksIndirectNV(VkCommandBuffer commandBuffer, VkBuffer _buffer,
8192bf215546Sopenharmony_ci                                VkDeviceSize offset, uint32_t drawCount, uint32_t stride)
8193bf215546Sopenharmony_ci{
8194bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8195bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
8196bf215546Sopenharmony_ci
8197bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
8198bf215546Sopenharmony_ci   struct radv_draw_info info;
8199bf215546Sopenharmony_ci
8200bf215546Sopenharmony_ci   info.indirect = buffer;
8201bf215546Sopenharmony_ci   info.indirect_offset = offset;
8202bf215546Sopenharmony_ci   info.stride = stride;
8203bf215546Sopenharmony_ci   info.count = drawCount;
8204bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8205bf215546Sopenharmony_ci   info.count_buffer = NULL;
8206bf215546Sopenharmony_ci   info.indexed = false;
8207bf215546Sopenharmony_ci   info.instance_count = 0;
8208bf215546Sopenharmony_ci
8209bf215546Sopenharmony_ci   if (!radv_before_taskmesh_draw(cmd_buffer, &info, drawCount))
8210bf215546Sopenharmony_ci      return;
8211bf215546Sopenharmony_ci
8212bf215546Sopenharmony_ci   /* Indirect draw with mesh shader only:
8213bf215546Sopenharmony_ci    * Use DRAW_INDIRECT / DRAW_INDIRECT_MULTI like normal indirect draws.
8214bf215546Sopenharmony_ci    * Needed because DISPATCH_MESH_INDIRECT_MULTI doesn't support firstTask.
8215bf215546Sopenharmony_ci    *
8216bf215546Sopenharmony_ci    * Indirect draw with task + mesh shaders:
8217bf215546Sopenharmony_ci    * Use DISPATCH_TASKMESH_INDIRECT_MULTI_ACE + DISPATCH_TASKMESH_GFX.
8218bf215546Sopenharmony_ci    * These packets don't support firstTask so we implement that by
8219bf215546Sopenharmony_ci    * reading the NV command's indirect buffer in the shader.
8220bf215546Sopenharmony_ci    *
8221bf215546Sopenharmony_ci    * The indirect BO layout from the NV_mesh_shader API is incompatible
8222bf215546Sopenharmony_ci    * with AMD HW. To make it work, we allocate some space
8223bf215546Sopenharmony_ci    * in the upload buffer and copy the data to it.
8224bf215546Sopenharmony_ci    */
8225bf215546Sopenharmony_ci
8226bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
8227bf215546Sopenharmony_ci      uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
8228bf215546Sopenharmony_ci      uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
8229bf215546Sopenharmony_ci      struct radv_buffer buf =
8230bf215546Sopenharmony_ci         radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride);
8231bf215546Sopenharmony_ci      info.indirect = &buf;
8232bf215546Sopenharmony_ci      info.indirect_offset = 0;
8233bf215546Sopenharmony_ci      info.stride = sizeof(VkDispatchIndirectCommand);
8234bf215546Sopenharmony_ci
8235bf215546Sopenharmony_ci      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride);
8236bf215546Sopenharmony_ci   } else {
8237bf215546Sopenharmony_ci      struct radv_buffer buf =
8238bf215546Sopenharmony_ci         radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, drawCount, stride);
8239bf215546Sopenharmony_ci      info.indirect = &buf;
8240bf215546Sopenharmony_ci      info.indirect_offset = 0;
8241bf215546Sopenharmony_ci      info.stride = sizeof(VkDrawIndirectCommand);
8242bf215546Sopenharmony_ci
8243bf215546Sopenharmony_ci      radv_emit_indirect_draw_packets(cmd_buffer, &info);
8244bf215546Sopenharmony_ci   }
8245bf215546Sopenharmony_ci
8246bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8247bf215546Sopenharmony_ci}
8248bf215546Sopenharmony_ci
8249bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8250bf215546Sopenharmony_ciradv_CmdDrawMeshTasksIndirectCountNV(VkCommandBuffer commandBuffer, VkBuffer _buffer,
8251bf215546Sopenharmony_ci                                     VkDeviceSize offset, VkBuffer _countBuffer,
8252bf215546Sopenharmony_ci                                     VkDeviceSize countBufferOffset, uint32_t maxDrawCount,
8253bf215546Sopenharmony_ci                                     uint32_t stride)
8254bf215546Sopenharmony_ci{
8255bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8256bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
8257bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, count_buffer, _countBuffer);
8258bf215546Sopenharmony_ci
8259bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
8260bf215546Sopenharmony_ci   struct radv_draw_info info;
8261bf215546Sopenharmony_ci
8262bf215546Sopenharmony_ci   info.indirect = buffer;
8263bf215546Sopenharmony_ci   info.indirect_offset = offset;
8264bf215546Sopenharmony_ci   info.stride = stride;
8265bf215546Sopenharmony_ci   info.count = maxDrawCount;
8266bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8267bf215546Sopenharmony_ci   info.count_buffer = count_buffer;
8268bf215546Sopenharmony_ci   info.count_buffer_offset = countBufferOffset;
8269bf215546Sopenharmony_ci   info.indexed = false;
8270bf215546Sopenharmony_ci   info.instance_count = 0;
8271bf215546Sopenharmony_ci
8272bf215546Sopenharmony_ci   if (!radv_before_taskmesh_draw(cmd_buffer, &info, maxDrawCount))
8273bf215546Sopenharmony_ci      return;
8274bf215546Sopenharmony_ci
8275bf215546Sopenharmony_ci   if (radv_pipeline_has_stage(pipeline, MESA_SHADER_TASK)) {
8276bf215546Sopenharmony_ci      uint64_t nv_ib_va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
8277bf215546Sopenharmony_ci      uint32_t nv_ib_stride = MAX2(stride, sizeof(VkDrawMeshTasksIndirectCommandNV));
8278bf215546Sopenharmony_ci      struct radv_buffer buf =
8279bf215546Sopenharmony_ci         radv_nv_task_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride);
8280bf215546Sopenharmony_ci      info.indirect = &buf;
8281bf215546Sopenharmony_ci      info.indirect_offset = 0;
8282bf215546Sopenharmony_ci      info.stride = sizeof(VkDispatchIndirectCommand);
8283bf215546Sopenharmony_ci
8284bf215546Sopenharmony_ci      radv_emit_indirect_taskmesh_draw_packets(cmd_buffer, &info, nv_ib_va, nv_ib_stride);
8285bf215546Sopenharmony_ci   } else {
8286bf215546Sopenharmony_ci      struct radv_buffer buf =
8287bf215546Sopenharmony_ci         radv_nv_mesh_indirect_bo(cmd_buffer, buffer, offset, maxDrawCount, stride);
8288bf215546Sopenharmony_ci      info.indirect = &buf;
8289bf215546Sopenharmony_ci      info.indirect_offset = 0;
8290bf215546Sopenharmony_ci      info.stride = sizeof(VkDrawIndirectCommand);
8291bf215546Sopenharmony_ci
8292bf215546Sopenharmony_ci      radv_emit_indirect_draw_packets(cmd_buffer, &info);
8293bf215546Sopenharmony_ci   }
8294bf215546Sopenharmony_ci
8295bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8296bf215546Sopenharmony_ci}
8297bf215546Sopenharmony_ci
8298bf215546Sopenharmony_civoid
8299bf215546Sopenharmony_ciradv_CmdExecuteGeneratedCommandsNV(VkCommandBuffer commandBuffer, VkBool32 isPreprocessed,
8300bf215546Sopenharmony_ci                                   const VkGeneratedCommandsInfoNV *pGeneratedCommandsInfo)
8301bf215546Sopenharmony_ci{
8302bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8303bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_indirect_command_layout, layout,
8304bf215546Sopenharmony_ci                  pGeneratedCommandsInfo->indirectCommandsLayout);
8305bf215546Sopenharmony_ci   VK_FROM_HANDLE(radv_buffer, prep_buffer, pGeneratedCommandsInfo->preprocessBuffer);
8306bf215546Sopenharmony_ci
8307bf215546Sopenharmony_ci   /* The only actions that can be done are draws, so skip on other queues. */
8308bf215546Sopenharmony_ci   if (cmd_buffer->qf != RADV_QUEUE_GENERAL)
8309bf215546Sopenharmony_ci      return;
8310bf215546Sopenharmony_ci
8311bf215546Sopenharmony_ci   /* Secondary command buffers are needed for the full extension but can't use
8312bf215546Sopenharmony_ci    * PKT3_INDIRECT_BUFFER_CIK.
8313bf215546Sopenharmony_ci    */
8314bf215546Sopenharmony_ci   assert(cmd_buffer->vk.level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
8315bf215546Sopenharmony_ci
8316bf215546Sopenharmony_ci   radv_prepare_dgc(cmd_buffer, pGeneratedCommandsInfo);
8317bf215546Sopenharmony_ci
8318bf215546Sopenharmony_ci   struct radv_draw_info info;
8319bf215546Sopenharmony_ci
8320bf215546Sopenharmony_ci   info.count = pGeneratedCommandsInfo->sequencesCount;
8321bf215546Sopenharmony_ci   info.indirect = prep_buffer; /* We're not really going use it this way, but a good signal
8322bf215546Sopenharmony_ci                                   that this is not direct. */
8323bf215546Sopenharmony_ci   info.indirect_offset = 0;
8324bf215546Sopenharmony_ci   info.stride = 0;
8325bf215546Sopenharmony_ci   info.strmout_buffer = NULL;
8326bf215546Sopenharmony_ci   info.count_buffer = NULL;
8327bf215546Sopenharmony_ci   info.indexed = layout->indexed;
8328bf215546Sopenharmony_ci   info.instance_count = 0;
8329bf215546Sopenharmony_ci
8330bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
8331bf215546Sopenharmony_ci      return;
8332bf215546Sopenharmony_ci
8333bf215546Sopenharmony_ci   uint32_t cmdbuf_size = radv_get_indirect_cmdbuf_size(pGeneratedCommandsInfo);
8334bf215546Sopenharmony_ci   uint64_t va = radv_buffer_get_va(prep_buffer->bo) + prep_buffer->offset +
8335bf215546Sopenharmony_ci                 pGeneratedCommandsInfo->preprocessOffset;
8336bf215546Sopenharmony_ci   const uint32_t view_mask = cmd_buffer->state.subpass->view_mask;
8337bf215546Sopenharmony_ci
8338bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, PKT3(PKT3_PFP_SYNC_ME, 0, cmd_buffer->state.predicating));
8339bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, 0);
8340bf215546Sopenharmony_ci
8341bf215546Sopenharmony_ci   if (!view_mask) {
8342bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
8343bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, va);
8344bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, va >> 32);
8345bf215546Sopenharmony_ci      radeon_emit(cmd_buffer->cs, cmdbuf_size >> 2);
8346bf215546Sopenharmony_ci   } else {
8347bf215546Sopenharmony_ci      u_foreach_bit (view, view_mask) {
8348bf215546Sopenharmony_ci         radv_emit_view_index(cmd_buffer, view);
8349bf215546Sopenharmony_ci
8350bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
8351bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, va);
8352bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, va >> 32);
8353bf215546Sopenharmony_ci         radeon_emit(cmd_buffer->cs, cmdbuf_size >> 2);
8354bf215546Sopenharmony_ci      }
8355bf215546Sopenharmony_ci   }
8356bf215546Sopenharmony_ci
8357bf215546Sopenharmony_ci   if (layout->binds_index_buffer) {
8358bf215546Sopenharmony_ci      cmd_buffer->state.last_index_type = -1;
8359bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_INDEX_BUFFER;
8360bf215546Sopenharmony_ci   }
8361bf215546Sopenharmony_ci
8362bf215546Sopenharmony_ci   if (layout->bind_vbo_mask)
8363bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_VERTEX_BUFFER;
8364bf215546Sopenharmony_ci
8365bf215546Sopenharmony_ci   if (layout->binds_state)
8366bf215546Sopenharmony_ci      cmd_buffer->state.dirty |= RADV_CMD_DIRTY_DYNAMIC_FRONT_FACE;
8367bf215546Sopenharmony_ci
8368bf215546Sopenharmony_ci   cmd_buffer->push_constant_stages |= ~0;
8369bf215546Sopenharmony_ci
8370bf215546Sopenharmony_ci   cmd_buffer->state.last_index_type = -1;
8371bf215546Sopenharmony_ci   cmd_buffer->state.last_num_instances = -1;
8372bf215546Sopenharmony_ci   cmd_buffer->state.last_vertex_offset = -1;
8373bf215546Sopenharmony_ci   cmd_buffer->state.last_first_instance = -1;
8374bf215546Sopenharmony_ci   cmd_buffer->state.last_drawid = -1;
8375bf215546Sopenharmony_ci
8376bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
8377bf215546Sopenharmony_ci}
8378bf215546Sopenharmony_ci
/* Parameters describing one compute dispatch, consumed by
 * radv_emit_dispatch_packets().
 */
struct radv_dispatch_info {
   /**
    * Grid dimensions for a direct dispatch (in block units).
    *
    * NOTE(review): when `unaligned` is set the emit code rounds these by the
    * shader's block size before dispatching, so they presumably hold thread
    * counts in that mode; confirm against callers.
    */
   uint32_t blocks[3];

   /**
    * Starting offset of the grid for a direct dispatch. When `unaligned` is
    * set, each component must still be a multiple of the shader's block
    * size (the emit code asserts this and divides by it).
    */
   uint32_t offsets[3];
   /**
    * Whether this is an unaligned compute dispatch, i.e. one whose size is
    * not required to be a whole number of blocks.
    */
   bool unaligned;

   /**
    * Indirect dispatch parameters. A non-zero `va` selects the indirect
    * path; `indirect`, when non-NULL, is the buffer object added to the
    * command stream's buffer list for that path.
    */
   struct radeon_winsys_bo *indirect;
   uint64_t va;
};
8401bf215546Sopenharmony_ci
8402bf215546Sopenharmony_cistatic void
8403bf215546Sopenharmony_ciradv_emit_dispatch_packets(struct radv_cmd_buffer *cmd_buffer,
8404bf215546Sopenharmony_ci                           struct radv_compute_pipeline *pipeline,
8405bf215546Sopenharmony_ci                           const struct radv_dispatch_info *info)
8406bf215546Sopenharmony_ci{
8407bf215546Sopenharmony_ci   struct radv_shader *compute_shader = pipeline->base.shaders[MESA_SHADER_COMPUTE];
8408bf215546Sopenharmony_ci   unsigned dispatch_initiator = cmd_buffer->device->dispatch_initiator;
8409bf215546Sopenharmony_ci   struct radeon_winsys *ws = cmd_buffer->device->ws;
8410bf215546Sopenharmony_ci   bool predicating = cmd_buffer->state.predicating;
8411bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
8412bf215546Sopenharmony_ci   struct radv_userdata_info *loc;
8413bf215546Sopenharmony_ci
8414bf215546Sopenharmony_ci   radv_describe_dispatch(cmd_buffer, info->blocks[0], info->blocks[1], info->blocks[2]);
8415bf215546Sopenharmony_ci
8416bf215546Sopenharmony_ci   loc = radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_GRID_SIZE);
8417bf215546Sopenharmony_ci
8418bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max = radeon_check_space(ws, cs, 30);
8419bf215546Sopenharmony_ci
8420bf215546Sopenharmony_ci   if (compute_shader->info.wave_size == 32) {
8421bf215546Sopenharmony_ci      assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
8422bf215546Sopenharmony_ci      dispatch_initiator |= S_00B800_CS_W32_EN(1);
8423bf215546Sopenharmony_ci   }
8424bf215546Sopenharmony_ci
8425bf215546Sopenharmony_ci   if (info->va) {
8426bf215546Sopenharmony_ci      if (info->indirect)
8427bf215546Sopenharmony_ci         radv_cs_add_buffer(ws, cs, info->indirect);
8428bf215546Sopenharmony_ci
8429bf215546Sopenharmony_ci      if (info->unaligned) {
8430bf215546Sopenharmony_ci         radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
8431bf215546Sopenharmony_ci         radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[0]));
8432bf215546Sopenharmony_ci         radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[1]));
8433bf215546Sopenharmony_ci         radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(compute_shader->info.cs.block_size[2]));
8434bf215546Sopenharmony_ci
8435bf215546Sopenharmony_ci         dispatch_initiator |= S_00B800_USE_THREAD_DIMENSIONS(1);
8436bf215546Sopenharmony_ci      }
8437bf215546Sopenharmony_ci
8438bf215546Sopenharmony_ci      if (loc->sgpr_idx != -1) {
8439bf215546Sopenharmony_ci         unsigned reg = R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4;
8440bf215546Sopenharmony_ci
8441bf215546Sopenharmony_ci         if (cmd_buffer->device->load_grid_size_from_user_sgpr) {
8442bf215546Sopenharmony_ci            assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10_3);
8443bf215546Sopenharmony_ci            radeon_emit(cs, PKT3(PKT3_LOAD_SH_REG_INDEX, 3, 0));
8444bf215546Sopenharmony_ci            radeon_emit(cs, info->va);
8445bf215546Sopenharmony_ci            radeon_emit(cs, info->va >> 32);
8446bf215546Sopenharmony_ci            radeon_emit(cs, (reg - SI_SH_REG_OFFSET) >> 2);
8447bf215546Sopenharmony_ci            radeon_emit(cs, 3);
8448bf215546Sopenharmony_ci         } else {
8449bf215546Sopenharmony_ci            radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs, reg, info->va, true);
8450bf215546Sopenharmony_ci         }
8451bf215546Sopenharmony_ci      }
8452bf215546Sopenharmony_ci
8453bf215546Sopenharmony_ci      if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
8454bf215546Sopenharmony_ci         radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
8455bf215546Sopenharmony_ci                                          &cmd_buffer->mec_inv_pred_emitted,
8456bf215546Sopenharmony_ci                                          4 /* DISPATCH_INDIRECT size */);
8457bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 2, 0) | PKT3_SHADER_TYPE_S(1));
8458bf215546Sopenharmony_ci         radeon_emit(cs, info->va);
8459bf215546Sopenharmony_ci         radeon_emit(cs, info->va >> 32);
8460bf215546Sopenharmony_ci         radeon_emit(cs, dispatch_initiator);
8461bf215546Sopenharmony_ci      } else {
8462bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_SET_BASE, 2, 0) | PKT3_SHADER_TYPE_S(1));
8463bf215546Sopenharmony_ci         radeon_emit(cs, 1);
8464bf215546Sopenharmony_ci         radeon_emit(cs, info->va);
8465bf215546Sopenharmony_ci         radeon_emit(cs, info->va >> 32);
8466bf215546Sopenharmony_ci
8467bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_DISPATCH_INDIRECT, 1, predicating) | PKT3_SHADER_TYPE_S(1));
8468bf215546Sopenharmony_ci         radeon_emit(cs, 0);
8469bf215546Sopenharmony_ci         radeon_emit(cs, dispatch_initiator);
8470bf215546Sopenharmony_ci      }
8471bf215546Sopenharmony_ci   } else {
8472bf215546Sopenharmony_ci      unsigned blocks[3] = {info->blocks[0], info->blocks[1], info->blocks[2]};
8473bf215546Sopenharmony_ci      unsigned offsets[3] = {info->offsets[0], info->offsets[1], info->offsets[2]};
8474bf215546Sopenharmony_ci
8475bf215546Sopenharmony_ci      if (info->unaligned) {
8476bf215546Sopenharmony_ci         unsigned *cs_block_size = compute_shader->info.cs.block_size;
8477bf215546Sopenharmony_ci         unsigned remainder[3];
8478bf215546Sopenharmony_ci
8479bf215546Sopenharmony_ci         /* If aligned, these should be an entire block size,
8480bf215546Sopenharmony_ci          * not 0.
8481bf215546Sopenharmony_ci          */
8482bf215546Sopenharmony_ci         remainder[0] = blocks[0] + cs_block_size[0] - align_u32_npot(blocks[0], cs_block_size[0]);
8483bf215546Sopenharmony_ci         remainder[1] = blocks[1] + cs_block_size[1] - align_u32_npot(blocks[1], cs_block_size[1]);
8484bf215546Sopenharmony_ci         remainder[2] = blocks[2] + cs_block_size[2] - align_u32_npot(blocks[2], cs_block_size[2]);
8485bf215546Sopenharmony_ci
8486bf215546Sopenharmony_ci         blocks[0] = round_up_u32(blocks[0], cs_block_size[0]);
8487bf215546Sopenharmony_ci         blocks[1] = round_up_u32(blocks[1], cs_block_size[1]);
8488bf215546Sopenharmony_ci         blocks[2] = round_up_u32(blocks[2], cs_block_size[2]);
8489bf215546Sopenharmony_ci
8490bf215546Sopenharmony_ci         for (unsigned i = 0; i < 3; ++i) {
8491bf215546Sopenharmony_ci            assert(offsets[i] % cs_block_size[i] == 0);
8492bf215546Sopenharmony_ci            offsets[i] /= cs_block_size[i];
8493bf215546Sopenharmony_ci         }
8494bf215546Sopenharmony_ci
8495bf215546Sopenharmony_ci         radeon_set_sh_reg_seq(cs, R_00B81C_COMPUTE_NUM_THREAD_X, 3);
8496bf215546Sopenharmony_ci         radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[0]) |
8497bf215546Sopenharmony_ci                            S_00B81C_NUM_THREAD_PARTIAL(remainder[0]));
8498bf215546Sopenharmony_ci         radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[1]) |
8499bf215546Sopenharmony_ci                            S_00B81C_NUM_THREAD_PARTIAL(remainder[1]));
8500bf215546Sopenharmony_ci         radeon_emit(cs, S_00B81C_NUM_THREAD_FULL(cs_block_size[2]) |
8501bf215546Sopenharmony_ci                            S_00B81C_NUM_THREAD_PARTIAL(remainder[2]));
8502bf215546Sopenharmony_ci
8503bf215546Sopenharmony_ci         dispatch_initiator |= S_00B800_PARTIAL_TG_EN(1);
8504bf215546Sopenharmony_ci      }
8505bf215546Sopenharmony_ci
8506bf215546Sopenharmony_ci      if (loc->sgpr_idx != -1) {
8507bf215546Sopenharmony_ci         if (cmd_buffer->device->load_grid_size_from_user_sgpr) {
8508bf215546Sopenharmony_ci            assert(loc->num_sgprs == 3);
8509bf215546Sopenharmony_ci
8510bf215546Sopenharmony_ci            radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, 3);
8511bf215546Sopenharmony_ci            radeon_emit(cs, blocks[0]);
8512bf215546Sopenharmony_ci            radeon_emit(cs, blocks[1]);
8513bf215546Sopenharmony_ci            radeon_emit(cs, blocks[2]);
8514bf215546Sopenharmony_ci         } else {
8515bf215546Sopenharmony_ci            uint32_t offset;
8516bf215546Sopenharmony_ci            if (!radv_cmd_buffer_upload_data(cmd_buffer, 12, blocks, &offset))
8517bf215546Sopenharmony_ci               return;
8518bf215546Sopenharmony_ci
8519bf215546Sopenharmony_ci            uint64_t va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
8520bf215546Sopenharmony_ci            radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
8521bf215546Sopenharmony_ci                                     R_00B900_COMPUTE_USER_DATA_0 + loc->sgpr_idx * 4, va, true);
8522bf215546Sopenharmony_ci         }
8523bf215546Sopenharmony_ci      }
8524bf215546Sopenharmony_ci
8525bf215546Sopenharmony_ci      if (offsets[0] || offsets[1] || offsets[2]) {
8526bf215546Sopenharmony_ci         radeon_set_sh_reg_seq(cs, R_00B810_COMPUTE_START_X, 3);
8527bf215546Sopenharmony_ci         radeon_emit(cs, offsets[0]);
8528bf215546Sopenharmony_ci         radeon_emit(cs, offsets[1]);
8529bf215546Sopenharmony_ci         radeon_emit(cs, offsets[2]);
8530bf215546Sopenharmony_ci
8531bf215546Sopenharmony_ci         /* The blocks in the packet are not counts but end values. */
8532bf215546Sopenharmony_ci         for (unsigned i = 0; i < 3; ++i)
8533bf215546Sopenharmony_ci            blocks[i] += offsets[i];
8534bf215546Sopenharmony_ci      } else {
8535bf215546Sopenharmony_ci         dispatch_initiator |= S_00B800_FORCE_START_AT_000(1);
8536bf215546Sopenharmony_ci      }
8537bf215546Sopenharmony_ci
8538bf215546Sopenharmony_ci      if (radv_cmd_buffer_uses_mec(cmd_buffer)) {
8539bf215546Sopenharmony_ci         radv_cs_emit_compute_predication(&cmd_buffer->state, cs, cmd_buffer->mec_inv_pred_va,
8540bf215546Sopenharmony_ci                                          &cmd_buffer->mec_inv_pred_emitted,
8541bf215546Sopenharmony_ci                                          5 /* DISPATCH_DIRECT size */);
8542bf215546Sopenharmony_ci         predicating = false;
8543bf215546Sopenharmony_ci      }
8544bf215546Sopenharmony_ci
8545bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_DISPATCH_DIRECT, 3, predicating) | PKT3_SHADER_TYPE_S(1));
8546bf215546Sopenharmony_ci      radeon_emit(cs, blocks[0]);
8547bf215546Sopenharmony_ci      radeon_emit(cs, blocks[1]);
8548bf215546Sopenharmony_ci      radeon_emit(cs, blocks[2]);
8549bf215546Sopenharmony_ci      radeon_emit(cs, dispatch_initiator);
8550bf215546Sopenharmony_ci   }
8551bf215546Sopenharmony_ci
8552bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
8553bf215546Sopenharmony_ci}
8554bf215546Sopenharmony_ci
8555bf215546Sopenharmony_cistatic void
8556bf215546Sopenharmony_ciradv_upload_compute_shader_descriptors(struct radv_cmd_buffer *cmd_buffer,
8557bf215546Sopenharmony_ci                                       struct radv_compute_pipeline *pipeline,
8558bf215546Sopenharmony_ci                                       VkPipelineBindPoint bind_point)
8559bf215546Sopenharmony_ci{
8560bf215546Sopenharmony_ci   radv_flush_descriptors(cmd_buffer, VK_SHADER_STAGE_COMPUTE_BIT, &pipeline->base, bind_point);
8561bf215546Sopenharmony_ci   radv_flush_constants(cmd_buffer,
8562bf215546Sopenharmony_ci                        bind_point == VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
8563bf215546Sopenharmony_ci                           ? RADV_RT_STAGE_BITS
8564bf215546Sopenharmony_ci                           : VK_SHADER_STAGE_COMPUTE_BIT,
8565bf215546Sopenharmony_ci                        &pipeline->base, bind_point);
8566bf215546Sopenharmony_ci}
8567bf215546Sopenharmony_ci
8568bf215546Sopenharmony_cistatic void
8569bf215546Sopenharmony_ciradv_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info,
8570bf215546Sopenharmony_ci              struct radv_compute_pipeline *pipeline, VkPipelineBindPoint bind_point)
8571bf215546Sopenharmony_ci{
8572bf215546Sopenharmony_ci   bool has_prefetch = cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7;
8573bf215546Sopenharmony_ci   bool pipeline_is_dirty = pipeline != cmd_buffer->state.emitted_compute_pipeline;
8574bf215546Sopenharmony_ci
8575bf215546Sopenharmony_ci   if (pipeline->cs_regalloc_hang_bug)
8576bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_PS_PARTIAL_FLUSH |
8577bf215546Sopenharmony_ci                                      RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
8578bf215546Sopenharmony_ci
8579bf215546Sopenharmony_ci   if (cmd_buffer->state.flush_bits &
8580bf215546Sopenharmony_ci       (RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_DB |
8581bf215546Sopenharmony_ci        RADV_CMD_FLAG_PS_PARTIAL_FLUSH | RADV_CMD_FLAG_CS_PARTIAL_FLUSH)) {
8582bf215546Sopenharmony_ci      /* If we have to wait for idle, set all states first, so that
8583bf215546Sopenharmony_ci       * all SET packets are processed in parallel with previous draw
8584bf215546Sopenharmony_ci       * calls. Then upload descriptors, set shader pointers, and
8585bf215546Sopenharmony_ci       * dispatch, and prefetch at the end. This ensures that the
8586bf215546Sopenharmony_ci       * time the CUs are idle is very short. (there are only SET_SH
8587bf215546Sopenharmony_ci       * packets between the wait and the draw)
8588bf215546Sopenharmony_ci       */
8589bf215546Sopenharmony_ci      radv_emit_compute_pipeline(cmd_buffer, pipeline);
8590bf215546Sopenharmony_ci      si_emit_cache_flush(cmd_buffer);
8591bf215546Sopenharmony_ci      /* <-- CUs are idle here --> */
8592bf215546Sopenharmony_ci
8593bf215546Sopenharmony_ci      radv_upload_compute_shader_descriptors(cmd_buffer, pipeline, bind_point);
8594bf215546Sopenharmony_ci
8595bf215546Sopenharmony_ci      radv_emit_dispatch_packets(cmd_buffer, pipeline, info);
8596bf215546Sopenharmony_ci      /* <-- CUs are busy here --> */
8597bf215546Sopenharmony_ci
8598bf215546Sopenharmony_ci      /* Start prefetches after the dispatch has been started. Both
8599bf215546Sopenharmony_ci       * will run in parallel, but starting the dispatch first is
8600bf215546Sopenharmony_ci       * more important.
8601bf215546Sopenharmony_ci       */
8602bf215546Sopenharmony_ci      if (has_prefetch && pipeline_is_dirty) {
8603bf215546Sopenharmony_ci         radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
8604bf215546Sopenharmony_ci      }
8605bf215546Sopenharmony_ci   } else {
8606bf215546Sopenharmony_ci      /* If we don't wait for idle, start prefetches first, then set
8607bf215546Sopenharmony_ci       * states, and dispatch at the end.
8608bf215546Sopenharmony_ci       */
8609bf215546Sopenharmony_ci      si_emit_cache_flush(cmd_buffer);
8610bf215546Sopenharmony_ci
8611bf215546Sopenharmony_ci      if (has_prefetch && pipeline_is_dirty) {
8612bf215546Sopenharmony_ci         radv_emit_shader_prefetch(cmd_buffer, pipeline->base.shaders[MESA_SHADER_COMPUTE]);
8613bf215546Sopenharmony_ci      }
8614bf215546Sopenharmony_ci
8615bf215546Sopenharmony_ci      radv_upload_compute_shader_descriptors(cmd_buffer, pipeline, bind_point);
8616bf215546Sopenharmony_ci
8617bf215546Sopenharmony_ci      radv_emit_compute_pipeline(cmd_buffer, pipeline);
8618bf215546Sopenharmony_ci      radv_emit_dispatch_packets(cmd_buffer, pipeline, info);
8619bf215546Sopenharmony_ci   }
8620bf215546Sopenharmony_ci
8621bf215546Sopenharmony_ci   if (pipeline_is_dirty) {
8622bf215546Sopenharmony_ci      /* Raytracing uses compute shaders but has separate bind points and pipelines.
8623bf215546Sopenharmony_ci       * So if we set compute userdata & shader registers we should dirty the raytracing
8624bf215546Sopenharmony_ci       * ones and the other way around.
8625bf215546Sopenharmony_ci       *
8626bf215546Sopenharmony_ci       * We only need to do this when the pipeline is dirty because when we switch between
8627bf215546Sopenharmony_ci       * the two we always need to switch pipelines.
8628bf215546Sopenharmony_ci       */
8629bf215546Sopenharmony_ci      radv_mark_descriptor_sets_dirty(cmd_buffer, bind_point == VK_PIPELINE_BIND_POINT_COMPUTE
8630bf215546Sopenharmony_ci                                                     ? VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR
8631bf215546Sopenharmony_ci                                                     : VK_PIPELINE_BIND_POINT_COMPUTE);
8632bf215546Sopenharmony_ci   }
8633bf215546Sopenharmony_ci
8634bf215546Sopenharmony_ci   if (pipeline->cs_regalloc_hang_bug)
8635bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH;
8636bf215546Sopenharmony_ci
8637bf215546Sopenharmony_ci   radv_cmd_buffer_after_draw(cmd_buffer, RADV_CMD_FLAG_CS_PARTIAL_FLUSH);
8638bf215546Sopenharmony_ci}
8639bf215546Sopenharmony_ci
8640bf215546Sopenharmony_cistatic void
8641bf215546Sopenharmony_ciradv_compute_dispatch(struct radv_cmd_buffer *cmd_buffer, const struct radv_dispatch_info *info)
8642bf215546Sopenharmony_ci{
8643bf215546Sopenharmony_ci   radv_dispatch(cmd_buffer, info, cmd_buffer->state.compute_pipeline,
8644bf215546Sopenharmony_ci                 VK_PIPELINE_BIND_POINT_COMPUTE);
8645bf215546Sopenharmony_ci}
8646bf215546Sopenharmony_ci
8647bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8648bf215546Sopenharmony_ciradv_CmdDispatchBase(VkCommandBuffer commandBuffer, uint32_t base_x, uint32_t base_y,
8649bf215546Sopenharmony_ci                     uint32_t base_z, uint32_t x, uint32_t y, uint32_t z)
8650bf215546Sopenharmony_ci{
8651bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8652bf215546Sopenharmony_ci   struct radv_dispatch_info info = {0};
8653bf215546Sopenharmony_ci
8654bf215546Sopenharmony_ci   info.blocks[0] = x;
8655bf215546Sopenharmony_ci   info.blocks[1] = y;
8656bf215546Sopenharmony_ci   info.blocks[2] = z;
8657bf215546Sopenharmony_ci
8658bf215546Sopenharmony_ci   info.offsets[0] = base_x;
8659bf215546Sopenharmony_ci   info.offsets[1] = base_y;
8660bf215546Sopenharmony_ci   info.offsets[2] = base_z;
8661bf215546Sopenharmony_ci   radv_compute_dispatch(cmd_buffer, &info);
8662bf215546Sopenharmony_ci}
8663bf215546Sopenharmony_ci
8664bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8665bf215546Sopenharmony_ciradv_CmdDispatch(VkCommandBuffer commandBuffer, uint32_t x, uint32_t y, uint32_t z)
8666bf215546Sopenharmony_ci{
8667bf215546Sopenharmony_ci   radv_CmdDispatchBase(commandBuffer, 0, 0, 0, x, y, z);
8668bf215546Sopenharmony_ci}
8669bf215546Sopenharmony_ci
8670bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8671bf215546Sopenharmony_ciradv_CmdDispatchIndirect(VkCommandBuffer commandBuffer, VkBuffer _buffer, VkDeviceSize offset)
8672bf215546Sopenharmony_ci{
8673bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8674bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
8675bf215546Sopenharmony_ci   struct radv_dispatch_info info = {0};
8676bf215546Sopenharmony_ci
8677bf215546Sopenharmony_ci   info.indirect = buffer->bo;
8678bf215546Sopenharmony_ci   info.va = radv_buffer_get_va(buffer->bo) + buffer->offset + offset;
8679bf215546Sopenharmony_ci
8680bf215546Sopenharmony_ci   radv_compute_dispatch(cmd_buffer, &info);
8681bf215546Sopenharmony_ci}
8682bf215546Sopenharmony_ci
8683bf215546Sopenharmony_civoid
8684bf215546Sopenharmony_ciradv_unaligned_dispatch(struct radv_cmd_buffer *cmd_buffer, uint32_t x, uint32_t y, uint32_t z)
8685bf215546Sopenharmony_ci{
8686bf215546Sopenharmony_ci   struct radv_dispatch_info info = {0};
8687bf215546Sopenharmony_ci
8688bf215546Sopenharmony_ci   info.blocks[0] = x;
8689bf215546Sopenharmony_ci   info.blocks[1] = y;
8690bf215546Sopenharmony_ci   info.blocks[2] = z;
8691bf215546Sopenharmony_ci   info.unaligned = 1;
8692bf215546Sopenharmony_ci
8693bf215546Sopenharmony_ci   radv_compute_dispatch(cmd_buffer, &info);
8694bf215546Sopenharmony_ci}
8695bf215546Sopenharmony_ci
8696bf215546Sopenharmony_civoid
8697bf215546Sopenharmony_ciradv_indirect_dispatch(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *bo, uint64_t va)
8698bf215546Sopenharmony_ci{
8699bf215546Sopenharmony_ci   struct radv_dispatch_info info = {0};
8700bf215546Sopenharmony_ci
8701bf215546Sopenharmony_ci   info.indirect = bo;
8702bf215546Sopenharmony_ci   info.va = va;
8703bf215546Sopenharmony_ci
8704bf215546Sopenharmony_ci   radv_compute_dispatch(cmd_buffer, &info);
8705bf215546Sopenharmony_ci}
8706bf215546Sopenharmony_ci
/* Selects where radv_trace_rays() gets the shader binding table description
 * and the launch size from.
 */
enum radv_rt_mode {
   /* SBT description and launch size both come from the CPU-side struct. */
   radv_rt_mode_direct = 0,
   /* SBT description comes from the CPU-side struct, launch size is read
    * from the indirect address.
    */
   radv_rt_mode_indirect = 1,
   /* SBT description and launch size are both read from the indirect address. */
   radv_rt_mode_indirect2 = 2,
};
8712bf215546Sopenharmony_ci
8713bf215546Sopenharmony_cistatic void
8714bf215546Sopenharmony_ciradv_trace_rays(struct radv_cmd_buffer *cmd_buffer, const VkTraceRaysIndirectCommand2KHR *tables,
8715bf215546Sopenharmony_ci                uint64_t indirect_va, enum radv_rt_mode mode)
8716bf215546Sopenharmony_ci{
8717bf215546Sopenharmony_ci   struct radv_compute_pipeline *pipeline = cmd_buffer->state.rt_pipeline;
8718bf215546Sopenharmony_ci   uint32_t base_reg = pipeline->base.user_data_0[MESA_SHADER_COMPUTE];
8719bf215546Sopenharmony_ci
8720bf215546Sopenharmony_ci   struct radv_dispatch_info info = {0};
8721bf215546Sopenharmony_ci   info.unaligned = true;
8722bf215546Sopenharmony_ci
8723bf215546Sopenharmony_ci   uint64_t launch_size_va;
8724bf215546Sopenharmony_ci   uint64_t sbt_va;
8725bf215546Sopenharmony_ci
8726bf215546Sopenharmony_ci   if (mode != radv_rt_mode_indirect2) {
8727bf215546Sopenharmony_ci      uint32_t upload_size = mode == radv_rt_mode_direct
8728bf215546Sopenharmony_ci                                ? sizeof(VkTraceRaysIndirectCommand2KHR)
8729bf215546Sopenharmony_ci                                : offsetof(VkTraceRaysIndirectCommand2KHR, width);
8730bf215546Sopenharmony_ci
8731bf215546Sopenharmony_ci      uint32_t offset;
8732bf215546Sopenharmony_ci      if (!radv_cmd_buffer_upload_data(cmd_buffer, upload_size, tables, &offset))
8733bf215546Sopenharmony_ci         return;
8734bf215546Sopenharmony_ci
8735bf215546Sopenharmony_ci      uint64_t upload_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + offset;
8736bf215546Sopenharmony_ci
8737bf215546Sopenharmony_ci      launch_size_va = (mode == radv_rt_mode_direct)
8738bf215546Sopenharmony_ci                          ? upload_va + offsetof(VkTraceRaysIndirectCommand2KHR, width)
8739bf215546Sopenharmony_ci                          : indirect_va;
8740bf215546Sopenharmony_ci      sbt_va = upload_va;
8741bf215546Sopenharmony_ci   } else {
8742bf215546Sopenharmony_ci      launch_size_va = indirect_va + offsetof(VkTraceRaysIndirectCommand2KHR, width);
8743bf215546Sopenharmony_ci      sbt_va = indirect_va;
8744bf215546Sopenharmony_ci   }
8745bf215546Sopenharmony_ci
8746bf215546Sopenharmony_ci   if (mode == radv_rt_mode_direct) {
8747bf215546Sopenharmony_ci      info.blocks[0] = tables->width;
8748bf215546Sopenharmony_ci      info.blocks[1] = tables->height;
8749bf215546Sopenharmony_ci      info.blocks[2] = tables->depth;
8750bf215546Sopenharmony_ci   } else
8751bf215546Sopenharmony_ci      info.va = launch_size_va;
8752bf215546Sopenharmony_ci
8753bf215546Sopenharmony_ci   struct radv_userdata_info *desc_loc =
8754bf215546Sopenharmony_ci      radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_SBT_DESCRIPTORS);
8755bf215546Sopenharmony_ci   if (desc_loc->sgpr_idx != -1) {
8756bf215546Sopenharmony_ci      radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
8757bf215546Sopenharmony_ci                               base_reg + desc_loc->sgpr_idx * 4, sbt_va, true);
8758bf215546Sopenharmony_ci   }
8759bf215546Sopenharmony_ci
8760bf215546Sopenharmony_ci   struct radv_userdata_info *size_loc =
8761bf215546Sopenharmony_ci      radv_lookup_user_sgpr(&pipeline->base, MESA_SHADER_COMPUTE, AC_UD_CS_RAY_LAUNCH_SIZE_ADDR);
8762bf215546Sopenharmony_ci   if (size_loc->sgpr_idx != -1) {
8763bf215546Sopenharmony_ci      radv_emit_shader_pointer(cmd_buffer->device, cmd_buffer->cs,
8764bf215546Sopenharmony_ci                               base_reg + size_loc->sgpr_idx * 4, launch_size_va, true);
8765bf215546Sopenharmony_ci   }
8766bf215546Sopenharmony_ci
8767bf215546Sopenharmony_ci   radv_dispatch(cmd_buffer, &info, pipeline, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR);
8768bf215546Sopenharmony_ci}
8769bf215546Sopenharmony_ci
8770bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8771bf215546Sopenharmony_ciradv_CmdTraceRaysKHR(VkCommandBuffer commandBuffer,
8772bf215546Sopenharmony_ci                     const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
8773bf215546Sopenharmony_ci                     const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
8774bf215546Sopenharmony_ci                     const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
8775bf215546Sopenharmony_ci                     const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
8776bf215546Sopenharmony_ci                     uint32_t width, uint32_t height, uint32_t depth)
8777bf215546Sopenharmony_ci{
8778bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8779bf215546Sopenharmony_ci
8780bf215546Sopenharmony_ci   VkTraceRaysIndirectCommand2KHR tables = {
8781bf215546Sopenharmony_ci      .raygenShaderRecordAddress = pRaygenShaderBindingTable->deviceAddress,
8782bf215546Sopenharmony_ci      .raygenShaderRecordSize = pRaygenShaderBindingTable->size,
8783bf215546Sopenharmony_ci      .missShaderBindingTableAddress = pMissShaderBindingTable->deviceAddress,
8784bf215546Sopenharmony_ci      .missShaderBindingTableSize = pMissShaderBindingTable->size,
8785bf215546Sopenharmony_ci      .missShaderBindingTableStride = pMissShaderBindingTable->stride,
8786bf215546Sopenharmony_ci      .hitShaderBindingTableAddress = pHitShaderBindingTable->deviceAddress,
8787bf215546Sopenharmony_ci      .hitShaderBindingTableSize = pHitShaderBindingTable->size,
8788bf215546Sopenharmony_ci      .hitShaderBindingTableStride = pHitShaderBindingTable->stride,
8789bf215546Sopenharmony_ci      .callableShaderBindingTableAddress = pCallableShaderBindingTable->deviceAddress,
8790bf215546Sopenharmony_ci      .callableShaderBindingTableSize = pCallableShaderBindingTable->size,
8791bf215546Sopenharmony_ci      .callableShaderBindingTableStride = pCallableShaderBindingTable->stride,
8792bf215546Sopenharmony_ci      .width = width,
8793bf215546Sopenharmony_ci      .height = height,
8794bf215546Sopenharmony_ci      .depth = depth,
8795bf215546Sopenharmony_ci   };
8796bf215546Sopenharmony_ci
8797bf215546Sopenharmony_ci   radv_trace_rays(cmd_buffer, &tables, 0, radv_rt_mode_direct);
8798bf215546Sopenharmony_ci}
8799bf215546Sopenharmony_ci
8800bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8801bf215546Sopenharmony_ciradv_CmdTraceRaysIndirectKHR(VkCommandBuffer commandBuffer,
8802bf215546Sopenharmony_ci                             const VkStridedDeviceAddressRegionKHR *pRaygenShaderBindingTable,
8803bf215546Sopenharmony_ci                             const VkStridedDeviceAddressRegionKHR *pMissShaderBindingTable,
8804bf215546Sopenharmony_ci                             const VkStridedDeviceAddressRegionKHR *pHitShaderBindingTable,
8805bf215546Sopenharmony_ci                             const VkStridedDeviceAddressRegionKHR *pCallableShaderBindingTable,
8806bf215546Sopenharmony_ci                             VkDeviceAddress indirectDeviceAddress)
8807bf215546Sopenharmony_ci{
8808bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8809bf215546Sopenharmony_ci
8810bf215546Sopenharmony_ci   assert(cmd_buffer->device->use_global_bo_list);
8811bf215546Sopenharmony_ci
8812bf215546Sopenharmony_ci   VkTraceRaysIndirectCommand2KHR tables = {
8813bf215546Sopenharmony_ci      .raygenShaderRecordAddress = pRaygenShaderBindingTable->deviceAddress,
8814bf215546Sopenharmony_ci      .raygenShaderRecordSize = pRaygenShaderBindingTable->size,
8815bf215546Sopenharmony_ci      .missShaderBindingTableAddress = pMissShaderBindingTable->deviceAddress,
8816bf215546Sopenharmony_ci      .missShaderBindingTableSize = pMissShaderBindingTable->size,
8817bf215546Sopenharmony_ci      .missShaderBindingTableStride = pMissShaderBindingTable->stride,
8818bf215546Sopenharmony_ci      .hitShaderBindingTableAddress = pHitShaderBindingTable->deviceAddress,
8819bf215546Sopenharmony_ci      .hitShaderBindingTableSize = pHitShaderBindingTable->size,
8820bf215546Sopenharmony_ci      .hitShaderBindingTableStride = pHitShaderBindingTable->stride,
8821bf215546Sopenharmony_ci      .callableShaderBindingTableAddress = pCallableShaderBindingTable->deviceAddress,
8822bf215546Sopenharmony_ci      .callableShaderBindingTableSize = pCallableShaderBindingTable->size,
8823bf215546Sopenharmony_ci      .callableShaderBindingTableStride = pCallableShaderBindingTable->stride,
8824bf215546Sopenharmony_ci   };
8825bf215546Sopenharmony_ci
8826bf215546Sopenharmony_ci   radv_trace_rays(cmd_buffer, &tables, indirectDeviceAddress, radv_rt_mode_indirect);
8827bf215546Sopenharmony_ci}
8828bf215546Sopenharmony_ci
8829bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8830bf215546Sopenharmony_ciradv_CmdTraceRaysIndirect2KHR(VkCommandBuffer commandBuffer, VkDeviceAddress indirectDeviceAddress)
8831bf215546Sopenharmony_ci{
8832bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8833bf215546Sopenharmony_ci
8834bf215546Sopenharmony_ci   assert(cmd_buffer->device->use_global_bo_list);
8835bf215546Sopenharmony_ci
8836bf215546Sopenharmony_ci   radv_trace_rays(cmd_buffer, NULL, indirectDeviceAddress, radv_rt_mode_indirect2);
8837bf215546Sopenharmony_ci}
8838bf215546Sopenharmony_ci
8839bf215546Sopenharmony_cistatic void
8840bf215546Sopenharmony_ciradv_set_rt_stack_size(struct radv_cmd_buffer *cmd_buffer, uint32_t size)
8841bf215546Sopenharmony_ci{
8842bf215546Sopenharmony_ci   unsigned wave_size = 0;
8843bf215546Sopenharmony_ci   unsigned scratch_bytes_per_wave = 0;
8844bf215546Sopenharmony_ci
8845bf215546Sopenharmony_ci   if (cmd_buffer->state.rt_pipeline) {
8846bf215546Sopenharmony_ci      scratch_bytes_per_wave = cmd_buffer->state.rt_pipeline->base.scratch_bytes_per_wave;
8847bf215546Sopenharmony_ci      wave_size = cmd_buffer->state.rt_pipeline->base.shaders[MESA_SHADER_COMPUTE]->info.wave_size;
8848bf215546Sopenharmony_ci   }
8849bf215546Sopenharmony_ci
8850bf215546Sopenharmony_ci   /* The hardware register is specified as a multiple of 256 DWORDS. */
8851bf215546Sopenharmony_ci   scratch_bytes_per_wave += align(size * wave_size, 1024);
8852bf215546Sopenharmony_ci
8853bf215546Sopenharmony_ci   cmd_buffer->compute_scratch_size_per_wave_needed =
8854bf215546Sopenharmony_ci      MAX2(cmd_buffer->compute_scratch_size_per_wave_needed, scratch_bytes_per_wave);
8855bf215546Sopenharmony_ci}
8856bf215546Sopenharmony_ci
8857bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8858bf215546Sopenharmony_ciradv_CmdSetRayTracingPipelineStackSizeKHR(VkCommandBuffer commandBuffer, uint32_t size)
8859bf215546Sopenharmony_ci{
8860bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8861bf215546Sopenharmony_ci
8862bf215546Sopenharmony_ci   radv_set_rt_stack_size(cmd_buffer, size);
8863bf215546Sopenharmony_ci   cmd_buffer->state.rt_stack_size = size;
8864bf215546Sopenharmony_ci}
8865bf215546Sopenharmony_ci
8866bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8867bf215546Sopenharmony_ciradv_CmdEndRenderPass2(VkCommandBuffer commandBuffer, const VkSubpassEndInfo *pSubpassEndInfo)
8868bf215546Sopenharmony_ci{
8869bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8870bf215546Sopenharmony_ci
8871bf215546Sopenharmony_ci   radv_mark_noncoherent_rb(cmd_buffer);
8872bf215546Sopenharmony_ci
8873bf215546Sopenharmony_ci   radv_emit_subpass_barrier(cmd_buffer, &cmd_buffer->state.pass->end_barrier);
8874bf215546Sopenharmony_ci
8875bf215546Sopenharmony_ci   radv_cmd_buffer_end_subpass(cmd_buffer);
8876bf215546Sopenharmony_ci
8877bf215546Sopenharmony_ci   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.attachments);
8878bf215546Sopenharmony_ci   vk_free(&cmd_buffer->pool->vk.alloc, cmd_buffer->state.subpass_sample_locs);
8879bf215546Sopenharmony_ci
8880bf215546Sopenharmony_ci   cmd_buffer->state.pass = NULL;
8881bf215546Sopenharmony_ci   cmd_buffer->state.subpass = NULL;
8882bf215546Sopenharmony_ci   cmd_buffer->state.attachments = NULL;
8883bf215546Sopenharmony_ci   cmd_buffer->state.framebuffer = NULL;
8884bf215546Sopenharmony_ci   cmd_buffer->state.subpass_sample_locs = NULL;
8885bf215546Sopenharmony_ci}
8886bf215546Sopenharmony_ci
8887bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
8888bf215546Sopenharmony_ciradv_CmdBeginRendering(VkCommandBuffer commandBuffer, const VkRenderingInfo *pRenderingInfo)
8889bf215546Sopenharmony_ci{
8890bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
8891bf215546Sopenharmony_ci   const VkRenderingFragmentShadingRateAttachmentInfoKHR *vrs_info = vk_find_struct_const(
8892bf215546Sopenharmony_ci      pRenderingInfo->pNext, RENDERING_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR);
8893bf215546Sopenharmony_ci   VkResult result;
8894bf215546Sopenharmony_ci   /* (normal + resolve) for color attachments and ds and a VRS attachment */
8895bf215546Sopenharmony_ci   VkAttachmentDescription2 att_desc[MAX_RTS * 2 + 3];
8896bf215546Sopenharmony_ci   VkAttachmentDescriptionStencilLayout ds_stencil_att, ds_stencil_resolve_att;
8897bf215546Sopenharmony_ci   VkImageView iviews[MAX_RTS * 2 + 3];
8898bf215546Sopenharmony_ci   VkAttachmentReference2 color_refs[MAX_RTS], color_resolve_refs[MAX_RTS];
8899bf215546Sopenharmony_ci   VkAttachmentReference2 ds_ref, ds_resolve_ref, vrs_ref;
8900bf215546Sopenharmony_ci   VkAttachmentReferenceStencilLayout ds_stencil_ref, ds_stencil_resolve_ref;
8901bf215546Sopenharmony_ci   VkSubpassDescriptionDepthStencilResolve ds_resolve_info;
8902bf215546Sopenharmony_ci   VkFragmentShadingRateAttachmentInfoKHR vrs_subpass_info;
8903bf215546Sopenharmony_ci   VkClearValue clear_values[MAX_RTS * 2 + 3];
8904bf215546Sopenharmony_ci   unsigned att_count = 0;
8905bf215546Sopenharmony_ci
8906bf215546Sopenharmony_ci   VkSubpassDescription2 subpass = {
8907bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
8908bf215546Sopenharmony_ci      .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
8909bf215546Sopenharmony_ci      .viewMask = pRenderingInfo->viewMask,
8910bf215546Sopenharmony_ci      .colorAttachmentCount = pRenderingInfo->colorAttachmentCount,
8911bf215546Sopenharmony_ci      .pColorAttachments = color_refs,
8912bf215546Sopenharmony_ci      .pResolveAttachments = color_resolve_refs,
8913bf215546Sopenharmony_ci   };
8914bf215546Sopenharmony_ci
8915bf215546Sopenharmony_ci   for (unsigned i = 0; i < pRenderingInfo->colorAttachmentCount; ++i) {
8916bf215546Sopenharmony_ci      color_refs[i] = (VkAttachmentReference2){
8917bf215546Sopenharmony_ci         .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
8918bf215546Sopenharmony_ci         .attachment = VK_ATTACHMENT_UNUSED,
8919bf215546Sopenharmony_ci      };
8920bf215546Sopenharmony_ci      color_resolve_refs[i] = (VkAttachmentReference2){
8921bf215546Sopenharmony_ci         .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
8922bf215546Sopenharmony_ci         .attachment = VK_ATTACHMENT_UNUSED,
8923bf215546Sopenharmony_ci      };
8924bf215546Sopenharmony_ci
8925bf215546Sopenharmony_ci      if (pRenderingInfo->pColorAttachments[i].imageView == VK_NULL_HANDLE)
8926bf215546Sopenharmony_ci         continue;
8927bf215546Sopenharmony_ci
8928bf215546Sopenharmony_ci      const VkRenderingAttachmentInfo *info = &pRenderingInfo->pColorAttachments[i];
8929bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_image_view, iview, info->imageView);
8930bf215546Sopenharmony_ci      color_refs[i] = (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
8931bf215546Sopenharmony_ci                                               .attachment = att_count,
8932bf215546Sopenharmony_ci                                               .layout = info->imageLayout,
8933bf215546Sopenharmony_ci                                               .aspectMask = iview->vk.aspects};
8934bf215546Sopenharmony_ci
8935bf215546Sopenharmony_ci      iviews[att_count] = info->imageView;
8936bf215546Sopenharmony_ci      clear_values[att_count] = info->clearValue;
8937bf215546Sopenharmony_ci      VkAttachmentDescription2 *att = att_desc + att_count++;
8938bf215546Sopenharmony_ci
8939bf215546Sopenharmony_ci      memset(att, 0, sizeof(*att));
8940bf215546Sopenharmony_ci      att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
8941bf215546Sopenharmony_ci      att->format = iview->vk.format;
8942bf215546Sopenharmony_ci      att->samples = iview->image->info.samples;
8943bf215546Sopenharmony_ci      att->loadOp = info->loadOp;
8944bf215546Sopenharmony_ci      att->storeOp = info->storeOp;
8945bf215546Sopenharmony_ci      att->initialLayout = info->imageLayout;
8946bf215546Sopenharmony_ci      att->finalLayout = info->imageLayout;
8947bf215546Sopenharmony_ci
8948bf215546Sopenharmony_ci      if (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT)
8949bf215546Sopenharmony_ci         att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
8950bf215546Sopenharmony_ci
8951bf215546Sopenharmony_ci      if (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT)
8952bf215546Sopenharmony_ci         att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
8953bf215546Sopenharmony_ci
8954bf215546Sopenharmony_ci      if (info->resolveMode != VK_RESOLVE_MODE_NONE &&
8955bf215546Sopenharmony_ci          !(pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT)) {
8956bf215546Sopenharmony_ci         RADV_FROM_HANDLE(radv_image_view, resolve_iview, info->resolveImageView);
8957bf215546Sopenharmony_ci         color_resolve_refs[i] =
8958bf215546Sopenharmony_ci            (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
8959bf215546Sopenharmony_ci                                     .attachment = att_count,
8960bf215546Sopenharmony_ci                                     .layout = info->resolveImageLayout,
8961bf215546Sopenharmony_ci                                     .aspectMask = resolve_iview->vk.aspects};
8962bf215546Sopenharmony_ci
8963bf215546Sopenharmony_ci         iviews[att_count] = info->resolveImageView;
8964bf215546Sopenharmony_ci         att = att_desc + att_count++;
8965bf215546Sopenharmony_ci
8966bf215546Sopenharmony_ci         memset(att, 0, sizeof(*att));
8967bf215546Sopenharmony_ci         att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
8968bf215546Sopenharmony_ci         att->format = resolve_iview->vk.format;
8969bf215546Sopenharmony_ci         att->samples = resolve_iview->image->info.samples;
8970bf215546Sopenharmony_ci         att->loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
8971bf215546Sopenharmony_ci         att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
8972bf215546Sopenharmony_ci         att->initialLayout = info->resolveImageLayout;
8973bf215546Sopenharmony_ci         att->finalLayout = info->resolveImageLayout;
8974bf215546Sopenharmony_ci      }
8975bf215546Sopenharmony_ci   }
8976bf215546Sopenharmony_ci
8977bf215546Sopenharmony_ci   if (pRenderingInfo->pDepthAttachment || pRenderingInfo->pStencilAttachment) {
8978bf215546Sopenharmony_ci      const VkRenderingAttachmentInfo *common_info = pRenderingInfo->pDepthAttachment
8979bf215546Sopenharmony_ci                                                           ? pRenderingInfo->pDepthAttachment
8980bf215546Sopenharmony_ci                                                           : pRenderingInfo->pStencilAttachment;
8981bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_image_view, iview, common_info->imageView);
8982bf215546Sopenharmony_ci
8983bf215546Sopenharmony_ci      if (common_info->imageView != VK_NULL_HANDLE) {
8984bf215546Sopenharmony_ci         ds_ref = (VkAttachmentReference2){
8985bf215546Sopenharmony_ci            .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
8986bf215546Sopenharmony_ci            .attachment = att_count,
8987bf215546Sopenharmony_ci            .layout = common_info->imageLayout,
8988bf215546Sopenharmony_ci            .aspectMask = (pRenderingInfo->pDepthAttachment ? VK_IMAGE_ASPECT_DEPTH_BIT : 0) |
8989bf215546Sopenharmony_ci                          (pRenderingInfo->pStencilAttachment ? VK_IMAGE_ASPECT_STENCIL_BIT : 0)};
8990bf215546Sopenharmony_ci         subpass.pDepthStencilAttachment = &ds_ref;
8991bf215546Sopenharmony_ci
8992bf215546Sopenharmony_ci         iviews[att_count] = common_info->imageView;
8993bf215546Sopenharmony_ci         if (pRenderingInfo->pDepthAttachment)
8994bf215546Sopenharmony_ci            clear_values[att_count].depthStencil.depth =
8995bf215546Sopenharmony_ci               pRenderingInfo->pDepthAttachment->clearValue.depthStencil.depth;
8996bf215546Sopenharmony_ci         if (pRenderingInfo->pStencilAttachment)
8997bf215546Sopenharmony_ci            clear_values[att_count].depthStencil.stencil =
8998bf215546Sopenharmony_ci               pRenderingInfo->pStencilAttachment->clearValue.depthStencil.stencil;
8999bf215546Sopenharmony_ci         VkAttachmentDescription2 *att = att_desc + att_count++;
9000bf215546Sopenharmony_ci
9001bf215546Sopenharmony_ci         memset(att, 0, sizeof(*att));
9002bf215546Sopenharmony_ci         att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
9003bf215546Sopenharmony_ci         att->format = iview->vk.format;
9004bf215546Sopenharmony_ci         att->samples = iview->image->info.samples;
9005bf215546Sopenharmony_ci
9006bf215546Sopenharmony_ci         if (pRenderingInfo->pDepthAttachment) {
9007bf215546Sopenharmony_ci            att->loadOp = pRenderingInfo->pDepthAttachment->loadOp;
9008bf215546Sopenharmony_ci            att->storeOp = pRenderingInfo->pDepthAttachment->storeOp;
9009bf215546Sopenharmony_ci         } else {
9010bf215546Sopenharmony_ci            att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
9011bf215546Sopenharmony_ci            att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
9012bf215546Sopenharmony_ci         }
9013bf215546Sopenharmony_ci
9014bf215546Sopenharmony_ci         if (pRenderingInfo->pStencilAttachment) {
9015bf215546Sopenharmony_ci            att->stencilLoadOp = pRenderingInfo->pStencilAttachment->loadOp;
9016bf215546Sopenharmony_ci            att->stencilStoreOp = pRenderingInfo->pStencilAttachment->storeOp;
9017bf215546Sopenharmony_ci         } else {
9018bf215546Sopenharmony_ci            att->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
9019bf215546Sopenharmony_ci            att->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
9020bf215546Sopenharmony_ci         }
9021bf215546Sopenharmony_ci
9022bf215546Sopenharmony_ci         if (pRenderingInfo->flags & VK_RENDERING_RESUMING_BIT) {
9023bf215546Sopenharmony_ci            att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
9024bf215546Sopenharmony_ci            att->stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
9025bf215546Sopenharmony_ci         }
9026bf215546Sopenharmony_ci
9027bf215546Sopenharmony_ci         if (pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT) {
9028bf215546Sopenharmony_ci            att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
9029bf215546Sopenharmony_ci            att->stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE;
9030bf215546Sopenharmony_ci         }
9031bf215546Sopenharmony_ci
9032bf215546Sopenharmony_ci         att->initialLayout = common_info->imageLayout;
9033bf215546Sopenharmony_ci         att->finalLayout = common_info->imageLayout;
9034bf215546Sopenharmony_ci
9035bf215546Sopenharmony_ci         if (pRenderingInfo->pDepthAttachment && pRenderingInfo->pStencilAttachment) {
9036bf215546Sopenharmony_ci            ds_ref.pNext = &ds_stencil_ref;
9037bf215546Sopenharmony_ci            ds_stencil_ref = (VkAttachmentReferenceStencilLayout){
9038bf215546Sopenharmony_ci               .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT,
9039bf215546Sopenharmony_ci               .stencilLayout = pRenderingInfo->pStencilAttachment->imageLayout};
9040bf215546Sopenharmony_ci
9041bf215546Sopenharmony_ci            att->pNext = &ds_stencil_att;
9042bf215546Sopenharmony_ci            ds_stencil_att = (VkAttachmentDescriptionStencilLayout){
9043bf215546Sopenharmony_ci               .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT,
9044bf215546Sopenharmony_ci               .stencilInitialLayout = pRenderingInfo->pStencilAttachment->imageLayout,
9045bf215546Sopenharmony_ci               .stencilFinalLayout = pRenderingInfo->pStencilAttachment->imageLayout,
9046bf215546Sopenharmony_ci            };
9047bf215546Sopenharmony_ci         }
9048bf215546Sopenharmony_ci
9049bf215546Sopenharmony_ci         if (((pRenderingInfo->pDepthAttachment &&
9050bf215546Sopenharmony_ci              pRenderingInfo->pDepthAttachment->resolveMode != VK_RESOLVE_MODE_NONE) ||
9051bf215546Sopenharmony_ci             (pRenderingInfo->pStencilAttachment &&
9052bf215546Sopenharmony_ci              pRenderingInfo->pStencilAttachment->resolveMode != VK_RESOLVE_MODE_NONE)) &&
9053bf215546Sopenharmony_ci             !(pRenderingInfo->flags & VK_RENDERING_SUSPENDING_BIT)) {
9054bf215546Sopenharmony_ci            RADV_FROM_HANDLE(radv_image_view, resolve_iview, common_info->resolveImageView);
9055bf215546Sopenharmony_ci            ds_resolve_ref =
9056bf215546Sopenharmony_ci               (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
9057bf215546Sopenharmony_ci                                        .attachment = att_count,
9058bf215546Sopenharmony_ci                                        .layout = common_info->resolveImageLayout,
9059bf215546Sopenharmony_ci                                        .aspectMask = resolve_iview->vk.aspects};
9060bf215546Sopenharmony_ci
9061bf215546Sopenharmony_ci            iviews[att_count] = common_info->resolveImageView;
9062bf215546Sopenharmony_ci            att = att_desc + att_count++;
9063bf215546Sopenharmony_ci
9064bf215546Sopenharmony_ci            memset(att, 0, sizeof(*att));
9065bf215546Sopenharmony_ci            att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
9066bf215546Sopenharmony_ci            att->format = resolve_iview->vk.format;
9067bf215546Sopenharmony_ci            att->samples = resolve_iview->image->info.samples;
9068bf215546Sopenharmony_ci            att->loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
9069bf215546Sopenharmony_ci            att->storeOp = VK_ATTACHMENT_STORE_OP_STORE;
9070bf215546Sopenharmony_ci            att->initialLayout = common_info->resolveImageLayout;
9071bf215546Sopenharmony_ci            att->finalLayout = common_info->resolveImageLayout;
9072bf215546Sopenharmony_ci
9073bf215546Sopenharmony_ci            ds_resolve_info = (VkSubpassDescriptionDepthStencilResolve){
9074bf215546Sopenharmony_ci               .sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE,
9075bf215546Sopenharmony_ci               .pNext = subpass.pNext,
9076bf215546Sopenharmony_ci               .depthResolveMode =
9077bf215546Sopenharmony_ci                  (pRenderingInfo->pDepthAttachment &&
9078bf215546Sopenharmony_ci                   pRenderingInfo->pDepthAttachment->resolveMode != VK_RESOLVE_MODE_NONE)
9079bf215546Sopenharmony_ci                     ? pRenderingInfo->pDepthAttachment->resolveMode
9080bf215546Sopenharmony_ci                     : VK_RESOLVE_MODE_NONE,
9081bf215546Sopenharmony_ci               .stencilResolveMode =
9082bf215546Sopenharmony_ci                  (pRenderingInfo->pStencilAttachment &&
9083bf215546Sopenharmony_ci                   pRenderingInfo->pStencilAttachment->resolveMode != VK_RESOLVE_MODE_NONE)
9084bf215546Sopenharmony_ci                     ? pRenderingInfo->pStencilAttachment->resolveMode
9085bf215546Sopenharmony_ci                     : VK_RESOLVE_MODE_NONE,
9086bf215546Sopenharmony_ci               .pDepthStencilResolveAttachment = &ds_resolve_ref};
9087bf215546Sopenharmony_ci            subpass.pNext = &ds_resolve_info;
9088bf215546Sopenharmony_ci
9089bf215546Sopenharmony_ci            if (pRenderingInfo->pDepthAttachment && pRenderingInfo->pStencilAttachment &&
9090bf215546Sopenharmony_ci                pRenderingInfo->pDepthAttachment->resolveMode != VK_RESOLVE_MODE_NONE &&
9091bf215546Sopenharmony_ci                pRenderingInfo->pStencilAttachment->resolveMode != VK_RESOLVE_MODE_NONE) {
9092bf215546Sopenharmony_ci               ds_resolve_ref.pNext = &ds_stencil_resolve_ref;
9093bf215546Sopenharmony_ci               ds_stencil_resolve_ref = (VkAttachmentReferenceStencilLayout){
9094bf215546Sopenharmony_ci                  .sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_STENCIL_LAYOUT,
9095bf215546Sopenharmony_ci                  .stencilLayout = pRenderingInfo->pStencilAttachment->resolveImageLayout};
9096bf215546Sopenharmony_ci
9097bf215546Sopenharmony_ci               att->pNext = &ds_stencil_resolve_att;
9098bf215546Sopenharmony_ci               ds_stencil_resolve_att = (VkAttachmentDescriptionStencilLayout){
9099bf215546Sopenharmony_ci                  .sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_STENCIL_LAYOUT,
9100bf215546Sopenharmony_ci                  .stencilInitialLayout = pRenderingInfo->pStencilAttachment->resolveImageLayout,
9101bf215546Sopenharmony_ci                  .stencilFinalLayout = pRenderingInfo->pStencilAttachment->resolveImageLayout,
9102bf215546Sopenharmony_ci               };
9103bf215546Sopenharmony_ci            }
9104bf215546Sopenharmony_ci         }
9105bf215546Sopenharmony_ci      }
9106bf215546Sopenharmony_ci   }
9107bf215546Sopenharmony_ci
9108bf215546Sopenharmony_ci   if (vrs_info && vrs_info->imageView) {
9109bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_image_view, iview, vrs_info->imageView);
9110bf215546Sopenharmony_ci      vrs_ref = (VkAttachmentReference2){.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
9111bf215546Sopenharmony_ci                                         .attachment = att_count,
9112bf215546Sopenharmony_ci                                         .layout = vrs_info->imageLayout,
9113bf215546Sopenharmony_ci                                         .aspectMask = iview->vk.aspects};
9114bf215546Sopenharmony_ci
9115bf215546Sopenharmony_ci      iviews[att_count] = vrs_info->imageView;
9116bf215546Sopenharmony_ci      VkAttachmentDescription2 *att = att_desc + att_count++;
9117bf215546Sopenharmony_ci
9118bf215546Sopenharmony_ci      memset(att, 0, sizeof(*att));
9119bf215546Sopenharmony_ci      att->sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
9120bf215546Sopenharmony_ci      att->format = iview->vk.format;
9121bf215546Sopenharmony_ci      att->samples = iview->image->info.samples;
9122bf215546Sopenharmony_ci      att->loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
9123bf215546Sopenharmony_ci      att->storeOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
9124bf215546Sopenharmony_ci      att->initialLayout = vrs_info->imageLayout;
9125bf215546Sopenharmony_ci      att->finalLayout = vrs_info->imageLayout;
9126bf215546Sopenharmony_ci
9127bf215546Sopenharmony_ci      vrs_subpass_info = (VkFragmentShadingRateAttachmentInfoKHR){
9128bf215546Sopenharmony_ci         .sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR,
9129bf215546Sopenharmony_ci         .pNext = subpass.pNext,
9130bf215546Sopenharmony_ci         .pFragmentShadingRateAttachment = &vrs_ref,
9131bf215546Sopenharmony_ci         .shadingRateAttachmentTexelSize = vrs_info->shadingRateAttachmentTexelSize,
9132bf215546Sopenharmony_ci      };
9133bf215546Sopenharmony_ci      subpass.pNext = &vrs_subpass_info;
9134bf215546Sopenharmony_ci   }
9135bf215546Sopenharmony_ci
9136bf215546Sopenharmony_ci   VkRenderPassCreateInfo2 rp_create_info = {
9137bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
9138bf215546Sopenharmony_ci      .attachmentCount = att_count,
9139bf215546Sopenharmony_ci      .pAttachments = att_desc,
9140bf215546Sopenharmony_ci      .subpassCount = 1,
9141bf215546Sopenharmony_ci      .pSubpasses = &subpass,
9142bf215546Sopenharmony_ci   };
9143bf215546Sopenharmony_ci
9144bf215546Sopenharmony_ci   VkRenderPass rp;
9145bf215546Sopenharmony_ci   result =
9146bf215546Sopenharmony_ci      radv_CreateRenderPass2(radv_device_to_handle(cmd_buffer->device), &rp_create_info, NULL, &rp);
9147bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
9148bf215546Sopenharmony_ci      cmd_buffer->record_result = result;
9149bf215546Sopenharmony_ci      return;
9150bf215546Sopenharmony_ci   }
9151bf215546Sopenharmony_ci
9152bf215546Sopenharmony_ci   unsigned w = pRenderingInfo->renderArea.offset.x + pRenderingInfo->renderArea.extent.width;
9153bf215546Sopenharmony_ci   unsigned h = pRenderingInfo->renderArea.offset.y + pRenderingInfo->renderArea.extent.height;
9154bf215546Sopenharmony_ci   for (unsigned i = 0; i < att_count; ++i) {
9155bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_image_view, iview, iviews[i]);
9156bf215546Sopenharmony_ci
9157bf215546Sopenharmony_ci      if (vrs_info && vrs_info->imageView == iviews[i])
9158bf215546Sopenharmony_ci         continue;
9159bf215546Sopenharmony_ci
9160bf215546Sopenharmony_ci      w = MIN2(w, iview->extent.width);
9161bf215546Sopenharmony_ci      h = MIN2(h, iview->extent.height);
9162bf215546Sopenharmony_ci   }
9163bf215546Sopenharmony_ci   VkFramebufferCreateInfo fb_create_info = {
9164bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
9165bf215546Sopenharmony_ci      .renderPass = rp,
9166bf215546Sopenharmony_ci      .attachmentCount = att_count,
9167bf215546Sopenharmony_ci      .pAttachments = iviews,
9168bf215546Sopenharmony_ci      .width = w,
9169bf215546Sopenharmony_ci      .height = h,
9170bf215546Sopenharmony_ci      .layers = pRenderingInfo->layerCount,
9171bf215546Sopenharmony_ci   };
9172bf215546Sopenharmony_ci
9173bf215546Sopenharmony_ci   VkFramebuffer fb;
9174bf215546Sopenharmony_ci   result =
9175bf215546Sopenharmony_ci      vk_common_CreateFramebuffer(radv_device_to_handle(cmd_buffer->device), &fb_create_info, NULL, &fb);
9176bf215546Sopenharmony_ci   if (result != VK_SUCCESS) {
9177bf215546Sopenharmony_ci      radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device), rp, NULL);
9178bf215546Sopenharmony_ci      cmd_buffer->record_result = result;
9179bf215546Sopenharmony_ci      return;
9180bf215546Sopenharmony_ci   }
9181bf215546Sopenharmony_ci
9182bf215546Sopenharmony_ci   VkRenderPassBeginInfo begin_info = {.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
9183bf215546Sopenharmony_ci                                       .renderPass = rp,
9184bf215546Sopenharmony_ci                                       .framebuffer = fb,
9185bf215546Sopenharmony_ci                                       .renderArea = pRenderingInfo->renderArea,
9186bf215546Sopenharmony_ci                                       .clearValueCount = att_count,
9187bf215546Sopenharmony_ci                                       .pClearValues = clear_values};
9188bf215546Sopenharmony_ci
9189bf215546Sopenharmony_ci   const VkSubpassBeginInfo pass_begin_info = {
9190bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_SUBPASS_BEGIN_INFO,
9191bf215546Sopenharmony_ci      .contents = (pRenderingInfo->flags & VK_RENDERING_CONTENTS_SECONDARY_COMMAND_BUFFERS_BIT)
9192bf215546Sopenharmony_ci                     ? VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS
9193bf215546Sopenharmony_ci                     : VK_SUBPASS_CONTENTS_INLINE,
9194bf215546Sopenharmony_ci   };
9195bf215546Sopenharmony_ci
9196bf215546Sopenharmony_ci   radv_CmdBeginRenderPass2(commandBuffer, &begin_info, &pass_begin_info);
9197bf215546Sopenharmony_ci}
9198bf215546Sopenharmony_ci
9199bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9200bf215546Sopenharmony_ciradv_CmdEndRendering(VkCommandBuffer commandBuffer)
9201bf215546Sopenharmony_ci{
9202bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9203bf215546Sopenharmony_ci   struct radv_render_pass *pass = cmd_buffer->state.pass;
9204bf215546Sopenharmony_ci   struct vk_framebuffer *framebuffer = cmd_buffer->state.framebuffer;
9205bf215546Sopenharmony_ci
9206bf215546Sopenharmony_ci   radv_CmdEndRenderPass2(commandBuffer, NULL);
9207bf215546Sopenharmony_ci
9208bf215546Sopenharmony_ci   vk_common_DestroyFramebuffer(radv_device_to_handle(cmd_buffer->device),
9209bf215546Sopenharmony_ci                                vk_framebuffer_to_handle(framebuffer), NULL);
9210bf215546Sopenharmony_ci   radv_DestroyRenderPass(radv_device_to_handle(cmd_buffer->device),
9211bf215546Sopenharmony_ci                          radv_render_pass_to_handle(pass), NULL);
9212bf215546Sopenharmony_ci}
9213bf215546Sopenharmony_ci
9214bf215546Sopenharmony_ci/*
9215bf215546Sopenharmony_ci * For HTILE we have the following interesting clear words:
9216bf215546Sopenharmony_ci *   0xfffff30f: Uncompressed, full depth range, for depth+stencil HTILE
9217bf215546Sopenharmony_ci *   0xfffc000f: Uncompressed, full depth range, for depth only HTILE.
9218bf215546Sopenharmony_ci *   0xfffffff0: Clear depth to 1.0
9219bf215546Sopenharmony_ci *   0x00000000: Clear depth to 0.0
9220bf215546Sopenharmony_ci */
9221bf215546Sopenharmony_cistatic void
9222bf215546Sopenharmony_ciradv_initialize_htile(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9223bf215546Sopenharmony_ci                      const VkImageSubresourceRange *range)
9224bf215546Sopenharmony_ci{
9225bf215546Sopenharmony_ci   struct radv_cmd_state *state = &cmd_buffer->state;
9226bf215546Sopenharmony_ci   uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, image);
9227bf215546Sopenharmony_ci   VkClearDepthStencilValue value = {0};
9228bf215546Sopenharmony_ci   struct radv_barrier_data barrier = {0};
9229bf215546Sopenharmony_ci
9230bf215546Sopenharmony_ci   barrier.layout_transitions.init_mask_ram = 1;
9231bf215546Sopenharmony_ci   radv_describe_layout_transition(cmd_buffer, &barrier);
9232bf215546Sopenharmony_ci
9233bf215546Sopenharmony_ci   /* Transitioning from LAYOUT_UNDEFINED layout not everyone is consistent
9234bf215546Sopenharmony_ci    * in considering previous rendering work for WAW hazards. */
9235bf215546Sopenharmony_ci   state->flush_bits |=
9236bf215546Sopenharmony_ci      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, image);
9237bf215546Sopenharmony_ci
9238bf215546Sopenharmony_ci   if (image->planes[0].surface.has_stencil &&
9239bf215546Sopenharmony_ci       !(range->aspectMask == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {
9240bf215546Sopenharmony_ci      /* Flush caches before performing a separate aspect initialization because it's a
9241bf215546Sopenharmony_ci       * read-modify-write operation.
9242bf215546Sopenharmony_ci       */
9243bf215546Sopenharmony_ci      state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_READ_BIT, image);
9244bf215546Sopenharmony_ci   }
9245bf215546Sopenharmony_ci
9246bf215546Sopenharmony_ci   state->flush_bits |= radv_clear_htile(cmd_buffer, image, range, htile_value);
9247bf215546Sopenharmony_ci
9248bf215546Sopenharmony_ci   radv_set_ds_clear_metadata(cmd_buffer, image, range, value, range->aspectMask);
9249bf215546Sopenharmony_ci
9250bf215546Sopenharmony_ci   if (radv_image_is_tc_compat_htile(image) && (range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT)) {
9251bf215546Sopenharmony_ci      /* Initialize the TC-compat metada value to 0 because by
9252bf215546Sopenharmony_ci       * default DB_Z_INFO.RANGE_PRECISION is set to 1, and we only
9253bf215546Sopenharmony_ci       * need have to conditionally update its value when performing
9254bf215546Sopenharmony_ci       * a fast depth clear.
9255bf215546Sopenharmony_ci       */
9256bf215546Sopenharmony_ci      radv_set_tc_compat_zrange_metadata(cmd_buffer, image, range, 0);
9257bf215546Sopenharmony_ci   }
9258bf215546Sopenharmony_ci}
9259bf215546Sopenharmony_ci
9260bf215546Sopenharmony_cistatic void
9261bf215546Sopenharmony_ciradv_handle_depth_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9262bf215546Sopenharmony_ci                                   VkImageLayout src_layout, bool src_render_loop,
9263bf215546Sopenharmony_ci                                   VkImageLayout dst_layout, bool dst_render_loop,
9264bf215546Sopenharmony_ci                                   unsigned src_queue_mask, unsigned dst_queue_mask,
9265bf215546Sopenharmony_ci                                   const VkImageSubresourceRange *range,
9266bf215546Sopenharmony_ci                                   struct radv_sample_locations_state *sample_locs)
9267bf215546Sopenharmony_ci{
9268bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
9269bf215546Sopenharmony_ci
9270bf215546Sopenharmony_ci   if (!radv_htile_enabled(image, range->baseMipLevel))
9271bf215546Sopenharmony_ci      return;
9272bf215546Sopenharmony_ci
9273bf215546Sopenharmony_ci   if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
9274bf215546Sopenharmony_ci      radv_initialize_htile(cmd_buffer, image, range);
9275bf215546Sopenharmony_ci   } else if (!radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop,
9276bf215546Sopenharmony_ci                                               src_queue_mask) &&
9277bf215546Sopenharmony_ci              radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop,
9278bf215546Sopenharmony_ci                                              dst_queue_mask)) {
9279bf215546Sopenharmony_ci      radv_initialize_htile(cmd_buffer, image, range);
9280bf215546Sopenharmony_ci   } else if (radv_layout_is_htile_compressed(device, image, src_layout, src_render_loop,
9281bf215546Sopenharmony_ci                                              src_queue_mask) &&
9282bf215546Sopenharmony_ci              !radv_layout_is_htile_compressed(device, image, dst_layout, dst_render_loop,
9283bf215546Sopenharmony_ci                                               dst_queue_mask)) {
9284bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |=
9285bf215546Sopenharmony_ci         RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
9286bf215546Sopenharmony_ci
9287bf215546Sopenharmony_ci      radv_expand_depth_stencil(cmd_buffer, image, range, sample_locs);
9288bf215546Sopenharmony_ci
9289bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |=
9290bf215546Sopenharmony_ci         RADV_CMD_FLAG_FLUSH_AND_INV_DB | RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;
9291bf215546Sopenharmony_ci   }
9292bf215546Sopenharmony_ci}
9293bf215546Sopenharmony_ci
9294bf215546Sopenharmony_cistatic uint32_t
9295bf215546Sopenharmony_ciradv_init_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9296bf215546Sopenharmony_ci                const VkImageSubresourceRange *range, uint32_t value)
9297bf215546Sopenharmony_ci{
9298bf215546Sopenharmony_ci   struct radv_barrier_data barrier = {0};
9299bf215546Sopenharmony_ci
9300bf215546Sopenharmony_ci   barrier.layout_transitions.init_mask_ram = 1;
9301bf215546Sopenharmony_ci   radv_describe_layout_transition(cmd_buffer, &barrier);
9302bf215546Sopenharmony_ci
9303bf215546Sopenharmony_ci   return radv_clear_cmask(cmd_buffer, image, range, value);
9304bf215546Sopenharmony_ci}
9305bf215546Sopenharmony_ci
9306bf215546Sopenharmony_ciuint32_t
9307bf215546Sopenharmony_ciradv_init_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9308bf215546Sopenharmony_ci                const VkImageSubresourceRange *range)
9309bf215546Sopenharmony_ci{
9310bf215546Sopenharmony_ci   static const uint32_t fmask_clear_values[4] = {0x00000000, 0x02020202, 0xE4E4E4E4, 0x76543210};
9311bf215546Sopenharmony_ci   uint32_t log2_samples = util_logbase2(image->info.samples);
9312bf215546Sopenharmony_ci   uint32_t value = fmask_clear_values[log2_samples];
9313bf215546Sopenharmony_ci   struct radv_barrier_data barrier = {0};
9314bf215546Sopenharmony_ci
9315bf215546Sopenharmony_ci   barrier.layout_transitions.init_mask_ram = 1;
9316bf215546Sopenharmony_ci   radv_describe_layout_transition(cmd_buffer, &barrier);
9317bf215546Sopenharmony_ci
9318bf215546Sopenharmony_ci   return radv_clear_fmask(cmd_buffer, image, range, value);
9319bf215546Sopenharmony_ci}
9320bf215546Sopenharmony_ci
9321bf215546Sopenharmony_ciuint32_t
9322bf215546Sopenharmony_ciradv_init_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9323bf215546Sopenharmony_ci              const VkImageSubresourceRange *range, uint32_t value)
9324bf215546Sopenharmony_ci{
9325bf215546Sopenharmony_ci   struct radv_barrier_data barrier = {0};
9326bf215546Sopenharmony_ci   uint32_t flush_bits = 0;
9327bf215546Sopenharmony_ci   unsigned size = 0;
9328bf215546Sopenharmony_ci
9329bf215546Sopenharmony_ci   barrier.layout_transitions.init_mask_ram = 1;
9330bf215546Sopenharmony_ci   radv_describe_layout_transition(cmd_buffer, &barrier);
9331bf215546Sopenharmony_ci
9332bf215546Sopenharmony_ci   flush_bits |= radv_clear_dcc(cmd_buffer, image, range, value);
9333bf215546Sopenharmony_ci
9334bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX8) {
9335bf215546Sopenharmony_ci      /* When DCC is enabled with mipmaps, some levels might not
9336bf215546Sopenharmony_ci       * support fast clears and we have to initialize them as "fully
9337bf215546Sopenharmony_ci       * expanded".
9338bf215546Sopenharmony_ci       */
9339bf215546Sopenharmony_ci      /* Compute the size of all fast clearable DCC levels. */
9340bf215546Sopenharmony_ci      for (unsigned i = 0; i < image->planes[0].surface.num_meta_levels; i++) {
9341bf215546Sopenharmony_ci         struct legacy_surf_dcc_level *dcc_level = &image->planes[0].surface.u.legacy.color.dcc_level[i];
9342bf215546Sopenharmony_ci         unsigned dcc_fast_clear_size =
9343bf215546Sopenharmony_ci            dcc_level->dcc_slice_fast_clear_size * image->info.array_size;
9344bf215546Sopenharmony_ci
9345bf215546Sopenharmony_ci         if (!dcc_fast_clear_size)
9346bf215546Sopenharmony_ci            break;
9347bf215546Sopenharmony_ci
9348bf215546Sopenharmony_ci         size = dcc_level->dcc_offset + dcc_fast_clear_size;
9349bf215546Sopenharmony_ci      }
9350bf215546Sopenharmony_ci
9351bf215546Sopenharmony_ci      /* Initialize the mipmap levels without DCC. */
9352bf215546Sopenharmony_ci      if (size != image->planes[0].surface.meta_size) {
9353bf215546Sopenharmony_ci         flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bindings[0].bo,
9354bf215546Sopenharmony_ci                                        radv_buffer_get_va(image->bindings[0].bo) +
9355bf215546Sopenharmony_ci                                           image->bindings[0].offset +
9356bf215546Sopenharmony_ci                                           image->planes[0].surface.meta_offset + size,
9357bf215546Sopenharmony_ci                                        image->planes[0].surface.meta_size - size, 0xffffffff);
9358bf215546Sopenharmony_ci      }
9359bf215546Sopenharmony_ci   }
9360bf215546Sopenharmony_ci
9361bf215546Sopenharmony_ci   return flush_bits;
9362bf215546Sopenharmony_ci}
9363bf215546Sopenharmony_ci
9364bf215546Sopenharmony_ci/**
9365bf215546Sopenharmony_ci * Initialize DCC/FMASK/CMASK metadata for a color image.
9366bf215546Sopenharmony_ci */
9367bf215546Sopenharmony_cistatic void
9368bf215546Sopenharmony_ciradv_init_color_image_metadata(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9369bf215546Sopenharmony_ci                               VkImageLayout src_layout, bool src_render_loop,
9370bf215546Sopenharmony_ci                               VkImageLayout dst_layout, bool dst_render_loop,
9371bf215546Sopenharmony_ci                               unsigned src_queue_mask, unsigned dst_queue_mask,
9372bf215546Sopenharmony_ci                               const VkImageSubresourceRange *range)
9373bf215546Sopenharmony_ci{
9374bf215546Sopenharmony_ci   uint32_t flush_bits = 0;
9375bf215546Sopenharmony_ci
9376bf215546Sopenharmony_ci   /* Transitioning from LAYOUT_UNDEFINED layout not everyone is
9377bf215546Sopenharmony_ci    * consistent in considering previous rendering work for WAW hazards.
9378bf215546Sopenharmony_ci    */
9379bf215546Sopenharmony_ci   cmd_buffer->state.flush_bits |=
9380bf215546Sopenharmony_ci      radv_src_access_flush(cmd_buffer, VK_ACCESS_2_COLOR_ATTACHMENT_WRITE_BIT, image);
9381bf215546Sopenharmony_ci
9382bf215546Sopenharmony_ci   if (radv_image_has_cmask(image)) {
9383bf215546Sopenharmony_ci      uint32_t value;
9384bf215546Sopenharmony_ci
9385bf215546Sopenharmony_ci      if (cmd_buffer->device->physical_device->rad_info.gfx_level == GFX9) {
9386bf215546Sopenharmony_ci         /* TODO: Fix clearing CMASK layers on GFX9. */
9387bf215546Sopenharmony_ci         if (radv_image_is_tc_compat_cmask(image) ||
9388bf215546Sopenharmony_ci             (radv_image_has_fmask(image) &&
9389bf215546Sopenharmony_ci              radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel, dst_layout,
9390bf215546Sopenharmony_ci                                         dst_render_loop, dst_queue_mask))) {
9391bf215546Sopenharmony_ci            value = 0xccccccccu;
9392bf215546Sopenharmony_ci         } else {
9393bf215546Sopenharmony_ci            value = 0xffffffffu;
9394bf215546Sopenharmony_ci         }
9395bf215546Sopenharmony_ci      } else {
9396bf215546Sopenharmony_ci         static const uint32_t cmask_clear_values[4] = {0xffffffff, 0xdddddddd, 0xeeeeeeee, 0xffffffff};
9397bf215546Sopenharmony_ci         uint32_t log2_samples = util_logbase2(image->info.samples);
9398bf215546Sopenharmony_ci
9399bf215546Sopenharmony_ci         value = cmask_clear_values[log2_samples];
9400bf215546Sopenharmony_ci      }
9401bf215546Sopenharmony_ci
9402bf215546Sopenharmony_ci      flush_bits |= radv_init_cmask(cmd_buffer, image, range, value);
9403bf215546Sopenharmony_ci   }
9404bf215546Sopenharmony_ci
9405bf215546Sopenharmony_ci   if (radv_image_has_fmask(image)) {
9406bf215546Sopenharmony_ci      flush_bits |= radv_init_fmask(cmd_buffer, image, range);
9407bf215546Sopenharmony_ci   }
9408bf215546Sopenharmony_ci
9409bf215546Sopenharmony_ci   if (radv_dcc_enabled(image, range->baseMipLevel)) {
9410bf215546Sopenharmony_ci      uint32_t value = 0xffffffffu; /* Fully expanded mode. */
9411bf215546Sopenharmony_ci
9412bf215546Sopenharmony_ci      if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
9413bf215546Sopenharmony_ci                                     dst_layout, dst_render_loop, dst_queue_mask)) {
9414bf215546Sopenharmony_ci         value = 0u;
9415bf215546Sopenharmony_ci      }
9416bf215546Sopenharmony_ci
9417bf215546Sopenharmony_ci      flush_bits |= radv_init_dcc(cmd_buffer, image, range, value);
9418bf215546Sopenharmony_ci   }
9419bf215546Sopenharmony_ci
9420bf215546Sopenharmony_ci   if (radv_image_has_cmask(image) || radv_dcc_enabled(image, range->baseMipLevel)) {
9421bf215546Sopenharmony_ci      radv_update_fce_metadata(cmd_buffer, image, range, false);
9422bf215546Sopenharmony_ci
9423bf215546Sopenharmony_ci      uint32_t color_values[2] = {0};
9424bf215546Sopenharmony_ci      radv_set_color_clear_metadata(cmd_buffer, image, range, color_values);
9425bf215546Sopenharmony_ci   }
9426bf215546Sopenharmony_ci
9427bf215546Sopenharmony_ci   cmd_buffer->state.flush_bits |= flush_bits;
9428bf215546Sopenharmony_ci}
9429bf215546Sopenharmony_ci
9430bf215546Sopenharmony_cistatic void
9431bf215546Sopenharmony_ciradv_retile_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9432bf215546Sopenharmony_ci                       VkImageLayout src_layout, VkImageLayout dst_layout, unsigned dst_queue_mask)
9433bf215546Sopenharmony_ci{
9434bf215546Sopenharmony_ci   /* If the image is read-only, we don't have to retile DCC because it can't change. */
9435bf215546Sopenharmony_ci   if (!(image->vk.usage & RADV_IMAGE_USAGE_WRITE_BITS))
9436bf215546Sopenharmony_ci      return;
9437bf215546Sopenharmony_ci
9438bf215546Sopenharmony_ci   if (src_layout != VK_IMAGE_LAYOUT_PRESENT_SRC_KHR &&
9439bf215546Sopenharmony_ci       (dst_layout == VK_IMAGE_LAYOUT_PRESENT_SRC_KHR ||
9440bf215546Sopenharmony_ci        (dst_queue_mask & (1u << RADV_QUEUE_FOREIGN))))
9441bf215546Sopenharmony_ci      radv_retile_dcc(cmd_buffer, image);
9442bf215546Sopenharmony_ci}
9443bf215546Sopenharmony_ci
9444bf215546Sopenharmony_cistatic bool
9445bf215546Sopenharmony_ciradv_image_need_retile(const struct radv_image *image)
9446bf215546Sopenharmony_ci{
9447bf215546Sopenharmony_ci   return image->planes[0].surface.display_dcc_offset &&
9448bf215546Sopenharmony_ci          image->planes[0].surface.display_dcc_offset != image->planes[0].surface.meta_offset;
9449bf215546Sopenharmony_ci}
9450bf215546Sopenharmony_ci
9451bf215546Sopenharmony_ci/**
9452bf215546Sopenharmony_ci * Handle color image transitions for DCC/FMASK/CMASK.
9453bf215546Sopenharmony_ci */
9454bf215546Sopenharmony_cistatic void
9455bf215546Sopenharmony_ciradv_handle_color_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9456bf215546Sopenharmony_ci                                   VkImageLayout src_layout, bool src_render_loop,
9457bf215546Sopenharmony_ci                                   VkImageLayout dst_layout, bool dst_render_loop,
9458bf215546Sopenharmony_ci                                   unsigned src_queue_mask, unsigned dst_queue_mask,
9459bf215546Sopenharmony_ci                                   const VkImageSubresourceRange *range)
9460bf215546Sopenharmony_ci{
9461bf215546Sopenharmony_ci   bool dcc_decompressed = false, fast_clear_flushed = false;
9462bf215546Sopenharmony_ci
9463bf215546Sopenharmony_ci   if (!radv_image_has_cmask(image) && !radv_image_has_fmask(image) &&
9464bf215546Sopenharmony_ci       !radv_dcc_enabled(image, range->baseMipLevel))
9465bf215546Sopenharmony_ci      return;
9466bf215546Sopenharmony_ci
9467bf215546Sopenharmony_ci   if (src_layout == VK_IMAGE_LAYOUT_UNDEFINED) {
9468bf215546Sopenharmony_ci      radv_init_color_image_metadata(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
9469bf215546Sopenharmony_ci                                     dst_render_loop, src_queue_mask, dst_queue_mask, range);
9470bf215546Sopenharmony_ci
9471bf215546Sopenharmony_ci      if (radv_image_need_retile(image))
9472bf215546Sopenharmony_ci         radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
9473bf215546Sopenharmony_ci      return;
9474bf215546Sopenharmony_ci   }
9475bf215546Sopenharmony_ci
9476bf215546Sopenharmony_ci   if (radv_dcc_enabled(image, range->baseMipLevel)) {
9477bf215546Sopenharmony_ci      if (src_layout == VK_IMAGE_LAYOUT_PREINITIALIZED) {
9478bf215546Sopenharmony_ci         cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, range, 0xffffffffu);
9479bf215546Sopenharmony_ci      } else if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
9480bf215546Sopenharmony_ci                                            src_layout, src_render_loop, src_queue_mask) &&
9481bf215546Sopenharmony_ci                 !radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,
9482bf215546Sopenharmony_ci                                             dst_layout, dst_render_loop, dst_queue_mask)) {
9483bf215546Sopenharmony_ci         radv_decompress_dcc(cmd_buffer, image, range);
9484bf215546Sopenharmony_ci         dcc_decompressed = true;
9485bf215546Sopenharmony_ci      } else if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
9486bf215546Sopenharmony_ci                                            src_layout, src_render_loop, src_queue_mask) &&
9487bf215546Sopenharmony_ci                 !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
9488bf215546Sopenharmony_ci                                             dst_layout, dst_render_loop, dst_queue_mask)) {
9489bf215546Sopenharmony_ci         radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
9490bf215546Sopenharmony_ci         fast_clear_flushed = true;
9491bf215546Sopenharmony_ci      }
9492bf215546Sopenharmony_ci
9493bf215546Sopenharmony_ci      if (radv_image_need_retile(image))
9494bf215546Sopenharmony_ci         radv_retile_transition(cmd_buffer, image, src_layout, dst_layout, dst_queue_mask);
9495bf215546Sopenharmony_ci   } else if (radv_image_has_cmask(image) || radv_image_has_fmask(image)) {
9496bf215546Sopenharmony_ci      if (radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
9497bf215546Sopenharmony_ci                                     src_layout, src_render_loop, src_queue_mask) &&
9498bf215546Sopenharmony_ci          !radv_layout_can_fast_clear(cmd_buffer->device, image, range->baseMipLevel,
9499bf215546Sopenharmony_ci                                      dst_layout, dst_render_loop, dst_queue_mask)) {
9500bf215546Sopenharmony_ci         radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
9501bf215546Sopenharmony_ci         fast_clear_flushed = true;
9502bf215546Sopenharmony_ci      }
9503bf215546Sopenharmony_ci   }
9504bf215546Sopenharmony_ci
9505bf215546Sopenharmony_ci   /* MSAA color decompress. */
9506bf215546Sopenharmony_ci   if (radv_image_has_fmask(image) &&
9507bf215546Sopenharmony_ci       (image->vk.usage & (VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_TRANSFER_DST_BIT)) &&
9508bf215546Sopenharmony_ci       radv_layout_fmask_compressed(cmd_buffer->device, image, src_layout, src_queue_mask) &&
9509bf215546Sopenharmony_ci       !radv_layout_fmask_compressed(cmd_buffer->device, image, dst_layout, dst_queue_mask)) {
9510bf215546Sopenharmony_ci      if (radv_dcc_enabled(image, range->baseMipLevel) &&
9511bf215546Sopenharmony_ci          !radv_image_use_dcc_image_stores(cmd_buffer->device, image) && !dcc_decompressed) {
9512bf215546Sopenharmony_ci         /* A DCC decompress is required before expanding FMASK
9513bf215546Sopenharmony_ci          * when DCC stores aren't supported to avoid being in
9514bf215546Sopenharmony_ci          * a state where DCC is compressed and the main
9515bf215546Sopenharmony_ci          * surface is uncompressed.
9516bf215546Sopenharmony_ci          */
9517bf215546Sopenharmony_ci         radv_decompress_dcc(cmd_buffer, image, range);
9518bf215546Sopenharmony_ci      } else if (!fast_clear_flushed) {
9519bf215546Sopenharmony_ci         /* A FMASK decompress is required before expanding
9520bf215546Sopenharmony_ci          * FMASK.
9521bf215546Sopenharmony_ci          */
9522bf215546Sopenharmony_ci         radv_fast_clear_flush_image_inplace(cmd_buffer, image, range);
9523bf215546Sopenharmony_ci      }
9524bf215546Sopenharmony_ci
9525bf215546Sopenharmony_ci      struct radv_barrier_data barrier = {0};
9526bf215546Sopenharmony_ci      barrier.layout_transitions.fmask_color_expand = 1;
9527bf215546Sopenharmony_ci      radv_describe_layout_transition(cmd_buffer, &barrier);
9528bf215546Sopenharmony_ci
9529bf215546Sopenharmony_ci      radv_expand_fmask_image_inplace(cmd_buffer, image, range);
9530bf215546Sopenharmony_ci   }
9531bf215546Sopenharmony_ci}
9532bf215546Sopenharmony_ci
9533bf215546Sopenharmony_cistatic void
9534bf215546Sopenharmony_ciradv_handle_image_transition(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
9535bf215546Sopenharmony_ci                             VkImageLayout src_layout, bool src_render_loop,
9536bf215546Sopenharmony_ci                             VkImageLayout dst_layout, bool dst_render_loop, uint32_t src_family_index,
9537bf215546Sopenharmony_ci                             uint32_t dst_family_index, const VkImageSubresourceRange *range,
9538bf215546Sopenharmony_ci                             struct radv_sample_locations_state *sample_locs)
9539bf215546Sopenharmony_ci{
9540bf215546Sopenharmony_ci   enum radv_queue_family src_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, src_family_index);
9541bf215546Sopenharmony_ci   enum radv_queue_family dst_qf = vk_queue_to_radv(cmd_buffer->device->physical_device, dst_family_index);
9542bf215546Sopenharmony_ci   if (image->exclusive && src_family_index != dst_family_index) {
9543bf215546Sopenharmony_ci      /* This is an acquire or a release operation and there will be
9544bf215546Sopenharmony_ci       * a corresponding release/acquire. Do the transition in the
9545bf215546Sopenharmony_ci       * most flexible queue. */
9546bf215546Sopenharmony_ci
9547bf215546Sopenharmony_ci      assert(src_qf == cmd_buffer->qf ||
9548bf215546Sopenharmony_ci             dst_qf == cmd_buffer->qf);
9549bf215546Sopenharmony_ci
9550bf215546Sopenharmony_ci      if (src_family_index == VK_QUEUE_FAMILY_EXTERNAL || src_family_index == VK_QUEUE_FAMILY_FOREIGN_EXT)
9551bf215546Sopenharmony_ci         return;
9552bf215546Sopenharmony_ci
9553bf215546Sopenharmony_ci      if (cmd_buffer->qf == RADV_QUEUE_TRANSFER)
9554bf215546Sopenharmony_ci         return;
9555bf215546Sopenharmony_ci
9556bf215546Sopenharmony_ci      if (cmd_buffer->qf == RADV_QUEUE_COMPUTE &&
9557bf215546Sopenharmony_ci          (src_qf == RADV_QUEUE_GENERAL || dst_qf == RADV_QUEUE_GENERAL))
9558bf215546Sopenharmony_ci         return;
9559bf215546Sopenharmony_ci   }
9560bf215546Sopenharmony_ci
9561bf215546Sopenharmony_ci   unsigned src_queue_mask =
9562bf215546Sopenharmony_ci      radv_image_queue_family_mask(image, src_qf, cmd_buffer->qf);
9563bf215546Sopenharmony_ci   unsigned dst_queue_mask =
9564bf215546Sopenharmony_ci      radv_image_queue_family_mask(image, dst_qf, cmd_buffer->qf);
9565bf215546Sopenharmony_ci
9566bf215546Sopenharmony_ci   if (src_layout == dst_layout && src_render_loop == dst_render_loop && src_queue_mask == dst_queue_mask)
9567bf215546Sopenharmony_ci      return;
9568bf215546Sopenharmony_ci
9569bf215546Sopenharmony_ci   if (vk_format_has_depth(image->vk.format)) {
9570bf215546Sopenharmony_ci      radv_handle_depth_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
9571bf215546Sopenharmony_ci                                         dst_render_loop, src_queue_mask, dst_queue_mask, range,
9572bf215546Sopenharmony_ci                                         sample_locs);
9573bf215546Sopenharmony_ci   } else {
9574bf215546Sopenharmony_ci      radv_handle_color_image_transition(cmd_buffer, image, src_layout, src_render_loop, dst_layout,
9575bf215546Sopenharmony_ci                                         dst_render_loop, src_queue_mask, dst_queue_mask, range);
9576bf215546Sopenharmony_ci   }
9577bf215546Sopenharmony_ci}
9578bf215546Sopenharmony_ci
9579bf215546Sopenharmony_cistatic void
9580bf215546Sopenharmony_ciradv_cp_dma_wait_for_stages(struct radv_cmd_buffer *cmd_buffer, VkPipelineStageFlags2 stage_mask)
9581bf215546Sopenharmony_ci{
9582bf215546Sopenharmony_ci   /* Make sure CP DMA is idle because the driver might have performed a DMA operation for copying a
9583bf215546Sopenharmony_ci    * buffer (or a MSAA image using FMASK). Note that updating a buffer is considered a clear
9584bf215546Sopenharmony_ci    * operation but it might also use a CP DMA copy in some rare situations. Other operations using
9585bf215546Sopenharmony_ci    * a CP DMA clear are implicitly synchronized (see CP_DMA_SYNC).
9586bf215546Sopenharmony_ci    */
9587bf215546Sopenharmony_ci   if (stage_mask & (VK_PIPELINE_STAGE_2_COPY_BIT | VK_PIPELINE_STAGE_2_CLEAR_BIT |
9588bf215546Sopenharmony_ci                     VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT | VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT |
9589bf215546Sopenharmony_ci                     VK_PIPELINE_STAGE_2_ALL_COMMANDS_BIT))
9590bf215546Sopenharmony_ci      si_cp_dma_wait_for_idle(cmd_buffer);
9591bf215546Sopenharmony_ci}
9592bf215546Sopenharmony_ci
9593bf215546Sopenharmony_cistatic void
9594bf215546Sopenharmony_ciradv_barrier(struct radv_cmd_buffer *cmd_buffer, const VkDependencyInfo *dep_info,
9595bf215546Sopenharmony_ci             enum rgp_barrier_reason reason)
9596bf215546Sopenharmony_ci{
9597bf215546Sopenharmony_ci   enum radv_cmd_flush_bits src_flush_bits = 0;
9598bf215546Sopenharmony_ci   enum radv_cmd_flush_bits dst_flush_bits = 0;
9599bf215546Sopenharmony_ci   VkPipelineStageFlags2 src_stage_mask = 0;
9600bf215546Sopenharmony_ci   VkPipelineStageFlags2 dst_stage_mask = 0;
9601bf215546Sopenharmony_ci
9602bf215546Sopenharmony_ci   if (cmd_buffer->state.subpass)
9603bf215546Sopenharmony_ci      radv_mark_noncoherent_rb(cmd_buffer);
9604bf215546Sopenharmony_ci
9605bf215546Sopenharmony_ci   radv_describe_barrier_start(cmd_buffer, reason);
9606bf215546Sopenharmony_ci
9607bf215546Sopenharmony_ci   for (uint32_t i = 0; i < dep_info->memoryBarrierCount; i++) {
9608bf215546Sopenharmony_ci      src_stage_mask |= dep_info->pMemoryBarriers[i].srcStageMask;
9609bf215546Sopenharmony_ci      src_flush_bits |=
9610bf215546Sopenharmony_ci         radv_src_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].srcAccessMask, NULL);
9611bf215546Sopenharmony_ci      dst_stage_mask |= dep_info->pMemoryBarriers[i].dstStageMask;
9612bf215546Sopenharmony_ci      dst_flush_bits |=
9613bf215546Sopenharmony_ci         radv_dst_access_flush(cmd_buffer, dep_info->pMemoryBarriers[i].dstAccessMask, NULL);
9614bf215546Sopenharmony_ci   }
9615bf215546Sopenharmony_ci
9616bf215546Sopenharmony_ci   for (uint32_t i = 0; i < dep_info->bufferMemoryBarrierCount; i++) {
9617bf215546Sopenharmony_ci      src_stage_mask |= dep_info->pBufferMemoryBarriers[i].srcStageMask;
9618bf215546Sopenharmony_ci      src_flush_bits |=
9619bf215546Sopenharmony_ci         radv_src_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].srcAccessMask, NULL);
9620bf215546Sopenharmony_ci      dst_stage_mask |= dep_info->pBufferMemoryBarriers[i].dstStageMask;
9621bf215546Sopenharmony_ci      dst_flush_bits |=
9622bf215546Sopenharmony_ci         radv_dst_access_flush(cmd_buffer, dep_info->pBufferMemoryBarriers[i].dstAccessMask, NULL);
9623bf215546Sopenharmony_ci   }
9624bf215546Sopenharmony_ci
9625bf215546Sopenharmony_ci   for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
9626bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image);
9627bf215546Sopenharmony_ci
9628bf215546Sopenharmony_ci      src_stage_mask |= dep_info->pImageMemoryBarriers[i].srcStageMask;
9629bf215546Sopenharmony_ci      src_flush_bits |=
9630bf215546Sopenharmony_ci         radv_src_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].srcAccessMask, image);
9631bf215546Sopenharmony_ci      dst_stage_mask |= dep_info->pImageMemoryBarriers[i].dstStageMask;
9632bf215546Sopenharmony_ci      dst_flush_bits |=
9633bf215546Sopenharmony_ci         radv_dst_access_flush(cmd_buffer, dep_info->pImageMemoryBarriers[i].dstAccessMask, image);
9634bf215546Sopenharmony_ci   }
9635bf215546Sopenharmony_ci
9636bf215546Sopenharmony_ci   /* The Vulkan spec 1.1.98 says:
9637bf215546Sopenharmony_ci    *
9638bf215546Sopenharmony_ci    * "An execution dependency with only
9639bf215546Sopenharmony_ci    *  VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT in the destination stage mask
9640bf215546Sopenharmony_ci    *  will only prevent that stage from executing in subsequently
9641bf215546Sopenharmony_ci    *  submitted commands. As this stage does not perform any actual
9642bf215546Sopenharmony_ci    *  execution, this is not observable - in effect, it does not delay
9643bf215546Sopenharmony_ci    *  processing of subsequent commands. Similarly an execution dependency
9644bf215546Sopenharmony_ci    *  with only VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT in the source stage mask
9645bf215546Sopenharmony_ci    *  will effectively not wait for any prior commands to complete."
9646bf215546Sopenharmony_ci    */
9647bf215546Sopenharmony_ci   if (dst_stage_mask != VK_PIPELINE_STAGE_2_BOTTOM_OF_PIPE_BIT)
9648bf215546Sopenharmony_ci      radv_stage_flush(cmd_buffer, src_stage_mask);
9649bf215546Sopenharmony_ci   cmd_buffer->state.flush_bits |= src_flush_bits;
9650bf215546Sopenharmony_ci
9651bf215546Sopenharmony_ci   radv_ace_internal_barrier(cmd_buffer, src_stage_mask, 0);
9652bf215546Sopenharmony_ci
9653bf215546Sopenharmony_ci   for (uint32_t i = 0; i < dep_info->imageMemoryBarrierCount; i++) {
9654bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_image, image, dep_info->pImageMemoryBarriers[i].image);
9655bf215546Sopenharmony_ci
9656bf215546Sopenharmony_ci      const struct VkSampleLocationsInfoEXT *sample_locs_info =
9657bf215546Sopenharmony_ci         vk_find_struct_const(dep_info->pImageMemoryBarriers[i].pNext, SAMPLE_LOCATIONS_INFO_EXT);
9658bf215546Sopenharmony_ci      struct radv_sample_locations_state sample_locations;
9659bf215546Sopenharmony_ci
9660bf215546Sopenharmony_ci      if (sample_locs_info) {
9661bf215546Sopenharmony_ci         assert(image->vk.create_flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);
9662bf215546Sopenharmony_ci         sample_locations.per_pixel = sample_locs_info->sampleLocationsPerPixel;
9663bf215546Sopenharmony_ci         sample_locations.grid_size = sample_locs_info->sampleLocationGridSize;
9664bf215546Sopenharmony_ci         sample_locations.count = sample_locs_info->sampleLocationsCount;
9665bf215546Sopenharmony_ci         typed_memcpy(&sample_locations.locations[0], sample_locs_info->pSampleLocations,
9666bf215546Sopenharmony_ci                      sample_locs_info->sampleLocationsCount);
9667bf215546Sopenharmony_ci      }
9668bf215546Sopenharmony_ci
9669bf215546Sopenharmony_ci      radv_handle_image_transition(
9670bf215546Sopenharmony_ci         cmd_buffer, image, dep_info->pImageMemoryBarriers[i].oldLayout,
9671bf215546Sopenharmony_ci         false, /* Outside of a renderpass we are never in a renderloop */
9672bf215546Sopenharmony_ci         dep_info->pImageMemoryBarriers[i].newLayout,
9673bf215546Sopenharmony_ci         false, /* Outside of a renderpass we are never in a renderloop */
9674bf215546Sopenharmony_ci         dep_info->pImageMemoryBarriers[i].srcQueueFamilyIndex,
9675bf215546Sopenharmony_ci         dep_info->pImageMemoryBarriers[i].dstQueueFamilyIndex,
9676bf215546Sopenharmony_ci         &dep_info->pImageMemoryBarriers[i].subresourceRange, sample_locs_info ? &sample_locations : NULL);
9677bf215546Sopenharmony_ci   }
9678bf215546Sopenharmony_ci
9679bf215546Sopenharmony_ci   radv_ace_internal_barrier(cmd_buffer, 0, dst_stage_mask);
9680bf215546Sopenharmony_ci   radv_cp_dma_wait_for_stages(cmd_buffer, src_stage_mask);
9681bf215546Sopenharmony_ci
9682bf215546Sopenharmony_ci   cmd_buffer->state.flush_bits |= dst_flush_bits;
9683bf215546Sopenharmony_ci
9684bf215546Sopenharmony_ci   radv_describe_barrier_end(cmd_buffer);
9685bf215546Sopenharmony_ci}
9686bf215546Sopenharmony_ci
9687bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9688bf215546Sopenharmony_ciradv_CmdPipelineBarrier2(VkCommandBuffer commandBuffer,
9689bf215546Sopenharmony_ci                         const VkDependencyInfo *pDependencyInfo)
9690bf215546Sopenharmony_ci{
9691bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9692bf215546Sopenharmony_ci
9693bf215546Sopenharmony_ci   radv_barrier(cmd_buffer, pDependencyInfo, RGP_BARRIER_EXTERNAL_CMD_PIPELINE_BARRIER);
9694bf215546Sopenharmony_ci}
9695bf215546Sopenharmony_ci
9696bf215546Sopenharmony_cistatic void
9697bf215546Sopenharmony_ciwrite_event(struct radv_cmd_buffer *cmd_buffer, struct radv_event *event,
9698bf215546Sopenharmony_ci            VkPipelineStageFlags2 stageMask, unsigned value)
9699bf215546Sopenharmony_ci{
9700bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
9701bf215546Sopenharmony_ci   uint64_t va = radv_buffer_get_va(event->bo);
9702bf215546Sopenharmony_ci
9703bf215546Sopenharmony_ci   si_emit_cache_flush(cmd_buffer);
9704bf215546Sopenharmony_ci
9705bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
9706bf215546Sopenharmony_ci
9707bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 28);
9708bf215546Sopenharmony_ci
9709bf215546Sopenharmony_ci   if (stageMask & (VK_PIPELINE_STAGE_2_COPY_BIT |
9710bf215546Sopenharmony_ci                    VK_PIPELINE_STAGE_2_RESOLVE_BIT |
9711bf215546Sopenharmony_ci                    VK_PIPELINE_STAGE_2_BLIT_BIT |
9712bf215546Sopenharmony_ci                    VK_PIPELINE_STAGE_2_CLEAR_BIT)) {
9713bf215546Sopenharmony_ci      /* Be conservative for now. */
9714bf215546Sopenharmony_ci      stageMask |= VK_PIPELINE_STAGE_2_ALL_TRANSFER_BIT;
9715bf215546Sopenharmony_ci   }
9716bf215546Sopenharmony_ci
9717bf215546Sopenharmony_ci   /* Flags that only require a top-of-pipe event. */
9718bf215546Sopenharmony_ci   VkPipelineStageFlags2 top_of_pipe_flags = VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT;
9719bf215546Sopenharmony_ci
9720bf215546Sopenharmony_ci   /* Flags that only require a post-index-fetch event. */
9721bf215546Sopenharmony_ci   VkPipelineStageFlags2 post_index_fetch_flags =
9722bf215546Sopenharmony_ci      top_of_pipe_flags | VK_PIPELINE_STAGE_2_DRAW_INDIRECT_BIT | VK_PIPELINE_STAGE_2_VERTEX_INPUT_BIT;
9723bf215546Sopenharmony_ci
9724bf215546Sopenharmony_ci   /* Flags that only require signaling post PS. */
9725bf215546Sopenharmony_ci   VkPipelineStageFlags2 post_ps_flags =
9726bf215546Sopenharmony_ci      post_index_fetch_flags | VK_PIPELINE_STAGE_2_VERTEX_SHADER_BIT |
9727bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_TESSELLATION_CONTROL_SHADER_BIT |
9728bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_TESSELLATION_EVALUATION_SHADER_BIT | VK_PIPELINE_STAGE_2_GEOMETRY_SHADER_BIT |
9729bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_MESH_SHADER_BIT_NV |
9730bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_TRANSFORM_FEEDBACK_BIT_EXT |
9731bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_PRE_RASTERIZATION_SHADERS_BIT |
9732bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR |
9733bf215546Sopenharmony_ci      VK_PIPELINE_STAGE_2_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_2_FRAGMENT_SHADER_BIT;
9734bf215546Sopenharmony_ci
9735bf215546Sopenharmony_ci   /* Flags that only require signaling post CS. */
9736bf215546Sopenharmony_ci   VkPipelineStageFlags2 post_cs_flags = VK_PIPELINE_STAGE_2_COMPUTE_SHADER_BIT;
9737bf215546Sopenharmony_ci
9738bf215546Sopenharmony_ci   radv_cp_dma_wait_for_stages(cmd_buffer, stageMask);
9739bf215546Sopenharmony_ci
9740bf215546Sopenharmony_ci   if (!(stageMask & ~top_of_pipe_flags)) {
9741bf215546Sopenharmony_ci      /* Just need to sync the PFP engine. */
9742bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
9743bf215546Sopenharmony_ci      radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_PFP));
9744bf215546Sopenharmony_ci      radeon_emit(cs, va);
9745bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
9746bf215546Sopenharmony_ci      radeon_emit(cs, value);
9747bf215546Sopenharmony_ci   } else if (!(stageMask & ~post_index_fetch_flags)) {
9748bf215546Sopenharmony_ci      /* Sync ME because PFP reads index and indirect buffers. */
9749bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
9750bf215546Sopenharmony_ci      radeon_emit(cs, S_370_DST_SEL(V_370_MEM) | S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
9751bf215546Sopenharmony_ci      radeon_emit(cs, va);
9752bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
9753bf215546Sopenharmony_ci      radeon_emit(cs, value);
9754bf215546Sopenharmony_ci   } else {
9755bf215546Sopenharmony_ci      unsigned event_type;
9756bf215546Sopenharmony_ci
9757bf215546Sopenharmony_ci      if (!(stageMask & ~post_ps_flags)) {
9758bf215546Sopenharmony_ci         /* Sync previous fragment shaders. */
9759bf215546Sopenharmony_ci         event_type = V_028A90_PS_DONE;
9760bf215546Sopenharmony_ci      } else if (!(stageMask & ~post_cs_flags)) {
9761bf215546Sopenharmony_ci         /* Sync previous compute shaders. */
9762bf215546Sopenharmony_ci         event_type = V_028A90_CS_DONE;
9763bf215546Sopenharmony_ci      } else {
9764bf215546Sopenharmony_ci         /* Otherwise, sync all prior GPU work. */
9765bf215546Sopenharmony_ci         event_type = V_028A90_BOTTOM_OF_PIPE_TS;
9766bf215546Sopenharmony_ci      }
9767bf215546Sopenharmony_ci
9768bf215546Sopenharmony_ci      si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
9769bf215546Sopenharmony_ci                                 radv_cmd_buffer_uses_mec(cmd_buffer), event_type, 0,
9770bf215546Sopenharmony_ci                                 EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, value,
9771bf215546Sopenharmony_ci                                 cmd_buffer->gfx9_eop_bug_va);
9772bf215546Sopenharmony_ci   }
9773bf215546Sopenharmony_ci
9774bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
9775bf215546Sopenharmony_ci}
9776bf215546Sopenharmony_ci
9777bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9778bf215546Sopenharmony_ciradv_CmdSetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
9779bf215546Sopenharmony_ci                  const VkDependencyInfo* pDependencyInfo)
9780bf215546Sopenharmony_ci{
9781bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9782bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_event, event, _event);
9783bf215546Sopenharmony_ci   VkPipelineStageFlags2 src_stage_mask = 0;
9784bf215546Sopenharmony_ci
9785bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pDependencyInfo->memoryBarrierCount; i++)
9786bf215546Sopenharmony_ci      src_stage_mask |= pDependencyInfo->pMemoryBarriers[i].srcStageMask;
9787bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pDependencyInfo->bufferMemoryBarrierCount; i++)
9788bf215546Sopenharmony_ci      src_stage_mask |= pDependencyInfo->pBufferMemoryBarriers[i].srcStageMask;
9789bf215546Sopenharmony_ci   for (uint32_t i = 0; i < pDependencyInfo->imageMemoryBarrierCount; i++)
9790bf215546Sopenharmony_ci      src_stage_mask |= pDependencyInfo->pImageMemoryBarriers[i].srcStageMask;
9791bf215546Sopenharmony_ci
9792bf215546Sopenharmony_ci   write_event(cmd_buffer, event, src_stage_mask, 1);
9793bf215546Sopenharmony_ci}
9794bf215546Sopenharmony_ci
9795bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9796bf215546Sopenharmony_ciradv_CmdResetEvent2(VkCommandBuffer commandBuffer, VkEvent _event,
9797bf215546Sopenharmony_ci                    VkPipelineStageFlags2 stageMask)
9798bf215546Sopenharmony_ci{
9799bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9800bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_event, event, _event);
9801bf215546Sopenharmony_ci
9802bf215546Sopenharmony_ci   write_event(cmd_buffer, event, stageMask, 0);
9803bf215546Sopenharmony_ci}
9804bf215546Sopenharmony_ci
9805bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9806bf215546Sopenharmony_ciradv_CmdWaitEvents2(VkCommandBuffer commandBuffer, uint32_t eventCount, const VkEvent *pEvents,
9807bf215546Sopenharmony_ci                    const VkDependencyInfo* pDependencyInfos)
9808bf215546Sopenharmony_ci{
9809bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9810bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
9811bf215546Sopenharmony_ci
9812bf215546Sopenharmony_ci   for (unsigned i = 0; i < eventCount; ++i) {
9813bf215546Sopenharmony_ci      RADV_FROM_HANDLE(radv_event, event, pEvents[i]);
9814bf215546Sopenharmony_ci      uint64_t va = radv_buffer_get_va(event->bo);
9815bf215546Sopenharmony_ci
9816bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cs, event->bo);
9817bf215546Sopenharmony_ci
9818bf215546Sopenharmony_ci      ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cs, 7);
9819bf215546Sopenharmony_ci
9820bf215546Sopenharmony_ci      radv_cp_wait_mem(cs, WAIT_REG_MEM_EQUAL, va, 1, 0xffffffff);
9821bf215546Sopenharmony_ci      assert(cmd_buffer->cs->cdw <= cdw_max);
9822bf215546Sopenharmony_ci   }
9823bf215546Sopenharmony_ci
9824bf215546Sopenharmony_ci   radv_barrier(cmd_buffer, pDependencyInfos, RGP_BARRIER_EXTERNAL_CMD_WAIT_EVENTS);
9825bf215546Sopenharmony_ci}
9826bf215546Sopenharmony_ci
9827bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9828bf215546Sopenharmony_ciradv_CmdSetDeviceMask(VkCommandBuffer commandBuffer, uint32_t deviceMask)
9829bf215546Sopenharmony_ci{
9830bf215546Sopenharmony_ci   /* No-op */
9831bf215546Sopenharmony_ci}
9832bf215546Sopenharmony_ci
9833bf215546Sopenharmony_ci/* VK_EXT_conditional_rendering */
9834bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9835bf215546Sopenharmony_ciradv_CmdBeginConditionalRenderingEXT(
9836bf215546Sopenharmony_ci   VkCommandBuffer commandBuffer,
9837bf215546Sopenharmony_ci   const VkConditionalRenderingBeginInfoEXT *pConditionalRenderingBegin)
9838bf215546Sopenharmony_ci{
9839bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9840bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, pConditionalRenderingBegin->buffer);
9841bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
9842bf215546Sopenharmony_ci   unsigned pred_op = PREDICATION_OP_BOOL32;
9843bf215546Sopenharmony_ci   bool draw_visible = true;
9844bf215546Sopenharmony_ci   uint64_t va;
9845bf215546Sopenharmony_ci
9846bf215546Sopenharmony_ci   va = radv_buffer_get_va(buffer->bo) + buffer->offset + pConditionalRenderingBegin->offset;
9847bf215546Sopenharmony_ci
9848bf215546Sopenharmony_ci   /* By default, if the 32-bit value at offset in buffer memory is zero,
9849bf215546Sopenharmony_ci    * then the rendering commands are discarded, otherwise they are
9850bf215546Sopenharmony_ci    * executed as normal. If the inverted flag is set, all commands are
9851bf215546Sopenharmony_ci    * discarded if the value is non zero.
9852bf215546Sopenharmony_ci    */
9853bf215546Sopenharmony_ci   if (pConditionalRenderingBegin->flags & VK_CONDITIONAL_RENDERING_INVERTED_BIT_EXT) {
9854bf215546Sopenharmony_ci      draw_visible = false;
9855bf215546Sopenharmony_ci   }
9856bf215546Sopenharmony_ci
9857bf215546Sopenharmony_ci   si_emit_cache_flush(cmd_buffer);
9858bf215546Sopenharmony_ci
9859bf215546Sopenharmony_ci   if (cmd_buffer->qf == RADV_QUEUE_GENERAL &&
9860bf215546Sopenharmony_ci       !cmd_buffer->device->physical_device->rad_info.has_32bit_predication) {
9861bf215546Sopenharmony_ci      uint64_t pred_value = 0, pred_va;
9862bf215546Sopenharmony_ci      unsigned pred_offset;
9863bf215546Sopenharmony_ci
9864bf215546Sopenharmony_ci      /* From the Vulkan spec 1.1.107:
9865bf215546Sopenharmony_ci       *
9866bf215546Sopenharmony_ci       * "If the 32-bit value at offset in buffer memory is zero,
9867bf215546Sopenharmony_ci       *  then the rendering commands are discarded, otherwise they
9868bf215546Sopenharmony_ci       *  are executed as normal. If the value of the predicate in
9869bf215546Sopenharmony_ci       *  buffer memory changes while conditional rendering is
9870bf215546Sopenharmony_ci       *  active, the rendering commands may be discarded in an
9871bf215546Sopenharmony_ci       *  implementation-dependent way. Some implementations may
9872bf215546Sopenharmony_ci       *  latch the value of the predicate upon beginning conditional
9873bf215546Sopenharmony_ci       *  rendering while others may read it before every rendering
9874bf215546Sopenharmony_ci       *  command."
9875bf215546Sopenharmony_ci       *
9876bf215546Sopenharmony_ci       * But, the AMD hardware treats the predicate as a 64-bit
9877bf215546Sopenharmony_ci       * value which means we need a workaround in the driver.
9878bf215546Sopenharmony_ci       * Luckily, it's not required to support if the value changes
9879bf215546Sopenharmony_ci       * when predication is active.
9880bf215546Sopenharmony_ci       *
9881bf215546Sopenharmony_ci       * The workaround is as follows:
9882bf215546Sopenharmony_ci       * 1) allocate a 64-value in the upload BO and initialize it
9883bf215546Sopenharmony_ci       *    to 0
9884bf215546Sopenharmony_ci       * 2) copy the 32-bit predicate value to the upload BO
9885bf215546Sopenharmony_ci       * 3) use the new allocated VA address for predication
9886bf215546Sopenharmony_ci       *
9887bf215546Sopenharmony_ci       * Based on the conditionalrender demo, it's faster to do the
9888bf215546Sopenharmony_ci       * COPY_DATA in ME  (+ sync PFP) instead of PFP.
9889bf215546Sopenharmony_ci       */
9890bf215546Sopenharmony_ci      radv_cmd_buffer_upload_data(cmd_buffer, 8, &pred_value, &pred_offset);
9891bf215546Sopenharmony_ci
9892bf215546Sopenharmony_ci      pred_va = radv_buffer_get_va(cmd_buffer->upload.upload_bo) + pred_offset;
9893bf215546Sopenharmony_ci
9894bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
9895bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_SRC_MEM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
9896bf215546Sopenharmony_ci                         COPY_DATA_WR_CONFIRM);
9897bf215546Sopenharmony_ci      radeon_emit(cs, va);
9898bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
9899bf215546Sopenharmony_ci      radeon_emit(cs, pred_va);
9900bf215546Sopenharmony_ci      radeon_emit(cs, pred_va >> 32);
9901bf215546Sopenharmony_ci
9902bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_PFP_SYNC_ME, 0, 0));
9903bf215546Sopenharmony_ci      radeon_emit(cs, 0);
9904bf215546Sopenharmony_ci
9905bf215546Sopenharmony_ci      va = pred_va;
9906bf215546Sopenharmony_ci      pred_op = PREDICATION_OP_BOOL64;
9907bf215546Sopenharmony_ci   }
9908bf215546Sopenharmony_ci
9909bf215546Sopenharmony_ci   /* MEC doesn't support predication, we emulate it elsewhere. */
9910bf215546Sopenharmony_ci   if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
9911bf215546Sopenharmony_ci      si_emit_set_predication_state(cmd_buffer, draw_visible, pred_op, va);
9912bf215546Sopenharmony_ci   }
9913bf215546Sopenharmony_ci
9914bf215546Sopenharmony_ci   /* Store conditional rendering user info. */
9915bf215546Sopenharmony_ci   cmd_buffer->state.predicating = true;
9916bf215546Sopenharmony_ci   cmd_buffer->state.predication_type = draw_visible;
9917bf215546Sopenharmony_ci   cmd_buffer->state.predication_op = pred_op;
9918bf215546Sopenharmony_ci   cmd_buffer->state.predication_va = va;
9919bf215546Sopenharmony_ci   cmd_buffer->mec_inv_pred_emitted = false;
9920bf215546Sopenharmony_ci}
9921bf215546Sopenharmony_ci
9922bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9923bf215546Sopenharmony_ciradv_CmdEndConditionalRenderingEXT(VkCommandBuffer commandBuffer)
9924bf215546Sopenharmony_ci{
9925bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9926bf215546Sopenharmony_ci
9927bf215546Sopenharmony_ci   /* MEC doesn't support predication, no need to emit anything here. */
9928bf215546Sopenharmony_ci   if (!radv_cmd_buffer_uses_mec(cmd_buffer)) {
9929bf215546Sopenharmony_ci      si_emit_set_predication_state(cmd_buffer, false, 0, 0);
9930bf215546Sopenharmony_ci   }
9931bf215546Sopenharmony_ci
9932bf215546Sopenharmony_ci   /* Reset conditional rendering user info. */
9933bf215546Sopenharmony_ci   cmd_buffer->state.predicating = false;
9934bf215546Sopenharmony_ci   cmd_buffer->state.predication_type = -1;
9935bf215546Sopenharmony_ci   cmd_buffer->state.predication_op = 0;
9936bf215546Sopenharmony_ci   cmd_buffer->state.predication_va = 0;
9937bf215546Sopenharmony_ci   cmd_buffer->mec_inv_pred_emitted = false;
9938bf215546Sopenharmony_ci}
9939bf215546Sopenharmony_ci
9940bf215546Sopenharmony_ci/* VK_EXT_transform_feedback */
9941bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
9942bf215546Sopenharmony_ciradv_CmdBindTransformFeedbackBuffersEXT(VkCommandBuffer commandBuffer, uint32_t firstBinding,
9943bf215546Sopenharmony_ci                                        uint32_t bindingCount, const VkBuffer *pBuffers,
9944bf215546Sopenharmony_ci                                        const VkDeviceSize *pOffsets, const VkDeviceSize *pSizes)
9945bf215546Sopenharmony_ci{
9946bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
9947bf215546Sopenharmony_ci   struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
9948bf215546Sopenharmony_ci   uint8_t enabled_mask = 0;
9949bf215546Sopenharmony_ci
9950bf215546Sopenharmony_ci   assert(firstBinding + bindingCount <= MAX_SO_BUFFERS);
9951bf215546Sopenharmony_ci   for (uint32_t i = 0; i < bindingCount; i++) {
9952bf215546Sopenharmony_ci      uint32_t idx = firstBinding + i;
9953bf215546Sopenharmony_ci
9954bf215546Sopenharmony_ci      sb[idx].buffer = radv_buffer_from_handle(pBuffers[i]);
9955bf215546Sopenharmony_ci      sb[idx].offset = pOffsets[i];
9956bf215546Sopenharmony_ci
9957bf215546Sopenharmony_ci      if (!pSizes || pSizes[i] == VK_WHOLE_SIZE) {
9958bf215546Sopenharmony_ci         sb[idx].size = sb[idx].buffer->vk.size - sb[idx].offset;
9959bf215546Sopenharmony_ci      } else {
9960bf215546Sopenharmony_ci         sb[idx].size = pSizes[i];
9961bf215546Sopenharmony_ci      }
9962bf215546Sopenharmony_ci
9963bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, sb[idx].buffer->bo);
9964bf215546Sopenharmony_ci
9965bf215546Sopenharmony_ci      enabled_mask |= 1 << idx;
9966bf215546Sopenharmony_ci   }
9967bf215546Sopenharmony_ci
9968bf215546Sopenharmony_ci   cmd_buffer->state.streamout.enabled_mask |= enabled_mask;
9969bf215546Sopenharmony_ci
9970bf215546Sopenharmony_ci   cmd_buffer->state.dirty |= RADV_CMD_DIRTY_STREAMOUT_BUFFER;
9971bf215546Sopenharmony_ci}
9972bf215546Sopenharmony_ci
9973bf215546Sopenharmony_cibool
9974bf215546Sopenharmony_ciradv_is_streamout_enabled(struct radv_cmd_buffer *cmd_buffer)
9975bf215546Sopenharmony_ci{
9976bf215546Sopenharmony_ci   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
9977bf215546Sopenharmony_ci
9978bf215546Sopenharmony_ci   /* Streamout must be enabled for the PRIMITIVES_GENERATED query to work. */
9979bf215546Sopenharmony_ci   return (so->streamout_enabled || cmd_buffer->state.prims_gen_query_enabled) &&
9980bf215546Sopenharmony_ci          !cmd_buffer->state.suspend_streamout;
9981bf215546Sopenharmony_ci}
9982bf215546Sopenharmony_ci
9983bf215546Sopenharmony_civoid
9984bf215546Sopenharmony_ciradv_emit_streamout_enable(struct radv_cmd_buffer *cmd_buffer)
9985bf215546Sopenharmony_ci{
9986bf215546Sopenharmony_ci   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
9987bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
9988bf215546Sopenharmony_ci   bool streamout_enabled = radv_is_streamout_enabled(cmd_buffer);
9989bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
9990bf215546Sopenharmony_ci   uint32_t enabled_stream_buffers_mask = 0;
9991bf215546Sopenharmony_ci
9992bf215546Sopenharmony_ci   if (pipeline && pipeline->streamout_shader) {
9993bf215546Sopenharmony_ci      enabled_stream_buffers_mask = pipeline->streamout_shader->info.so.enabled_stream_buffers_mask;
9994bf215546Sopenharmony_ci   }
9995bf215546Sopenharmony_ci
9996bf215546Sopenharmony_ci   radeon_set_context_reg_seq(cs, R_028B94_VGT_STRMOUT_CONFIG, 2);
9997bf215546Sopenharmony_ci   radeon_emit(cs, S_028B94_STREAMOUT_0_EN(streamout_enabled) | S_028B94_RAST_STREAM(0) |
9998bf215546Sopenharmony_ci                      S_028B94_STREAMOUT_1_EN(streamout_enabled) |
9999bf215546Sopenharmony_ci                      S_028B94_STREAMOUT_2_EN(streamout_enabled) |
10000bf215546Sopenharmony_ci                      S_028B94_STREAMOUT_3_EN(streamout_enabled));
10001bf215546Sopenharmony_ci   radeon_emit(cs, so->hw_enabled_mask & enabled_stream_buffers_mask);
10002bf215546Sopenharmony_ci
10003bf215546Sopenharmony_ci   cmd_buffer->state.context_roll_without_scissor_emitted = true;
10004bf215546Sopenharmony_ci}
10005bf215546Sopenharmony_ci
10006bf215546Sopenharmony_cistatic void
10007bf215546Sopenharmony_ciradv_set_streamout_enable(struct radv_cmd_buffer *cmd_buffer, bool enable)
10008bf215546Sopenharmony_ci{
10009bf215546Sopenharmony_ci   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
10010bf215546Sopenharmony_ci   bool old_streamout_enabled = radv_is_streamout_enabled(cmd_buffer);
10011bf215546Sopenharmony_ci   uint32_t old_hw_enabled_mask = so->hw_enabled_mask;
10012bf215546Sopenharmony_ci
10013bf215546Sopenharmony_ci   so->streamout_enabled = enable;
10014bf215546Sopenharmony_ci
10015bf215546Sopenharmony_ci   so->hw_enabled_mask = so->enabled_mask | (so->enabled_mask << 4) | (so->enabled_mask << 8) |
10016bf215546Sopenharmony_ci                         (so->enabled_mask << 12);
10017bf215546Sopenharmony_ci
10018bf215546Sopenharmony_ci   if (!cmd_buffer->device->physical_device->use_ngg_streamout &&
10019bf215546Sopenharmony_ci       ((old_streamout_enabled != radv_is_streamout_enabled(cmd_buffer)) ||
10020bf215546Sopenharmony_ci        (old_hw_enabled_mask != so->hw_enabled_mask)))
10021bf215546Sopenharmony_ci      radv_emit_streamout_enable(cmd_buffer);
10022bf215546Sopenharmony_ci
10023bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->use_ngg_streamout) {
10024bf215546Sopenharmony_ci      cmd_buffer->gds_needed = true;
10025bf215546Sopenharmony_ci      cmd_buffer->gds_oa_needed = true;
10026bf215546Sopenharmony_ci   }
10027bf215546Sopenharmony_ci}
10028bf215546Sopenharmony_ci
10029bf215546Sopenharmony_cistatic void
10030bf215546Sopenharmony_ciradv_flush_vgt_streamout(struct radv_cmd_buffer *cmd_buffer)
10031bf215546Sopenharmony_ci{
10032bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
10033bf215546Sopenharmony_ci   unsigned reg_strmout_cntl;
10034bf215546Sopenharmony_ci
10035bf215546Sopenharmony_ci   /* The register is at different places on different ASICs. */
10036bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX9) {
10037bf215546Sopenharmony_ci      reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
10038bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_WRITE_DATA, 3, 0));
10039bf215546Sopenharmony_ci      radeon_emit(cs, S_370_DST_SEL(V_370_MEM_MAPPED_REGISTER) | S_370_ENGINE_SEL(V_370_ME));
10040bf215546Sopenharmony_ci      radeon_emit(cs, R_0300FC_CP_STRMOUT_CNTL >> 2);
10041bf215546Sopenharmony_ci      radeon_emit(cs, 0);
10042bf215546Sopenharmony_ci      radeon_emit(cs, 0);
10043bf215546Sopenharmony_ci   } else if (cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX7) {
10044bf215546Sopenharmony_ci      reg_strmout_cntl = R_0300FC_CP_STRMOUT_CNTL;
10045bf215546Sopenharmony_ci      radeon_set_uconfig_reg(cs, reg_strmout_cntl, 0);
10046bf215546Sopenharmony_ci   } else {
10047bf215546Sopenharmony_ci      reg_strmout_cntl = R_0084FC_CP_STRMOUT_CNTL;
10048bf215546Sopenharmony_ci      radeon_set_config_reg(cs, reg_strmout_cntl, 0);
10049bf215546Sopenharmony_ci   }
10050bf215546Sopenharmony_ci
10051bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
10052bf215546Sopenharmony_ci   radeon_emit(cs, EVENT_TYPE(EVENT_TYPE_SO_VGTSTREAMOUT_FLUSH) | EVENT_INDEX(0));
10053bf215546Sopenharmony_ci
10054bf215546Sopenharmony_ci   radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0));
10055bf215546Sopenharmony_ci   radeon_emit(cs,
10056bf215546Sopenharmony_ci               WAIT_REG_MEM_EQUAL); /* wait until the register is equal to the reference value */
10057bf215546Sopenharmony_ci   radeon_emit(cs, reg_strmout_cntl >> 2); /* register */
10058bf215546Sopenharmony_ci   radeon_emit(cs, 0);
10059bf215546Sopenharmony_ci   radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* reference value */
10060bf215546Sopenharmony_ci   radeon_emit(cs, S_0084FC_OFFSET_UPDATE_DONE(1)); /* mask */
10061bf215546Sopenharmony_ci   radeon_emit(cs, 4);                              /* poll interval */
10062bf215546Sopenharmony_ci}
10063bf215546Sopenharmony_ci
10064bf215546Sopenharmony_cistatic void
10065bf215546Sopenharmony_ciradv_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
10066bf215546Sopenharmony_ci                          uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
10067bf215546Sopenharmony_ci                          const VkDeviceSize *pCounterBufferOffsets)
10068bf215546Sopenharmony_ci
10069bf215546Sopenharmony_ci{
10070bf215546Sopenharmony_ci   struct radv_streamout_binding *sb = cmd_buffer->streamout_bindings;
10071bf215546Sopenharmony_ci   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
10072bf215546Sopenharmony_ci   struct radv_graphics_pipeline *pipeline = cmd_buffer->state.graphics_pipeline;
10073bf215546Sopenharmony_ci   struct radv_shader_info *info = &pipeline->streamout_shader->info;
10074bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
10075bf215546Sopenharmony_ci
10076bf215546Sopenharmony_ci   radv_flush_vgt_streamout(cmd_buffer);
10077bf215546Sopenharmony_ci
10078bf215546Sopenharmony_ci   assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
10079bf215546Sopenharmony_ci   u_foreach_bit(i, so->enabled_mask)
10080bf215546Sopenharmony_ci   {
10081bf215546Sopenharmony_ci      int32_t counter_buffer_idx = i - firstCounterBuffer;
10082bf215546Sopenharmony_ci      if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
10083bf215546Sopenharmony_ci         counter_buffer_idx = -1;
10084bf215546Sopenharmony_ci
10085bf215546Sopenharmony_ci      /* AMD GCN binds streamout buffers as shader resources.
10086bf215546Sopenharmony_ci       * VGT only counts primitives and tells the shader through
10087bf215546Sopenharmony_ci       * SGPRs what to do.
10088bf215546Sopenharmony_ci       */
10089bf215546Sopenharmony_ci      radeon_set_context_reg_seq(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 2);
10090bf215546Sopenharmony_ci      radeon_emit(cs, sb[i].size >> 2);     /* BUFFER_SIZE (in DW) */
10091bf215546Sopenharmony_ci      radeon_emit(cs, info->so.strides[i]); /* VTX_STRIDE (in DW) */
10092bf215546Sopenharmony_ci
10093bf215546Sopenharmony_ci      cmd_buffer->state.context_roll_without_scissor_emitted = true;
10094bf215546Sopenharmony_ci
10095bf215546Sopenharmony_ci      if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
10096bf215546Sopenharmony_ci         /* The array of counter buffers is optional. */
10097bf215546Sopenharmony_ci         RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
10098bf215546Sopenharmony_ci         uint64_t va = radv_buffer_get_va(buffer->bo);
10099bf215546Sopenharmony_ci         uint64_t counter_buffer_offset = 0;
10100bf215546Sopenharmony_ci
10101bf215546Sopenharmony_ci         if (pCounterBufferOffsets)
10102bf215546Sopenharmony_ci            counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
10103bf215546Sopenharmony_ci
10104bf215546Sopenharmony_ci         va += buffer->offset + counter_buffer_offset;
10105bf215546Sopenharmony_ci
10106bf215546Sopenharmony_ci         /* Append */
10107bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
10108bf215546Sopenharmony_ci         radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) |   /* offset in bytes */
10109bf215546Sopenharmony_ci                            STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_MEM)); /* control */
10110bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                                 /* unused */
10111bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                                 /* unused */
10112bf215546Sopenharmony_ci         radeon_emit(cs, va);                                                /* src address lo */
10113bf215546Sopenharmony_ci         radeon_emit(cs, va >> 32);                                          /* src address hi */
10114bf215546Sopenharmony_ci
10115bf215546Sopenharmony_ci         radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
10116bf215546Sopenharmony_ci      } else {
10117bf215546Sopenharmony_ci         /* Start from the beginning. */
10118bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
10119bf215546Sopenharmony_ci         radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
10120bf215546Sopenharmony_ci                            STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_FROM_PACKET)); /* control */
10121bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                                    /* unused */
10122bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                                    /* unused */
10123bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                                    /* unused */
10124bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                                    /* unused */
10125bf215546Sopenharmony_ci      }
10126bf215546Sopenharmony_ci   }
10127bf215546Sopenharmony_ci
10128bf215546Sopenharmony_ci   radv_set_streamout_enable(cmd_buffer, true);
10129bf215546Sopenharmony_ci}
10130bf215546Sopenharmony_ci
10131bf215546Sopenharmony_cistatic void
10132bf215546Sopenharmony_cigfx10_emit_streamout_begin(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
10133bf215546Sopenharmony_ci                           uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
10134bf215546Sopenharmony_ci                           const VkDeviceSize *pCounterBufferOffsets)
10135bf215546Sopenharmony_ci{
10136bf215546Sopenharmony_ci   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
10137bf215546Sopenharmony_ci   unsigned last_target = util_last_bit(so->enabled_mask) - 1;
10138bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
10139bf215546Sopenharmony_ci
10140bf215546Sopenharmony_ci   assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
10141bf215546Sopenharmony_ci   assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
10142bf215546Sopenharmony_ci
10143bf215546Sopenharmony_ci   /* Sync because the next streamout operation will overwrite GDS and we
10144bf215546Sopenharmony_ci    * have to make sure it's idle.
10145bf215546Sopenharmony_ci    * TODO: Improve by tracking if there is a streamout operation in
10146bf215546Sopenharmony_ci    * flight.
10147bf215546Sopenharmony_ci    */
10148bf215546Sopenharmony_ci   cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_VS_PARTIAL_FLUSH;
10149bf215546Sopenharmony_ci   si_emit_cache_flush(cmd_buffer);
10150bf215546Sopenharmony_ci
10151bf215546Sopenharmony_ci   u_foreach_bit(i, so->enabled_mask)
10152bf215546Sopenharmony_ci   {
10153bf215546Sopenharmony_ci      int32_t counter_buffer_idx = i - firstCounterBuffer;
10154bf215546Sopenharmony_ci      if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
10155bf215546Sopenharmony_ci         counter_buffer_idx = -1;
10156bf215546Sopenharmony_ci
10157bf215546Sopenharmony_ci      bool append =
10158bf215546Sopenharmony_ci         counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx];
10159bf215546Sopenharmony_ci      uint64_t va = 0;
10160bf215546Sopenharmony_ci
10161bf215546Sopenharmony_ci      if (append) {
10162bf215546Sopenharmony_ci         RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
10163bf215546Sopenharmony_ci         uint64_t counter_buffer_offset = 0;
10164bf215546Sopenharmony_ci
10165bf215546Sopenharmony_ci         if (pCounterBufferOffsets)
10166bf215546Sopenharmony_ci            counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
10167bf215546Sopenharmony_ci
10168bf215546Sopenharmony_ci         va += radv_buffer_get_va(buffer->bo);
10169bf215546Sopenharmony_ci         va += buffer->offset + counter_buffer_offset;
10170bf215546Sopenharmony_ci
10171bf215546Sopenharmony_ci         radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
10172bf215546Sopenharmony_ci      }
10173bf215546Sopenharmony_ci
10174bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_DMA_DATA, 5, 0));
10175bf215546Sopenharmony_ci      radeon_emit(cs, S_411_SRC_SEL(append ? V_411_SRC_ADDR_TC_L2 : V_411_DATA) |
10176bf215546Sopenharmony_ci                         S_411_DST_SEL(V_411_GDS) | S_411_CP_SYNC(i == last_target));
10177bf215546Sopenharmony_ci      radeon_emit(cs, va);
10178bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
10179bf215546Sopenharmony_ci      radeon_emit(cs, 4 * i); /* destination in GDS */
10180bf215546Sopenharmony_ci      radeon_emit(cs, 0);
10181bf215546Sopenharmony_ci      radeon_emit(cs, S_415_BYTE_COUNT_GFX9(4) | S_415_DISABLE_WR_CONFIRM_GFX9(i != last_target));
10182bf215546Sopenharmony_ci   }
10183bf215546Sopenharmony_ci
10184bf215546Sopenharmony_ci   radv_set_streamout_enable(cmd_buffer, true);
10185bf215546Sopenharmony_ci}
10186bf215546Sopenharmony_ci
10187bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
10188bf215546Sopenharmony_ciradv_CmdBeginTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
10189bf215546Sopenharmony_ci                                  uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
10190bf215546Sopenharmony_ci                                  const VkDeviceSize *pCounterBufferOffsets)
10191bf215546Sopenharmony_ci{
10192bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
10193bf215546Sopenharmony_ci
10194bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->use_ngg_streamout) {
10195bf215546Sopenharmony_ci      gfx10_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount,
10196bf215546Sopenharmony_ci                                 pCounterBuffers, pCounterBufferOffsets);
10197bf215546Sopenharmony_ci   } else {
10198bf215546Sopenharmony_ci      radv_emit_streamout_begin(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
10199bf215546Sopenharmony_ci                                pCounterBufferOffsets);
10200bf215546Sopenharmony_ci   }
10201bf215546Sopenharmony_ci}
10202bf215546Sopenharmony_ci
10203bf215546Sopenharmony_cistatic void
10204bf215546Sopenharmony_ciradv_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
10205bf215546Sopenharmony_ci                        uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
10206bf215546Sopenharmony_ci                        const VkDeviceSize *pCounterBufferOffsets)
10207bf215546Sopenharmony_ci{
10208bf215546Sopenharmony_ci   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
10209bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
10210bf215546Sopenharmony_ci
10211bf215546Sopenharmony_ci   radv_flush_vgt_streamout(cmd_buffer);
10212bf215546Sopenharmony_ci
10213bf215546Sopenharmony_ci   assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
10214bf215546Sopenharmony_ci   u_foreach_bit(i, so->enabled_mask)
10215bf215546Sopenharmony_ci   {
10216bf215546Sopenharmony_ci      int32_t counter_buffer_idx = i - firstCounterBuffer;
10217bf215546Sopenharmony_ci      if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
10218bf215546Sopenharmony_ci         counter_buffer_idx = -1;
10219bf215546Sopenharmony_ci
10220bf215546Sopenharmony_ci      if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
10221bf215546Sopenharmony_ci         /* The array of counters buffer is optional. */
10222bf215546Sopenharmony_ci         RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
10223bf215546Sopenharmony_ci         uint64_t va = radv_buffer_get_va(buffer->bo);
10224bf215546Sopenharmony_ci         uint64_t counter_buffer_offset = 0;
10225bf215546Sopenharmony_ci
10226bf215546Sopenharmony_ci         if (pCounterBufferOffsets)
10227bf215546Sopenharmony_ci            counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
10228bf215546Sopenharmony_ci
10229bf215546Sopenharmony_ci         va += buffer->offset + counter_buffer_offset;
10230bf215546Sopenharmony_ci
10231bf215546Sopenharmony_ci         radeon_emit(cs, PKT3(PKT3_STRMOUT_BUFFER_UPDATE, 4, 0));
10232bf215546Sopenharmony_ci         radeon_emit(cs, STRMOUT_SELECT_BUFFER(i) | STRMOUT_DATA_TYPE(1) | /* offset in bytes */
10233bf215546Sopenharmony_ci                            STRMOUT_OFFSET_SOURCE(STRMOUT_OFFSET_NONE) |
10234bf215546Sopenharmony_ci                            STRMOUT_STORE_BUFFER_FILLED_SIZE); /* control */
10235bf215546Sopenharmony_ci         radeon_emit(cs, va);                                  /* dst address lo */
10236bf215546Sopenharmony_ci         radeon_emit(cs, va >> 32);                            /* dst address hi */
10237bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                   /* unused */
10238bf215546Sopenharmony_ci         radeon_emit(cs, 0);                                   /* unused */
10239bf215546Sopenharmony_ci
10240bf215546Sopenharmony_ci         radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
10241bf215546Sopenharmony_ci      }
10242bf215546Sopenharmony_ci
10243bf215546Sopenharmony_ci      /* Deactivate transform feedback by zeroing the buffer size.
10244bf215546Sopenharmony_ci       * The counters (primitives generated, primitives emitted) may
10245bf215546Sopenharmony_ci       * be enabled even if there is not buffer bound. This ensures
10246bf215546Sopenharmony_ci       * that the primitives-emitted query won't increment.
10247bf215546Sopenharmony_ci       */
10248bf215546Sopenharmony_ci      radeon_set_context_reg(cs, R_028AD0_VGT_STRMOUT_BUFFER_SIZE_0 + 16 * i, 0);
10249bf215546Sopenharmony_ci
10250bf215546Sopenharmony_ci      cmd_buffer->state.context_roll_without_scissor_emitted = true;
10251bf215546Sopenharmony_ci   }
10252bf215546Sopenharmony_ci
10253bf215546Sopenharmony_ci   radv_set_streamout_enable(cmd_buffer, false);
10254bf215546Sopenharmony_ci}
10255bf215546Sopenharmony_ci
10256bf215546Sopenharmony_cistatic void
10257bf215546Sopenharmony_cigfx10_emit_streamout_end(struct radv_cmd_buffer *cmd_buffer, uint32_t firstCounterBuffer,
10258bf215546Sopenharmony_ci                         uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
10259bf215546Sopenharmony_ci                         const VkDeviceSize *pCounterBufferOffsets)
10260bf215546Sopenharmony_ci{
10261bf215546Sopenharmony_ci   struct radv_streamout_state *so = &cmd_buffer->state.streamout;
10262bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
10263bf215546Sopenharmony_ci
10264bf215546Sopenharmony_ci   assert(cmd_buffer->device->physical_device->rad_info.gfx_level >= GFX10);
10265bf215546Sopenharmony_ci   assert(firstCounterBuffer + counterBufferCount <= MAX_SO_BUFFERS);
10266bf215546Sopenharmony_ci
10267bf215546Sopenharmony_ci   u_foreach_bit(i, so->enabled_mask)
10268bf215546Sopenharmony_ci   {
10269bf215546Sopenharmony_ci      int32_t counter_buffer_idx = i - firstCounterBuffer;
10270bf215546Sopenharmony_ci      if (counter_buffer_idx >= 0 && counter_buffer_idx >= counterBufferCount)
10271bf215546Sopenharmony_ci         counter_buffer_idx = -1;
10272bf215546Sopenharmony_ci
10273bf215546Sopenharmony_ci      if (counter_buffer_idx >= 0 && pCounterBuffers && pCounterBuffers[counter_buffer_idx]) {
10274bf215546Sopenharmony_ci         /* The array of counters buffer is optional. */
10275bf215546Sopenharmony_ci         RADV_FROM_HANDLE(radv_buffer, buffer, pCounterBuffers[counter_buffer_idx]);
10276bf215546Sopenharmony_ci         uint64_t va = radv_buffer_get_va(buffer->bo);
10277bf215546Sopenharmony_ci         uint64_t counter_buffer_offset = 0;
10278bf215546Sopenharmony_ci
10279bf215546Sopenharmony_ci         if (pCounterBufferOffsets)
10280bf215546Sopenharmony_ci            counter_buffer_offset = pCounterBufferOffsets[counter_buffer_idx];
10281bf215546Sopenharmony_ci
10282bf215546Sopenharmony_ci         va += buffer->offset + counter_buffer_offset;
10283bf215546Sopenharmony_ci
10284bf215546Sopenharmony_ci         si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
10285bf215546Sopenharmony_ci                                    radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_PS_DONE, 0,
10286bf215546Sopenharmony_ci                                    EOP_DST_SEL_TC_L2, EOP_DATA_SEL_GDS, va, EOP_DATA_GDS(i, 1), 0);
10287bf215546Sopenharmony_ci
10288bf215546Sopenharmony_ci         radv_cs_add_buffer(cmd_buffer->device->ws, cs, buffer->bo);
10289bf215546Sopenharmony_ci      }
10290bf215546Sopenharmony_ci   }
10291bf215546Sopenharmony_ci
10292bf215546Sopenharmony_ci   radv_set_streamout_enable(cmd_buffer, false);
10293bf215546Sopenharmony_ci}
10294bf215546Sopenharmony_ci
10295bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
10296bf215546Sopenharmony_ciradv_CmdEndTransformFeedbackEXT(VkCommandBuffer commandBuffer, uint32_t firstCounterBuffer,
10297bf215546Sopenharmony_ci                                uint32_t counterBufferCount, const VkBuffer *pCounterBuffers,
10298bf215546Sopenharmony_ci                                const VkDeviceSize *pCounterBufferOffsets)
10299bf215546Sopenharmony_ci{
10300bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
10301bf215546Sopenharmony_ci
10302bf215546Sopenharmony_ci   if (cmd_buffer->device->physical_device->use_ngg_streamout) {
10303bf215546Sopenharmony_ci      gfx10_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
10304bf215546Sopenharmony_ci                               pCounterBufferOffsets);
10305bf215546Sopenharmony_ci   } else {
10306bf215546Sopenharmony_ci      radv_emit_streamout_end(cmd_buffer, firstCounterBuffer, counterBufferCount, pCounterBuffers,
10307bf215546Sopenharmony_ci                              pCounterBufferOffsets);
10308bf215546Sopenharmony_ci   }
10309bf215546Sopenharmony_ci}
10310bf215546Sopenharmony_ci
10311bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
10312bf215546Sopenharmony_ciradv_CmdDrawIndirectByteCountEXT(VkCommandBuffer commandBuffer, uint32_t instanceCount,
10313bf215546Sopenharmony_ci                                 uint32_t firstInstance, VkBuffer _counterBuffer,
10314bf215546Sopenharmony_ci                                 VkDeviceSize counterBufferOffset, uint32_t counterOffset,
10315bf215546Sopenharmony_ci                                 uint32_t vertexStride)
10316bf215546Sopenharmony_ci{
10317bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
10318bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, counterBuffer, _counterBuffer);
10319bf215546Sopenharmony_ci   struct radv_draw_info info;
10320bf215546Sopenharmony_ci
10321bf215546Sopenharmony_ci   info.count = 0;
10322bf215546Sopenharmony_ci   info.instance_count = instanceCount;
10323bf215546Sopenharmony_ci   info.first_instance = firstInstance;
10324bf215546Sopenharmony_ci   info.strmout_buffer = counterBuffer;
10325bf215546Sopenharmony_ci   info.strmout_buffer_offset = counterBufferOffset;
10326bf215546Sopenharmony_ci   info.stride = vertexStride;
10327bf215546Sopenharmony_ci   info.indexed = false;
10328bf215546Sopenharmony_ci   info.indirect = NULL;
10329bf215546Sopenharmony_ci
10330bf215546Sopenharmony_ci   if (!radv_before_draw(cmd_buffer, &info, 1))
10331bf215546Sopenharmony_ci      return;
10332bf215546Sopenharmony_ci   struct VkMultiDrawInfoEXT minfo = { 0, 0 };
10333bf215546Sopenharmony_ci   radv_emit_direct_draw_packets(cmd_buffer, &info, 1, &minfo, S_0287F0_USE_OPAQUE(1), 0);
10334bf215546Sopenharmony_ci   radv_after_draw(cmd_buffer);
10335bf215546Sopenharmony_ci}
10336bf215546Sopenharmony_ci
10337bf215546Sopenharmony_ci/* VK_AMD_buffer_marker */
10338bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
10339bf215546Sopenharmony_ciradv_CmdWriteBufferMarker2AMD(VkCommandBuffer commandBuffer, VkPipelineStageFlags2 stage,
10340bf215546Sopenharmony_ci                              VkBuffer dstBuffer, VkDeviceSize dstOffset, uint32_t marker)
10341bf215546Sopenharmony_ci{
10342bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
10343bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, buffer, dstBuffer);
10344bf215546Sopenharmony_ci   struct radeon_cmdbuf *cs = cmd_buffer->cs;
10345bf215546Sopenharmony_ci   uint64_t va = radv_buffer_get_va(buffer->bo) + buffer->offset + dstOffset;
10346bf215546Sopenharmony_ci
10347bf215546Sopenharmony_ci   si_emit_cache_flush(cmd_buffer);
10348bf215546Sopenharmony_ci
10349bf215546Sopenharmony_ci   ASSERTED unsigned cdw_max = radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, 12);
10350bf215546Sopenharmony_ci
10351bf215546Sopenharmony_ci   if (!(stage & ~VK_PIPELINE_STAGE_2_TOP_OF_PIPE_BIT)) {
10352bf215546Sopenharmony_ci      radeon_emit(cs, PKT3(PKT3_COPY_DATA, 4, 0));
10353bf215546Sopenharmony_ci      radeon_emit(cs, COPY_DATA_SRC_SEL(COPY_DATA_IMM) | COPY_DATA_DST_SEL(COPY_DATA_DST_MEM) |
10354bf215546Sopenharmony_ci                         COPY_DATA_WR_CONFIRM);
10355bf215546Sopenharmony_ci      radeon_emit(cs, marker);
10356bf215546Sopenharmony_ci      radeon_emit(cs, 0);
10357bf215546Sopenharmony_ci      radeon_emit(cs, va);
10358bf215546Sopenharmony_ci      radeon_emit(cs, va >> 32);
10359bf215546Sopenharmony_ci   } else {
10360bf215546Sopenharmony_ci      si_cs_emit_write_event_eop(cs, cmd_buffer->device->physical_device->rad_info.gfx_level,
10361bf215546Sopenharmony_ci                                 radv_cmd_buffer_uses_mec(cmd_buffer), V_028A90_BOTTOM_OF_PIPE_TS,
10362bf215546Sopenharmony_ci                                 0, EOP_DST_SEL_MEM, EOP_DATA_SEL_VALUE_32BIT, va, marker,
10363bf215546Sopenharmony_ci                                 cmd_buffer->gfx9_eop_bug_va);
10364bf215546Sopenharmony_ci   }
10365bf215546Sopenharmony_ci
10366bf215546Sopenharmony_ci   assert(cmd_buffer->cs->cdw <= cdw_max);
10367bf215546Sopenharmony_ci}
10368bf215546Sopenharmony_ci
10369bf215546Sopenharmony_civoid
10370bf215546Sopenharmony_ciradv_CmdBindPipelineShaderGroupNV(VkCommandBuffer commandBuffer,
10371bf215546Sopenharmony_ci                                  VkPipelineBindPoint pipelineBindPoint, VkPipeline pipeline,
10372bf215546Sopenharmony_ci                                  uint32_t groupIndex)
10373bf215546Sopenharmony_ci{
10374bf215546Sopenharmony_ci   fprintf(stderr, "radv: unimplemented vkCmdBindPipelineShaderGroupNV\n");
10375bf215546Sopenharmony_ci   abort();
10376bf215546Sopenharmony_ci}