1bf215546Sopenharmony_ci#include "nir/nir_builder.h"
2bf215546Sopenharmony_ci#include "radv_meta.h"
3bf215546Sopenharmony_ci
4bf215546Sopenharmony_ci#include "radv_cs.h"
5bf215546Sopenharmony_ci#include "sid.h"
6bf215546Sopenharmony_ci
7bf215546Sopenharmony_cistatic nir_shader *
8bf215546Sopenharmony_cibuild_buffer_fill_shader(struct radv_device *dev)
9bf215546Sopenharmony_ci{
10bf215546Sopenharmony_ci   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_buffer_fill");
11bf215546Sopenharmony_ci   b.shader->info.workgroup_size[0] = 64;
12bf215546Sopenharmony_ci
13bf215546Sopenharmony_ci   nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
14bf215546Sopenharmony_ci   nir_ssa_def *buffer_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
15bf215546Sopenharmony_ci   nir_ssa_def *size_minus16 = nir_channel(&b, pconst, 2);
16bf215546Sopenharmony_ci   nir_ssa_def *data = nir_swizzle(&b, nir_channel(&b, pconst, 3), (unsigned[]){0, 0, 0, 0}, 4);
17bf215546Sopenharmony_ci
18bf215546Sopenharmony_ci   nir_ssa_def *global_id =
19bf215546Sopenharmony_ci      nir_iadd(&b,
20bf215546Sopenharmony_ci               nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0),
21bf215546Sopenharmony_ci                            b.shader->info.workgroup_size[0]),
22bf215546Sopenharmony_ci               nir_load_local_invocation_index(&b));
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci   nir_ssa_def *offset = nir_imin(&b, nir_imul_imm(&b, global_id, 16), size_minus16);
25bf215546Sopenharmony_ci   nir_ssa_def *dst_addr = nir_iadd(&b, buffer_addr, nir_u2u64(&b, offset));
26bf215546Sopenharmony_ci   nir_build_store_global(&b, data, dst_addr, .align_mul = 4);
27bf215546Sopenharmony_ci
28bf215546Sopenharmony_ci   return b.shader;
29bf215546Sopenharmony_ci}
30bf215546Sopenharmony_ci
31bf215546Sopenharmony_cistatic nir_shader *
32bf215546Sopenharmony_cibuild_buffer_copy_shader(struct radv_device *dev)
33bf215546Sopenharmony_ci{
34bf215546Sopenharmony_ci   nir_builder b = radv_meta_init_shader(dev, MESA_SHADER_COMPUTE, "meta_buffer_copy");
35bf215546Sopenharmony_ci   b.shader->info.workgroup_size[0] = 64;
36bf215546Sopenharmony_ci
37bf215546Sopenharmony_ci   nir_ssa_def *pconst = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 16);
38bf215546Sopenharmony_ci   nir_ssa_def *size_minus16 =
39bf215546Sopenharmony_ci      nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 16, .range = 4);
40bf215546Sopenharmony_ci   nir_ssa_def *src_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b0011));
41bf215546Sopenharmony_ci   nir_ssa_def *dst_addr = nir_pack_64_2x32(&b, nir_channels(&b, pconst, 0b1100));
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci   nir_ssa_def *global_id =
44bf215546Sopenharmony_ci      nir_iadd(&b,
45bf215546Sopenharmony_ci               nir_imul_imm(&b, nir_channel(&b, nir_load_workgroup_id(&b, 32), 0),
46bf215546Sopenharmony_ci                            b.shader->info.workgroup_size[0]),
47bf215546Sopenharmony_ci               nir_load_local_invocation_index(&b));
48bf215546Sopenharmony_ci
49bf215546Sopenharmony_ci   nir_ssa_def *offset = nir_u2u64(&b, nir_imin(&b, nir_imul_imm(&b, global_id, 16), size_minus16));
50bf215546Sopenharmony_ci
51bf215546Sopenharmony_ci   nir_ssa_def *data =
52bf215546Sopenharmony_ci      nir_build_load_global(&b, 4, 32, nir_iadd(&b, src_addr, offset), .align_mul = 4);
53bf215546Sopenharmony_ci   nir_build_store_global(&b, data, nir_iadd(&b, dst_addr, offset), .align_mul = 4);
54bf215546Sopenharmony_ci
55bf215546Sopenharmony_ci   return b.shader;
56bf215546Sopenharmony_ci}
57bf215546Sopenharmony_ci
58bf215546Sopenharmony_cistruct fill_constants {
59bf215546Sopenharmony_ci   uint64_t addr;
60bf215546Sopenharmony_ci   uint32_t size_minus16;
61bf215546Sopenharmony_ci   uint32_t data;
62bf215546Sopenharmony_ci};
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_cistruct copy_constants {
65bf215546Sopenharmony_ci   uint64_t src_addr;
66bf215546Sopenharmony_ci   uint64_t dst_addr;
67bf215546Sopenharmony_ci   uint32_t size_minus16;
68bf215546Sopenharmony_ci};
69bf215546Sopenharmony_ci
70bf215546Sopenharmony_ciVkResult
71bf215546Sopenharmony_ciradv_device_init_meta_buffer_state(struct radv_device *device)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   VkResult result;
74bf215546Sopenharmony_ci   nir_shader *fill_cs = build_buffer_fill_shader(device);
75bf215546Sopenharmony_ci   nir_shader *copy_cs = build_buffer_copy_shader(device);
76bf215546Sopenharmony_ci
77bf215546Sopenharmony_ci   VkPipelineLayoutCreateInfo fill_pl_create_info = {
78bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
79bf215546Sopenharmony_ci      .setLayoutCount = 0,
80bf215546Sopenharmony_ci      .pushConstantRangeCount = 1,
81bf215546Sopenharmony_ci      .pPushConstantRanges =
82bf215546Sopenharmony_ci         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct fill_constants)},
83bf215546Sopenharmony_ci   };
84bf215546Sopenharmony_ci
85bf215546Sopenharmony_ci   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &fill_pl_create_info,
86bf215546Sopenharmony_ci                                      &device->meta_state.alloc,
87bf215546Sopenharmony_ci                                      &device->meta_state.buffer.fill_p_layout);
88bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
89bf215546Sopenharmony_ci      goto fail;
90bf215546Sopenharmony_ci
91bf215546Sopenharmony_ci   VkPipelineLayoutCreateInfo copy_pl_create_info = {
92bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
93bf215546Sopenharmony_ci      .setLayoutCount = 0,
94bf215546Sopenharmony_ci      .pushConstantRangeCount = 1,
95bf215546Sopenharmony_ci      .pPushConstantRanges =
96bf215546Sopenharmony_ci         &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(struct copy_constants)},
97bf215546Sopenharmony_ci   };
98bf215546Sopenharmony_ci
99bf215546Sopenharmony_ci   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &copy_pl_create_info,
100bf215546Sopenharmony_ci                                      &device->meta_state.alloc,
101bf215546Sopenharmony_ci                                      &device->meta_state.buffer.copy_p_layout);
102bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
103bf215546Sopenharmony_ci      goto fail;
104bf215546Sopenharmony_ci
105bf215546Sopenharmony_ci   VkPipelineShaderStageCreateInfo fill_pipeline_shader_stage = {
106bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
107bf215546Sopenharmony_ci      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
108bf215546Sopenharmony_ci      .module = vk_shader_module_handle_from_nir(fill_cs),
109bf215546Sopenharmony_ci      .pName = "main",
110bf215546Sopenharmony_ci      .pSpecializationInfo = NULL,
111bf215546Sopenharmony_ci   };
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_ci   VkComputePipelineCreateInfo fill_vk_pipeline_info = {
114bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
115bf215546Sopenharmony_ci      .stage = fill_pipeline_shader_stage,
116bf215546Sopenharmony_ci      .flags = 0,
117bf215546Sopenharmony_ci      .layout = device->meta_state.buffer.fill_p_layout,
118bf215546Sopenharmony_ci   };
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   result = radv_CreateComputePipelines(
121bf215546Sopenharmony_ci      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
122bf215546Sopenharmony_ci      &fill_vk_pipeline_info, NULL, &device->meta_state.buffer.fill_pipeline);
123bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
124bf215546Sopenharmony_ci      goto fail;
125bf215546Sopenharmony_ci
126bf215546Sopenharmony_ci   VkPipelineShaderStageCreateInfo copy_pipeline_shader_stage = {
127bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
128bf215546Sopenharmony_ci      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
129bf215546Sopenharmony_ci      .module = vk_shader_module_handle_from_nir(copy_cs),
130bf215546Sopenharmony_ci      .pName = "main",
131bf215546Sopenharmony_ci      .pSpecializationInfo = NULL,
132bf215546Sopenharmony_ci   };
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci   VkComputePipelineCreateInfo copy_vk_pipeline_info = {
135bf215546Sopenharmony_ci      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
136bf215546Sopenharmony_ci      .stage = copy_pipeline_shader_stage,
137bf215546Sopenharmony_ci      .flags = 0,
138bf215546Sopenharmony_ci      .layout = device->meta_state.buffer.copy_p_layout,
139bf215546Sopenharmony_ci   };
140bf215546Sopenharmony_ci
141bf215546Sopenharmony_ci   result = radv_CreateComputePipelines(
142bf215546Sopenharmony_ci      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
143bf215546Sopenharmony_ci      &copy_vk_pipeline_info, NULL, &device->meta_state.buffer.copy_pipeline);
144bf215546Sopenharmony_ci   if (result != VK_SUCCESS)
145bf215546Sopenharmony_ci      goto fail;
146bf215546Sopenharmony_ci
147bf215546Sopenharmony_ci   ralloc_free(fill_cs);
148bf215546Sopenharmony_ci   ralloc_free(copy_cs);
149bf215546Sopenharmony_ci   return VK_SUCCESS;
150bf215546Sopenharmony_cifail:
151bf215546Sopenharmony_ci   ralloc_free(fill_cs);
152bf215546Sopenharmony_ci   ralloc_free(copy_cs);
153bf215546Sopenharmony_ci   return result;
154bf215546Sopenharmony_ci}
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_civoid
157bf215546Sopenharmony_ciradv_device_finish_meta_buffer_state(struct radv_device *device)
158bf215546Sopenharmony_ci{
159bf215546Sopenharmony_ci   struct radv_meta_state *state = &device->meta_state;
160bf215546Sopenharmony_ci
161bf215546Sopenharmony_ci   radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.copy_pipeline, &state->alloc);
162bf215546Sopenharmony_ci   radv_DestroyPipeline(radv_device_to_handle(device), state->buffer.fill_pipeline, &state->alloc);
163bf215546Sopenharmony_ci   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.copy_p_layout,
164bf215546Sopenharmony_ci                              &state->alloc);
165bf215546Sopenharmony_ci   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->buffer.fill_p_layout,
166bf215546Sopenharmony_ci                              &state->alloc);
167bf215546Sopenharmony_ci}
168bf215546Sopenharmony_ci
169bf215546Sopenharmony_cistatic void
170bf215546Sopenharmony_cifill_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t va, uint64_t size, uint32_t data)
171bf215546Sopenharmony_ci{
172bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
173bf215546Sopenharmony_ci   struct radv_meta_saved_state saved_state;
174bf215546Sopenharmony_ci
175bf215546Sopenharmony_ci   radv_meta_save(
176bf215546Sopenharmony_ci      &saved_state, cmd_buffer,
177bf215546Sopenharmony_ci      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
180bf215546Sopenharmony_ci                        device->meta_state.buffer.fill_pipeline);
181bf215546Sopenharmony_ci
182bf215546Sopenharmony_ci   assert(size >= 16 && size <= UINT32_MAX);
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci   struct fill_constants fill_consts = {
185bf215546Sopenharmony_ci      .addr = va,
186bf215546Sopenharmony_ci      .size_minus16 = size - 16,
187bf215546Sopenharmony_ci      .data = data,
188bf215546Sopenharmony_ci   };
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
191bf215546Sopenharmony_ci                         device->meta_state.buffer.fill_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
192bf215546Sopenharmony_ci                         sizeof(fill_consts), &fill_consts);
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci   radv_unaligned_dispatch(cmd_buffer, DIV_ROUND_UP(size, 16), 1, 1);
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci   radv_meta_restore(&saved_state, cmd_buffer);
197bf215546Sopenharmony_ci}
198bf215546Sopenharmony_ci
199bf215546Sopenharmony_cistatic void
200bf215546Sopenharmony_cicopy_buffer_shader(struct radv_cmd_buffer *cmd_buffer, uint64_t src_va, uint64_t dst_va,
201bf215546Sopenharmony_ci                   uint64_t size)
202bf215546Sopenharmony_ci{
203bf215546Sopenharmony_ci   struct radv_device *device = cmd_buffer->device;
204bf215546Sopenharmony_ci   struct radv_meta_saved_state saved_state;
205bf215546Sopenharmony_ci
206bf215546Sopenharmony_ci   radv_meta_save(
207bf215546Sopenharmony_ci      &saved_state, cmd_buffer,
208bf215546Sopenharmony_ci      RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_ci   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
211bf215546Sopenharmony_ci                        device->meta_state.buffer.copy_pipeline);
212bf215546Sopenharmony_ci
213bf215546Sopenharmony_ci   assert(size >= 16 && size <= UINT32_MAX);
214bf215546Sopenharmony_ci
215bf215546Sopenharmony_ci   struct copy_constants copy_consts = {
216bf215546Sopenharmony_ci      .src_addr = src_va,
217bf215546Sopenharmony_ci      .dst_addr = dst_va,
218bf215546Sopenharmony_ci      .size_minus16 = size - 16,
219bf215546Sopenharmony_ci   };
220bf215546Sopenharmony_ci
221bf215546Sopenharmony_ci   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
222bf215546Sopenharmony_ci                         device->meta_state.buffer.copy_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
223bf215546Sopenharmony_ci                         sizeof(copy_consts), &copy_consts);
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci   radv_unaligned_dispatch(cmd_buffer, DIV_ROUND_UP(size, 16), 1, 1);
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci   radv_meta_restore(&saved_state, cmd_buffer);
228bf215546Sopenharmony_ci}
229bf215546Sopenharmony_ci
230bf215546Sopenharmony_cistatic bool
231bf215546Sopenharmony_ciradv_prefer_compute_dma(const struct radv_device *device, uint64_t size,
232bf215546Sopenharmony_ci                        struct radeon_winsys_bo *src_bo, struct radeon_winsys_bo *dst_bo)
233bf215546Sopenharmony_ci{
234bf215546Sopenharmony_ci   bool use_compute = size >= RADV_BUFFER_OPS_CS_THRESHOLD;
235bf215546Sopenharmony_ci
236bf215546Sopenharmony_ci   if (device->physical_device->rad_info.gfx_level >= GFX10 &&
237bf215546Sopenharmony_ci       device->physical_device->rad_info.has_dedicated_vram) {
238bf215546Sopenharmony_ci      if ((src_bo && !(src_bo->initial_domain & RADEON_DOMAIN_VRAM)) ||
239bf215546Sopenharmony_ci          (dst_bo && !(dst_bo->initial_domain & RADEON_DOMAIN_VRAM))) {
240bf215546Sopenharmony_ci         /* Prefer CP DMA for GTT on dGPUS due to slow PCIe. */
241bf215546Sopenharmony_ci         use_compute = false;
242bf215546Sopenharmony_ci      }
243bf215546Sopenharmony_ci   }
244bf215546Sopenharmony_ci
245bf215546Sopenharmony_ci   return use_compute;
246bf215546Sopenharmony_ci}
247bf215546Sopenharmony_ci
248bf215546Sopenharmony_ciuint32_t
249bf215546Sopenharmony_ciradv_fill_buffer(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,
250bf215546Sopenharmony_ci                 struct radeon_winsys_bo *bo, uint64_t va, uint64_t size, uint32_t value)
251bf215546Sopenharmony_ci{
252bf215546Sopenharmony_ci   bool use_compute = radv_prefer_compute_dma(cmd_buffer->device, size, NULL, bo);
253bf215546Sopenharmony_ci   uint32_t flush_bits = 0;
254bf215546Sopenharmony_ci
255bf215546Sopenharmony_ci   assert(!(va & 3));
256bf215546Sopenharmony_ci   assert(!(size & 3));
257bf215546Sopenharmony_ci
258bf215546Sopenharmony_ci   if (bo)
259bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, bo);
260bf215546Sopenharmony_ci
261bf215546Sopenharmony_ci   if (use_compute) {
262bf215546Sopenharmony_ci      cmd_buffer->state.flush_bits |=
263bf215546Sopenharmony_ci         radv_dst_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci      fill_buffer_shader(cmd_buffer, va, size, value);
266bf215546Sopenharmony_ci
267bf215546Sopenharmony_ci      flush_bits = RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
268bf215546Sopenharmony_ci                   radv_src_access_flush(cmd_buffer, VK_ACCESS_2_SHADER_WRITE_BIT, image);
269bf215546Sopenharmony_ci   } else if (size)
270bf215546Sopenharmony_ci      si_cp_dma_clear_buffer(cmd_buffer, va, size, value);
271bf215546Sopenharmony_ci
272bf215546Sopenharmony_ci   return flush_bits;
273bf215546Sopenharmony_ci}
274bf215546Sopenharmony_ci
275bf215546Sopenharmony_civoid
276bf215546Sopenharmony_ciradv_copy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radeon_winsys_bo *src_bo,
277bf215546Sopenharmony_ci                 struct radeon_winsys_bo *dst_bo, uint64_t src_offset, uint64_t dst_offset,
278bf215546Sopenharmony_ci                 uint64_t size)
279bf215546Sopenharmony_ci{
280bf215546Sopenharmony_ci   bool use_compute = !(size & 3) && !(src_offset & 3) && !(dst_offset & 3) &&
281bf215546Sopenharmony_ci                      radv_prefer_compute_dma(cmd_buffer->device, size, src_bo, dst_bo);
282bf215546Sopenharmony_ci
283bf215546Sopenharmony_ci   uint64_t src_va = radv_buffer_get_va(src_bo) + src_offset;
284bf215546Sopenharmony_ci   uint64_t dst_va = radv_buffer_get_va(dst_bo) + dst_offset;
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, src_bo);
287bf215546Sopenharmony_ci   radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_bo);
288bf215546Sopenharmony_ci
289bf215546Sopenharmony_ci   if (use_compute)
290bf215546Sopenharmony_ci      copy_buffer_shader(cmd_buffer, src_va, dst_va, size);
291bf215546Sopenharmony_ci   else if (size)
292bf215546Sopenharmony_ci      si_cp_dma_buffer_copy(cmd_buffer, src_va, dst_va, size);
293bf215546Sopenharmony_ci}
294bf215546Sopenharmony_ci
295bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
296bf215546Sopenharmony_ciradv_CmdFillBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
297bf215546Sopenharmony_ci                   VkDeviceSize fillSize, uint32_t data)
298bf215546Sopenharmony_ci{
299bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
300bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
301bf215546Sopenharmony_ci
302bf215546Sopenharmony_ci   fillSize = vk_buffer_range(&dst_buffer->vk, dstOffset, fillSize) & ~3ull;
303bf215546Sopenharmony_ci
304bf215546Sopenharmony_ci   radv_fill_buffer(cmd_buffer, NULL, dst_buffer->bo,
305bf215546Sopenharmony_ci                    radv_buffer_get_va(dst_buffer->bo) + dst_buffer->offset + dstOffset, fillSize,
306bf215546Sopenharmony_ci                    data);
307bf215546Sopenharmony_ci}
308bf215546Sopenharmony_ci
309bf215546Sopenharmony_cistatic void
310bf215546Sopenharmony_cicopy_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *src_buffer,
311bf215546Sopenharmony_ci            struct radv_buffer *dst_buffer, const VkBufferCopy2 *region)
312bf215546Sopenharmony_ci{
313bf215546Sopenharmony_ci   bool old_predicating;
314bf215546Sopenharmony_ci
315bf215546Sopenharmony_ci   /* VK_EXT_conditional_rendering says that copy commands should not be
316bf215546Sopenharmony_ci    * affected by conditional rendering.
317bf215546Sopenharmony_ci    */
318bf215546Sopenharmony_ci   old_predicating = cmd_buffer->state.predicating;
319bf215546Sopenharmony_ci   cmd_buffer->state.predicating = false;
320bf215546Sopenharmony_ci
321bf215546Sopenharmony_ci   radv_copy_buffer(cmd_buffer, src_buffer->bo, dst_buffer->bo,
322bf215546Sopenharmony_ci                    src_buffer->offset + region->srcOffset, dst_buffer->offset + region->dstOffset,
323bf215546Sopenharmony_ci                    region->size);
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci   /* Restore conditional rendering. */
326bf215546Sopenharmony_ci   cmd_buffer->state.predicating = old_predicating;
327bf215546Sopenharmony_ci}
328bf215546Sopenharmony_ci
329bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
330bf215546Sopenharmony_ciradv_CmdCopyBuffer2(VkCommandBuffer commandBuffer, const VkCopyBufferInfo2 *pCopyBufferInfo)
331bf215546Sopenharmony_ci{
332bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
333bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
334bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);
335bf215546Sopenharmony_ci
336bf215546Sopenharmony_ci   for (unsigned r = 0; r < pCopyBufferInfo->regionCount; r++) {
337bf215546Sopenharmony_ci      copy_buffer(cmd_buffer, src_buffer, dst_buffer, &pCopyBufferInfo->pRegions[r]);
338bf215546Sopenharmony_ci   }
339bf215546Sopenharmony_ci}
340bf215546Sopenharmony_ci
341bf215546Sopenharmony_civoid
342bf215546Sopenharmony_ciradv_update_buffer_cp(struct radv_cmd_buffer *cmd_buffer, uint64_t va, const void *data,
343bf215546Sopenharmony_ci                      uint64_t size)
344bf215546Sopenharmony_ci{
345bf215546Sopenharmony_ci   uint64_t words = size / 4;
346bf215546Sopenharmony_ci   bool mec = radv_cmd_buffer_uses_mec(cmd_buffer);
347bf215546Sopenharmony_ci
348bf215546Sopenharmony_ci   assert(size < RADV_BUFFER_UPDATE_THRESHOLD);
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci   si_emit_cache_flush(cmd_buffer);
351bf215546Sopenharmony_ci   radeon_check_space(cmd_buffer->device->ws, cmd_buffer->cs, words + 4);
352bf215546Sopenharmony_ci
353bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, PKT3(PKT3_WRITE_DATA, 2 + words, 0));
354bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, S_370_DST_SEL(mec ? V_370_MEM : V_370_MEM_GRBM) |
355bf215546Sopenharmony_ci                                  S_370_WR_CONFIRM(1) | S_370_ENGINE_SEL(V_370_ME));
356bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, va);
357bf215546Sopenharmony_ci   radeon_emit(cmd_buffer->cs, va >> 32);
358bf215546Sopenharmony_ci   radeon_emit_array(cmd_buffer->cs, data, words);
359bf215546Sopenharmony_ci
360bf215546Sopenharmony_ci   if (unlikely(cmd_buffer->device->trace_bo))
361bf215546Sopenharmony_ci      radv_cmd_buffer_trace_emit(cmd_buffer);
362bf215546Sopenharmony_ci}
363bf215546Sopenharmony_ci
364bf215546Sopenharmony_ciVKAPI_ATTR void VKAPI_CALL
365bf215546Sopenharmony_ciradv_CmdUpdateBuffer(VkCommandBuffer commandBuffer, VkBuffer dstBuffer, VkDeviceSize dstOffset,
366bf215546Sopenharmony_ci                     VkDeviceSize dataSize, const void *pData)
367bf215546Sopenharmony_ci{
368bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);
369bf215546Sopenharmony_ci   RADV_FROM_HANDLE(radv_buffer, dst_buffer, dstBuffer);
370bf215546Sopenharmony_ci   uint64_t va = radv_buffer_get_va(dst_buffer->bo);
371bf215546Sopenharmony_ci   va += dstOffset + dst_buffer->offset;
372bf215546Sopenharmony_ci
373bf215546Sopenharmony_ci   assert(!(dataSize & 3));
374bf215546Sopenharmony_ci   assert(!(va & 3));
375bf215546Sopenharmony_ci
376bf215546Sopenharmony_ci   if (!dataSize)
377bf215546Sopenharmony_ci      return;
378bf215546Sopenharmony_ci
379bf215546Sopenharmony_ci   if (dataSize < RADV_BUFFER_UPDATE_THRESHOLD) {
380bf215546Sopenharmony_ci      radv_cs_add_buffer(cmd_buffer->device->ws, cmd_buffer->cs, dst_buffer->bo);
381bf215546Sopenharmony_ci      radv_update_buffer_cp(cmd_buffer, va, pData, dataSize);
382bf215546Sopenharmony_ci   } else {
383bf215546Sopenharmony_ci      uint32_t buf_offset;
384bf215546Sopenharmony_ci      radv_cmd_buffer_upload_data(cmd_buffer, dataSize, pData, &buf_offset);
385bf215546Sopenharmony_ci      radv_copy_buffer(cmd_buffer, cmd_buffer->upload.upload_bo, dst_buffer->bo, buf_offset,
386bf215546Sopenharmony_ci                       dstOffset + dst_buffer->offset, dataSize);
387bf215546Sopenharmony_ci   }
388bf215546Sopenharmony_ci}
389