/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "gen_macros.h"
25bf215546Sopenharmony_ci
26bf215546Sopenharmony_ci#include "nir/nir_builder.h"
27bf215546Sopenharmony_ci#include "pan_encoder.h"
28bf215546Sopenharmony_ci#include "pan_shader.h"
29bf215546Sopenharmony_ci
30bf215546Sopenharmony_ci#include "panvk_private.h"
31bf215546Sopenharmony_ci
32bf215546Sopenharmony_cistatic mali_ptr
33bf215546Sopenharmony_cipanvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
34bf215546Sopenharmony_ci                                 struct pan_pool *desc_pool,
35bf215546Sopenharmony_ci                                 const struct pan_image_view *view)
36bf215546Sopenharmony_ci{
37bf215546Sopenharmony_ci   struct panfrost_ptr texture =
38bf215546Sopenharmony_ci      pan_pool_alloc_desc(desc_pool, TEXTURE);
39bf215546Sopenharmony_ci   size_t payload_size =
40bf215546Sopenharmony_ci      GENX(panfrost_estimate_texture_payload_size)(view);
41bf215546Sopenharmony_ci   struct panfrost_ptr surfaces =
42bf215546Sopenharmony_ci      pan_pool_alloc_aligned(desc_pool, payload_size,
43bf215546Sopenharmony_ci                             pan_alignment(SURFACE_WITH_STRIDE));
44bf215546Sopenharmony_ci
45bf215546Sopenharmony_ci   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);
46bf215546Sopenharmony_ci
47bf215546Sopenharmony_ci   return texture.gpu;
48bf215546Sopenharmony_ci}
49bf215546Sopenharmony_ci
50bf215546Sopenharmony_cistatic mali_ptr
51bf215546Sopenharmony_cipanvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
52bf215546Sopenharmony_ci                                 struct pan_pool *desc_pool)
53bf215546Sopenharmony_ci{
54bf215546Sopenharmony_ci   struct panfrost_ptr sampler =
55bf215546Sopenharmony_ci      pan_pool_alloc_desc(desc_pool, SAMPLER);
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci   pan_pack(sampler.cpu, SAMPLER, cfg) {
58bf215546Sopenharmony_ci      cfg.seamless_cube_map = false;
59bf215546Sopenharmony_ci      cfg.normalized_coordinates = false;
60bf215546Sopenharmony_ci      cfg.minify_nearest = true;
61bf215546Sopenharmony_ci      cfg.magnify_nearest = true;
62bf215546Sopenharmony_ci   }
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_ci   return sampler.gpu;
65bf215546Sopenharmony_ci}
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_cistatic void
68bf215546Sopenharmony_cipanvk_meta_copy_emit_varying(struct pan_pool *pool,
69bf215546Sopenharmony_ci                             mali_ptr coordinates,
70bf215546Sopenharmony_ci                             mali_ptr *varying_bufs,
71bf215546Sopenharmony_ci                             mali_ptr *varyings)
72bf215546Sopenharmony_ci{
73bf215546Sopenharmony_ci   struct panfrost_ptr varying =
74bf215546Sopenharmony_ci      pan_pool_alloc_desc(pool, ATTRIBUTE);
75bf215546Sopenharmony_ci   struct panfrost_ptr varying_buffer =
76bf215546Sopenharmony_ci      pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER);
77bf215546Sopenharmony_ci
78bf215546Sopenharmony_ci   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
79bf215546Sopenharmony_ci      cfg.pointer = coordinates;
80bf215546Sopenharmony_ci      cfg.stride = 4 * sizeof(uint32_t);
81bf215546Sopenharmony_ci      cfg.size = cfg.stride * 4;
82bf215546Sopenharmony_ci   }
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci   /* Bifrost needs an empty desc to mark end of prefetching */
85bf215546Sopenharmony_ci   pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
86bf215546Sopenharmony_ci            ATTRIBUTE_BUFFER, cfg);
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_ci   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
89bf215546Sopenharmony_ci      cfg.buffer_index = 0;
90bf215546Sopenharmony_ci      cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
91bf215546Sopenharmony_ci   }
92bf215546Sopenharmony_ci
93bf215546Sopenharmony_ci   *varyings = varying.gpu;
94bf215546Sopenharmony_ci   *varying_bufs = varying_buffer.gpu;
95bf215546Sopenharmony_ci}
96bf215546Sopenharmony_ci
97bf215546Sopenharmony_cistatic void
98bf215546Sopenharmony_cipanvk_meta_copy_emit_dcd(struct pan_pool *pool,
99bf215546Sopenharmony_ci                         mali_ptr src_coords, mali_ptr dst_coords,
100bf215546Sopenharmony_ci                         mali_ptr texture, mali_ptr sampler,
101bf215546Sopenharmony_ci                         mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
102bf215546Sopenharmony_ci                         mali_ptr push_constants, void *out)
103bf215546Sopenharmony_ci{
104bf215546Sopenharmony_ci   pan_pack(out, DRAW, cfg) {
105bf215546Sopenharmony_ci      cfg.thread_storage = tsd;
106bf215546Sopenharmony_ci      cfg.state = rsd;
107bf215546Sopenharmony_ci      cfg.push_uniforms = push_constants;
108bf215546Sopenharmony_ci      cfg.position = dst_coords;
109bf215546Sopenharmony_ci      if (src_coords) {
110bf215546Sopenharmony_ci              panvk_meta_copy_emit_varying(pool, src_coords,
111bf215546Sopenharmony_ci                                           &cfg.varying_buffers,
112bf215546Sopenharmony_ci                                           &cfg.varyings);
113bf215546Sopenharmony_ci      }
114bf215546Sopenharmony_ci      cfg.viewport = vpd;
115bf215546Sopenharmony_ci      cfg.textures = texture;
116bf215546Sopenharmony_ci      cfg.samplers = sampler;
117bf215546Sopenharmony_ci   }
118bf215546Sopenharmony_ci}
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_cistatic struct panfrost_ptr
121bf215546Sopenharmony_cipanvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
122bf215546Sopenharmony_ci                               struct pan_scoreboard *scoreboard,
123bf215546Sopenharmony_ci                               mali_ptr src_coords, mali_ptr dst_coords,
124bf215546Sopenharmony_ci                               mali_ptr texture, mali_ptr sampler,
125bf215546Sopenharmony_ci                               mali_ptr push_constants,
126bf215546Sopenharmony_ci                               mali_ptr vpd, mali_ptr rsd,
127bf215546Sopenharmony_ci                               mali_ptr tsd, mali_ptr tiler)
128bf215546Sopenharmony_ci{
129bf215546Sopenharmony_ci   struct panfrost_ptr job =
130bf215546Sopenharmony_ci      pan_pool_alloc_desc(desc_pool, TILER_JOB);
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
133bf215546Sopenharmony_ci                            texture, sampler, vpd, tsd, rsd, push_constants,
134bf215546Sopenharmony_ci                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));
135bf215546Sopenharmony_ci
136bf215546Sopenharmony_ci   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
137bf215546Sopenharmony_ci      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
138bf215546Sopenharmony_ci      cfg.index_count = 4;
139bf215546Sopenharmony_ci      cfg.job_task_split = 6;
140bf215546Sopenharmony_ci   }
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
143bf215546Sopenharmony_ci      cfg.constant = 1.0f;
144bf215546Sopenharmony_ci   }
145bf215546Sopenharmony_ci
146bf215546Sopenharmony_ci   void *invoc = pan_section_ptr(job.cpu,
147bf215546Sopenharmony_ci                                 TILER_JOB,
148bf215546Sopenharmony_ci                                 INVOCATION);
149bf215546Sopenharmony_ci   panfrost_pack_work_groups_compute(invoc, 1, 4,
150bf215546Sopenharmony_ci                                     1, 1, 1, 1, true, false);
151bf215546Sopenharmony_ci
152bf215546Sopenharmony_ci   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
153bf215546Sopenharmony_ci   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
154bf215546Sopenharmony_ci      cfg.address = tiler;
155bf215546Sopenharmony_ci   }
156bf215546Sopenharmony_ci
157bf215546Sopenharmony_ci   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
158bf215546Sopenharmony_ci                    false, false, 0, 0, &job, false);
159bf215546Sopenharmony_ci   return job;
160bf215546Sopenharmony_ci}
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_cistatic struct panfrost_ptr
163bf215546Sopenharmony_cipanvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
164bf215546Sopenharmony_ci                                 struct pan_scoreboard *scoreboard,
165bf215546Sopenharmony_ci                                 const struct pan_compute_dim *num_wg,
166bf215546Sopenharmony_ci                                 const struct pan_compute_dim *wg_sz,
167bf215546Sopenharmony_ci                                 mali_ptr texture, mali_ptr sampler,
168bf215546Sopenharmony_ci                                 mali_ptr push_constants,
169bf215546Sopenharmony_ci                                 mali_ptr rsd, mali_ptr tsd)
170bf215546Sopenharmony_ci{
171bf215546Sopenharmony_ci   struct panfrost_ptr job =
172bf215546Sopenharmony_ci      pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci   void *invoc = pan_section_ptr(job.cpu,
175bf215546Sopenharmony_ci                                 COMPUTE_JOB,
176bf215546Sopenharmony_ci                                 INVOCATION);
177bf215546Sopenharmony_ci   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
178bf215546Sopenharmony_ci                                     wg_sz->x, wg_sz->y, wg_sz->z,
179bf215546Sopenharmony_ci                                     false, false);
180bf215546Sopenharmony_ci
181bf215546Sopenharmony_ci   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
182bf215546Sopenharmony_ci      cfg.job_task_split = 8;
183bf215546Sopenharmony_ci   }
184bf215546Sopenharmony_ci
185bf215546Sopenharmony_ci   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
186bf215546Sopenharmony_ci                            0, tsd, rsd, push_constants,
187bf215546Sopenharmony_ci                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));
188bf215546Sopenharmony_ci
189bf215546Sopenharmony_ci   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
190bf215546Sopenharmony_ci                    false, false, 0, 0, &job, false);
191bf215546Sopenharmony_ci   return job;
192bf215546Sopenharmony_ci}
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_ci
195bf215546Sopenharmony_cistatic uint32_t
196bf215546Sopenharmony_cipanvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
197bf215546Sopenharmony_ci{
198bf215546Sopenharmony_ci   switch (texelsize) {
199bf215546Sopenharmony_ci   case 6: return MALI_RGB16UI << 12;
200bf215546Sopenharmony_ci   case 8: return MALI_RG32UI << 12;
201bf215546Sopenharmony_ci   case 12: return MALI_RGB32UI << 12;
202bf215546Sopenharmony_ci   case 16: return MALI_RGBA32UI << 12;
203bf215546Sopenharmony_ci   default: unreachable("Invalid texel size\n");
204bf215546Sopenharmony_ci   }
205bf215546Sopenharmony_ci}
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_cistatic mali_ptr
208bf215546Sopenharmony_cipanvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
209bf215546Sopenharmony_ci                                struct pan_pool *desc_pool,
210bf215546Sopenharmony_ci                                mali_ptr shader,
211bf215546Sopenharmony_ci                                const struct pan_shader_info *shader_info,
212bf215546Sopenharmony_ci                                enum pipe_format fmt, unsigned wrmask,
213bf215546Sopenharmony_ci                                bool from_img)
214bf215546Sopenharmony_ci{
215bf215546Sopenharmony_ci   struct panfrost_ptr rsd_ptr =
216bf215546Sopenharmony_ci      pan_pool_alloc_desc_aggregate(desc_pool,
217bf215546Sopenharmony_ci                                    PAN_DESC(RENDERER_STATE),
218bf215546Sopenharmony_ci                                    PAN_DESC_ARRAY(1, BLEND));
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci   bool raw = util_format_get_blocksize(fmt) > 4;
221bf215546Sopenharmony_ci   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
222bf215546Sopenharmony_ci   bool partialwrite = fullmask != wrmask && !raw;
223bf215546Sopenharmony_ci   bool readstb = fullmask != wrmask && raw;
224bf215546Sopenharmony_ci
225bf215546Sopenharmony_ci   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
226bf215546Sopenharmony_ci      pan_shader_prepare_rsd(shader_info, shader, &cfg);
227bf215546Sopenharmony_ci      if (from_img) {
228bf215546Sopenharmony_ci         cfg.shader.varying_count = 1;
229bf215546Sopenharmony_ci         cfg.shader.texture_count = 1;
230bf215546Sopenharmony_ci         cfg.shader.sampler_count = 1;
231bf215546Sopenharmony_ci      }
232bf215546Sopenharmony_ci      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
233bf215546Sopenharmony_ci      cfg.multisample_misc.sample_mask = UINT16_MAX;
234bf215546Sopenharmony_ci      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
235bf215546Sopenharmony_ci      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
236bf215546Sopenharmony_ci      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
237bf215546Sopenharmony_ci      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
238bf215546Sopenharmony_ci      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
239bf215546Sopenharmony_ci      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
240bf215546Sopenharmony_ci      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
241bf215546Sopenharmony_ci      cfg.stencil_front.mask = 0xFF;
242bf215546Sopenharmony_ci      cfg.stencil_back = cfg.stencil_front;
243bf215546Sopenharmony_ci
244bf215546Sopenharmony_ci      cfg.properties.allow_forward_pixel_to_be_killed = true;
245bf215546Sopenharmony_ci      cfg.properties.allow_forward_pixel_to_kill =
246bf215546Sopenharmony_ci         !partialwrite && !readstb;
247bf215546Sopenharmony_ci      cfg.properties.zs_update_operation =
248bf215546Sopenharmony_ci         MALI_PIXEL_KILL_STRONG_EARLY;
249bf215546Sopenharmony_ci      cfg.properties.pixel_kill_operation =
250bf215546Sopenharmony_ci         MALI_PIXEL_KILL_FORCE_EARLY;
251bf215546Sopenharmony_ci   }
252bf215546Sopenharmony_ci
253bf215546Sopenharmony_ci   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
254bf215546Sopenharmony_ci      cfg.round_to_fb_precision = true;
255bf215546Sopenharmony_ci      cfg.load_destination = partialwrite;
256bf215546Sopenharmony_ci      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
257bf215546Sopenharmony_ci      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
258bf215546Sopenharmony_ci      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
259bf215546Sopenharmony_ci      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
260bf215546Sopenharmony_ci      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
261bf215546Sopenharmony_ci      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
262bf215546Sopenharmony_ci      cfg.internal.mode =
263bf215546Sopenharmony_ci         partialwrite ?
264bf215546Sopenharmony_ci         MALI_BLEND_MODE_FIXED_FUNCTION :
265bf215546Sopenharmony_ci         MALI_BLEND_MODE_OPAQUE;
266bf215546Sopenharmony_ci      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
267bf215546Sopenharmony_ci      cfg.internal.fixed_function.num_comps = 4;
268bf215546Sopenharmony_ci      if (!raw) {
269bf215546Sopenharmony_ci         cfg.internal.fixed_function.conversion.memory_format =
270bf215546Sopenharmony_ci            panfrost_format_to_bifrost_blend(pdev, fmt, false);
271bf215546Sopenharmony_ci         cfg.internal.fixed_function.conversion.register_format =
272bf215546Sopenharmony_ci            MALI_REGISTER_FILE_FORMAT_F32;
273bf215546Sopenharmony_ci      } else {
274bf215546Sopenharmony_ci         unsigned imgtexelsz = util_format_get_blocksize(fmt);
275bf215546Sopenharmony_ci
276bf215546Sopenharmony_ci         cfg.internal.fixed_function.conversion.memory_format =
277bf215546Sopenharmony_ci            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
278bf215546Sopenharmony_ci         cfg.internal.fixed_function.conversion.register_format =
279bf215546Sopenharmony_ci            (imgtexelsz & 2) ?
280bf215546Sopenharmony_ci            MALI_REGISTER_FILE_FORMAT_U16 :
281bf215546Sopenharmony_ci            MALI_REGISTER_FILE_FORMAT_U32;
282bf215546Sopenharmony_ci      }
283bf215546Sopenharmony_ci   }
284bf215546Sopenharmony_ci
285bf215546Sopenharmony_ci   return rsd_ptr.gpu;
286bf215546Sopenharmony_ci}
287bf215546Sopenharmony_ci
288bf215546Sopenharmony_cistatic mali_ptr
289bf215546Sopenharmony_cipanvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
290bf215546Sopenharmony_ci                                struct pan_pool *desc_pool,
291bf215546Sopenharmony_ci                                mali_ptr shader,
292bf215546Sopenharmony_ci                                const struct pan_shader_info *shader_info,
293bf215546Sopenharmony_ci                                bool from_img)
294bf215546Sopenharmony_ci{
295bf215546Sopenharmony_ci   struct panfrost_ptr rsd_ptr =
296bf215546Sopenharmony_ci      pan_pool_alloc_desc_aggregate(desc_pool,
297bf215546Sopenharmony_ci                                    PAN_DESC(RENDERER_STATE));
298bf215546Sopenharmony_ci
299bf215546Sopenharmony_ci   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
300bf215546Sopenharmony_ci      pan_shader_prepare_rsd(shader_info, shader, &cfg);
301bf215546Sopenharmony_ci      if (from_img) {
302bf215546Sopenharmony_ci         cfg.shader.texture_count = 1;
303bf215546Sopenharmony_ci         cfg.shader.sampler_count = 1;
304bf215546Sopenharmony_ci      }
305bf215546Sopenharmony_ci   }
306bf215546Sopenharmony_ci
307bf215546Sopenharmony_ci   return rsd_ptr.gpu;
308bf215546Sopenharmony_ci}
309bf215546Sopenharmony_ci
310bf215546Sopenharmony_cistatic mali_ptr
311bf215546Sopenharmony_cipanvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
312bf215546Sopenharmony_ci                               struct pan_pool *bin_pool,
313bf215546Sopenharmony_ci                               enum pipe_format srcfmt,
314bf215546Sopenharmony_ci                               enum pipe_format dstfmt, unsigned dstmask,
315bf215546Sopenharmony_ci                               unsigned texdim, bool texisarray, bool is_ms,
316bf215546Sopenharmony_ci                               struct pan_shader_info *shader_info)
317bf215546Sopenharmony_ci{
318bf215546Sopenharmony_ci   nir_builder b =
319bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
320bf215546Sopenharmony_ci                                     GENX(pan_shader_get_compiler_options)(),
321bf215546Sopenharmony_ci                                     "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
322bf215546Sopenharmony_ci                                     util_format_name(srcfmt), util_format_name(dstfmt),
323bf215546Sopenharmony_ci                                     texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");
324bf215546Sopenharmony_ci
325bf215546Sopenharmony_ci   nir_variable *coord_var =
326bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_in,
327bf215546Sopenharmony_ci                          glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
328bf215546Sopenharmony_ci                          "coord");
329bf215546Sopenharmony_ci   coord_var->data.location = VARYING_SLOT_VAR0;
330bf215546Sopenharmony_ci   nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));
331bf215546Sopenharmony_ci
332bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
333bf215546Sopenharmony_ci   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
334bf215546Sopenharmony_ci   tex->texture_index = 0;
335bf215546Sopenharmony_ci   tex->is_array = texisarray;
336bf215546Sopenharmony_ci   tex->dest_type = util_format_is_unorm(srcfmt) ?
337bf215546Sopenharmony_ci                    nir_type_float32 : nir_type_uint32;
338bf215546Sopenharmony_ci
339bf215546Sopenharmony_ci   switch (texdim) {
340bf215546Sopenharmony_ci   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
341bf215546Sopenharmony_ci   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
342bf215546Sopenharmony_ci   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
343bf215546Sopenharmony_ci   default: unreachable("Invalid texture dimension");
344bf215546Sopenharmony_ci   }
345bf215546Sopenharmony_ci
346bf215546Sopenharmony_ci   tex->src[0].src_type = nir_tex_src_coord;
347bf215546Sopenharmony_ci   tex->src[0].src = nir_src_for_ssa(coord);
348bf215546Sopenharmony_ci   tex->coord_components = texdim + texisarray;
349bf215546Sopenharmony_ci
350bf215546Sopenharmony_ci   if (is_ms) {
351bf215546Sopenharmony_ci      tex->src[1].src_type = nir_tex_src_ms_index;
352bf215546Sopenharmony_ci      tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
353bf215546Sopenharmony_ci   }
354bf215546Sopenharmony_ci
355bf215546Sopenharmony_ci   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
356bf215546Sopenharmony_ci                     nir_alu_type_get_type_size(tex->dest_type), NULL);
357bf215546Sopenharmony_ci   nir_builder_instr_insert(&b, &tex->instr);
358bf215546Sopenharmony_ci
359bf215546Sopenharmony_ci   nir_ssa_def *texel = &tex->dest.ssa;
360bf215546Sopenharmony_ci
361bf215546Sopenharmony_ci   unsigned dstcompsz =
362bf215546Sopenharmony_ci      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
363bf215546Sopenharmony_ci   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
364bf215546Sopenharmony_ci   const struct glsl_type *outtype = NULL;
365bf215546Sopenharmony_ci
366bf215546Sopenharmony_ci   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
367bf215546Sopenharmony_ci      nir_ssa_def *rgb =
368bf215546Sopenharmony_ci         nir_f2u32(&b, nir_fmul(&b, texel,
369bf215546Sopenharmony_ci                                nir_vec3(&b,
370bf215546Sopenharmony_ci                                         nir_imm_float(&b, 31),
371bf215546Sopenharmony_ci                                         nir_imm_float(&b, 63),
372bf215546Sopenharmony_ci                                         nir_imm_float(&b, 31))));
373bf215546Sopenharmony_ci      nir_ssa_def *rg =
374bf215546Sopenharmony_ci         nir_vec2(&b,
375bf215546Sopenharmony_ci                  nir_ior(&b, nir_channel(&b, rgb, 0),
376bf215546Sopenharmony_ci                          nir_ishl(&b, nir_channel(&b, rgb, 1),
377bf215546Sopenharmony_ci                                   nir_imm_int(&b, 5))),
378bf215546Sopenharmony_ci                  nir_ior(&b,
379bf215546Sopenharmony_ci                          nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
380bf215546Sopenharmony_ci                          nir_ishl(&b, nir_channel(&b, rgb, 2),
381bf215546Sopenharmony_ci                                   nir_imm_int(&b, 3))));
382bf215546Sopenharmony_ci      rg = nir_iand_imm(&b, rg, 255);
383bf215546Sopenharmony_ci      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
384bf215546Sopenharmony_ci      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
385bf215546Sopenharmony_ci   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
386bf215546Sopenharmony_ci      nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
387bf215546Sopenharmony_ci      nir_ssa_def *rgb =
388bf215546Sopenharmony_ci         nir_vec3(&b,
389bf215546Sopenharmony_ci                  nir_channel(&b, rg, 0),
390bf215546Sopenharmony_ci                  nir_ior(&b,
391bf215546Sopenharmony_ci                          nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
392bf215546Sopenharmony_ci                          nir_ishl(&b, nir_channel(&b, rg, 1),
393bf215546Sopenharmony_ci                                   nir_imm_int(&b, 3))),
394bf215546Sopenharmony_ci                  nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
395bf215546Sopenharmony_ci      rgb = nir_iand(&b, rgb,
396bf215546Sopenharmony_ci                     nir_vec3(&b,
397bf215546Sopenharmony_ci                              nir_imm_int(&b, 31),
398bf215546Sopenharmony_ci                              nir_imm_int(&b, 63),
399bf215546Sopenharmony_ci                              nir_imm_int(&b, 31)));
400bf215546Sopenharmony_ci      texel = nir_fmul(&b, nir_u2f32(&b, rgb),
401bf215546Sopenharmony_ci                       nir_vec3(&b,
402bf215546Sopenharmony_ci                                nir_imm_float(&b, 1.0 / 31),
403bf215546Sopenharmony_ci                                nir_imm_float(&b, 1.0 / 63),
404bf215546Sopenharmony_ci                                nir_imm_float(&b, 1.0 / 31)));
405bf215546Sopenharmony_ci      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
406bf215546Sopenharmony_ci   } else {
407bf215546Sopenharmony_ci      assert(srcfmt == dstfmt);
408bf215546Sopenharmony_ci      enum glsl_base_type basetype;
409bf215546Sopenharmony_ci      if (util_format_is_unorm(dstfmt)) {
410bf215546Sopenharmony_ci         basetype = GLSL_TYPE_FLOAT;
411bf215546Sopenharmony_ci      } else if (dstcompsz == 16) {
412bf215546Sopenharmony_ci         basetype = GLSL_TYPE_UINT16;
413bf215546Sopenharmony_ci      } else {
414bf215546Sopenharmony_ci         assert(dstcompsz == 32);
415bf215546Sopenharmony_ci         basetype = GLSL_TYPE_UINT;
416bf215546Sopenharmony_ci      }
417bf215546Sopenharmony_ci
418bf215546Sopenharmony_ci      if (dstcompsz == 16)
419bf215546Sopenharmony_ci         texel = nir_u2u16(&b, texel);
420bf215546Sopenharmony_ci
421bf215546Sopenharmony_ci      texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
422bf215546Sopenharmony_ci      outtype = glsl_vector_type(basetype, ndstcomps);
423bf215546Sopenharmony_ci   }
424bf215546Sopenharmony_ci
425bf215546Sopenharmony_ci   nir_variable *out =
426bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
427bf215546Sopenharmony_ci   out->data.location = FRAG_RESULT_DATA0;
428bf215546Sopenharmony_ci
429bf215546Sopenharmony_ci   unsigned fullmask = (1 << ndstcomps) - 1;
430bf215546Sopenharmony_ci   if (dstcompsz > 8 && dstmask != fullmask) {
431bf215546Sopenharmony_ci      nir_ssa_def *oldtexel = nir_load_var(&b, out);
432bf215546Sopenharmony_ci      nir_ssa_def *dstcomps[4];
433bf215546Sopenharmony_ci
434bf215546Sopenharmony_ci      for (unsigned i = 0; i < ndstcomps; i++) {
435bf215546Sopenharmony_ci         if (dstmask & BITFIELD_BIT(i))
436bf215546Sopenharmony_ci            dstcomps[i] = nir_channel(&b, texel, i);
437bf215546Sopenharmony_ci         else
438bf215546Sopenharmony_ci            dstcomps[i] = nir_channel(&b, oldtexel, i);
439bf215546Sopenharmony_ci      }
440bf215546Sopenharmony_ci
441bf215546Sopenharmony_ci      texel = nir_vec(&b, dstcomps, ndstcomps);
442bf215546Sopenharmony_ci   }
443bf215546Sopenharmony_ci
444bf215546Sopenharmony_ci   nir_store_var(&b, out, texel, 0xff);
445bf215546Sopenharmony_ci
446bf215546Sopenharmony_ci   struct panfrost_compile_inputs inputs = {
447bf215546Sopenharmony_ci      .gpu_id = pdev->gpu_id,
448bf215546Sopenharmony_ci      .is_blit = true,
449bf215546Sopenharmony_ci      .no_ubo_to_push = true,
450bf215546Sopenharmony_ci   };
451bf215546Sopenharmony_ci
452bf215546Sopenharmony_ci   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
453bf215546Sopenharmony_ci      cfg.memory_format = (dstcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
454bf215546Sopenharmony_ci      cfg.register_format = dstcompsz == 2 ?
455bf215546Sopenharmony_ci                            MALI_REGISTER_FILE_FORMAT_U16 :
456bf215546Sopenharmony_ci                            MALI_REGISTER_FILE_FORMAT_U32;
457bf215546Sopenharmony_ci   }
458bf215546Sopenharmony_ci   inputs.bifrost.static_rt_conv = true;
459bf215546Sopenharmony_ci
460bf215546Sopenharmony_ci   struct util_dynarray binary;
461bf215546Sopenharmony_ci
462bf215546Sopenharmony_ci   util_dynarray_init(&binary, NULL);
463bf215546Sopenharmony_ci   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
464bf215546Sopenharmony_ci
465bf215546Sopenharmony_ci   shader_info->fs.sample_shading = is_ms;
466bf215546Sopenharmony_ci
467bf215546Sopenharmony_ci   mali_ptr shader =
468bf215546Sopenharmony_ci      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
469bf215546Sopenharmony_ci
470bf215546Sopenharmony_ci   util_dynarray_fini(&binary);
471bf215546Sopenharmony_ci   ralloc_free(b.shader);
472bf215546Sopenharmony_ci
473bf215546Sopenharmony_ci   return shader;
474bf215546Sopenharmony_ci}
475bf215546Sopenharmony_ci
476bf215546Sopenharmony_cistatic enum pipe_format
477bf215546Sopenharmony_cipanvk_meta_copy_img_format(enum pipe_format fmt)
478bf215546Sopenharmony_ci{
479bf215546Sopenharmony_ci   /* We can't use a non-compressed format when handling a tiled/AFBC
480bf215546Sopenharmony_ci    * compressed format because the tile size differ (4x4 blocks for
481bf215546Sopenharmony_ci    * compressed formats and 16x16 texels for non-compressed ones).
482bf215546Sopenharmony_ci    */
483bf215546Sopenharmony_ci   assert(!util_format_is_compressed(fmt));
484bf215546Sopenharmony_ci
485bf215546Sopenharmony_ci   /* Pick blendable formats when we can, otherwise pick the UINT variant
486bf215546Sopenharmony_ci    * matching the texel size.
487bf215546Sopenharmony_ci    */
488bf215546Sopenharmony_ci   switch (util_format_get_blocksize(fmt)) {
489bf215546Sopenharmony_ci   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
490bf215546Sopenharmony_ci   case 12: return PIPE_FORMAT_R32G32B32_UINT;
491bf215546Sopenharmony_ci   case 8: return PIPE_FORMAT_R32G32_UINT;
492bf215546Sopenharmony_ci   case 6: return PIPE_FORMAT_R16G16B16_UINT;
493bf215546Sopenharmony_ci   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
494bf215546Sopenharmony_ci   case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
495bf215546Sopenharmony_ci                   fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
496bf215546Sopenharmony_ci                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
497bf215546Sopenharmony_ci   case 1: return PIPE_FORMAT_R8_UNORM;
498bf215546Sopenharmony_ci   default: unreachable("Unsupported format\n");
499bf215546Sopenharmony_ci   }
500bf215546Sopenharmony_ci}
501bf215546Sopenharmony_ci
502bf215546Sopenharmony_cistruct panvk_meta_copy_img2img_format_info {
503bf215546Sopenharmony_ci   enum pipe_format srcfmt;
504bf215546Sopenharmony_ci   enum pipe_format dstfmt;
505bf215546Sopenharmony_ci   unsigned dstmask;
506bf215546Sopenharmony_ci} PACKED;
507bf215546Sopenharmony_ci
508bf215546Sopenharmony_cistatic const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
509bf215546Sopenharmony_ci   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1},
510bf215546Sopenharmony_ci   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
511bf215546Sopenharmony_ci   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
512bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7},
513bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3},
514bf215546Sopenharmony_ci   /* Z24S8(depth) */
515bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
516bf215546Sopenharmony_ci   /* Z24S8(stencil) */
517bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
518bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
519bf215546Sopenharmony_ci   { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
520bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
521bf215546Sopenharmony_ci   /* Z32S8X24(depth) */
522bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
523bf215546Sopenharmony_ci   /* Z32S8X24(stencil) */
524bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
525bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
526bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
527bf215546Sopenharmony_ci};
528bf215546Sopenharmony_ci
529bf215546Sopenharmony_cistatic unsigned
530bf215546Sopenharmony_cipanvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
531bf215546Sopenharmony_ci{
532bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
533bf215546Sopenharmony_ci
534bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
535bf215546Sopenharmony_ci      if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
536bf215546Sopenharmony_ci         return i;
537bf215546Sopenharmony_ci   }
538bf215546Sopenharmony_ci
539bf215546Sopenharmony_ci   unreachable("Invalid image format\n");
540bf215546Sopenharmony_ci}
541bf215546Sopenharmony_ci
542bf215546Sopenharmony_cistatic unsigned
543bf215546Sopenharmony_cipanvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
544bf215546Sopenharmony_ci{
545bf215546Sopenharmony_ci   if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
546bf215546Sopenharmony_ci       aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
547bf215546Sopenharmony_ci      enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);
548bf215546Sopenharmony_ci
549bf215546Sopenharmony_ci      return (1 << util_format_get_nr_components(outfmt)) - 1;
550bf215546Sopenharmony_ci   }
551bf215546Sopenharmony_ci
552bf215546Sopenharmony_ci   switch (imgfmt) {
553bf215546Sopenharmony_ci   case PIPE_FORMAT_S8_UINT:
554bf215546Sopenharmony_ci      return 1;
555bf215546Sopenharmony_ci   case PIPE_FORMAT_Z16_UNORM:
556bf215546Sopenharmony_ci      return 3;
557bf215546Sopenharmony_ci   case PIPE_FORMAT_Z16_UNORM_S8_UINT:
558bf215546Sopenharmony_ci      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
559bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
560bf215546Sopenharmony_ci      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
561bf215546Sopenharmony_ci   case PIPE_FORMAT_Z24X8_UNORM:
562bf215546Sopenharmony_ci      assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
563bf215546Sopenharmony_ci      return 7;
564bf215546Sopenharmony_ci   case PIPE_FORMAT_Z32_FLOAT:
565bf215546Sopenharmony_ci      return 0xf;
566bf215546Sopenharmony_ci   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
567bf215546Sopenharmony_ci      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
568bf215546Sopenharmony_ci   default:
569bf215546Sopenharmony_ci      unreachable("Invalid depth format\n");
570bf215546Sopenharmony_ci   }
571bf215546Sopenharmony_ci}
572bf215546Sopenharmony_ci
573bf215546Sopenharmony_cistatic void
574bf215546Sopenharmony_cipanvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
575bf215546Sopenharmony_ci                        const struct panvk_image *src,
576bf215546Sopenharmony_ci                        const struct panvk_image *dst,
577bf215546Sopenharmony_ci                        const VkImageCopy2 *region)
578bf215546Sopenharmony_ci{
579bf215546Sopenharmony_ci   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
580bf215546Sopenharmony_ci   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
581bf215546Sopenharmony_ci   struct panvk_meta_copy_img2img_format_info key = {
582bf215546Sopenharmony_ci      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
583bf215546Sopenharmony_ci      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
584bf215546Sopenharmony_ci      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
585bf215546Sopenharmony_ci                                          region->dstSubresource.aspectMask),
586bf215546Sopenharmony_ci   };
587bf215546Sopenharmony_ci
588bf215546Sopenharmony_ci   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);
589bf215546Sopenharmony_ci
590bf215546Sopenharmony_ci   unsigned texdimidx =
591bf215546Sopenharmony_ci      panvk_meta_copy_tex_type(src->pimage.layout.dim,
592bf215546Sopenharmony_ci                               src->pimage.layout.array_size > 1);
593bf215546Sopenharmony_ci   unsigned fmtidx =
594bf215546Sopenharmony_ci      panvk_meta_copy_img2img_format_idx(key);
595bf215546Sopenharmony_ci   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;
596bf215546Sopenharmony_ci
597bf215546Sopenharmony_ci   mali_ptr rsd =
598bf215546Sopenharmony_ci      cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;
599bf215546Sopenharmony_ci
600bf215546Sopenharmony_ci   struct pan_image_view srcview = {
601bf215546Sopenharmony_ci      .format = key.srcfmt,
602bf215546Sopenharmony_ci      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
603bf215546Sopenharmony_ci             MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
604bf215546Sopenharmony_ci      .image = &src->pimage,
605bf215546Sopenharmony_ci      .nr_samples = src->pimage.layout.nr_samples,
606bf215546Sopenharmony_ci      .first_level = region->srcSubresource.mipLevel,
607bf215546Sopenharmony_ci      .last_level = region->srcSubresource.mipLevel,
608bf215546Sopenharmony_ci      .first_layer = region->srcSubresource.baseArrayLayer,
609bf215546Sopenharmony_ci      .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
610bf215546Sopenharmony_ci      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
611bf215546Sopenharmony_ci   };
612bf215546Sopenharmony_ci
613bf215546Sopenharmony_ci   struct pan_image_view dstview = {
614bf215546Sopenharmony_ci      .format = key.dstfmt,
615bf215546Sopenharmony_ci      .dim = MALI_TEXTURE_DIMENSION_2D,
616bf215546Sopenharmony_ci      .image = &dst->pimage,
617bf215546Sopenharmony_ci      .nr_samples = dst->pimage.layout.nr_samples,
618bf215546Sopenharmony_ci      .first_level = region->dstSubresource.mipLevel,
619bf215546Sopenharmony_ci      .last_level = region->dstSubresource.mipLevel,
620bf215546Sopenharmony_ci      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
621bf215546Sopenharmony_ci   };
622bf215546Sopenharmony_ci
623bf215546Sopenharmony_ci   unsigned minx = MAX2(region->dstOffset.x, 0);
624bf215546Sopenharmony_ci   unsigned miny = MAX2(region->dstOffset.y, 0);
625bf215546Sopenharmony_ci   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
626bf215546Sopenharmony_ci   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);
627bf215546Sopenharmony_ci
628bf215546Sopenharmony_ci   mali_ptr vpd =
629bf215546Sopenharmony_ci      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
630bf215546Sopenharmony_ci                                         minx, miny, maxx, maxy);
631bf215546Sopenharmony_ci
632bf215546Sopenharmony_ci   float dst_rect[] = {
633bf215546Sopenharmony_ci      minx, miny, 0.0, 1.0,
634bf215546Sopenharmony_ci      maxx + 1, miny, 0.0, 1.0,
635bf215546Sopenharmony_ci      minx, maxy + 1, 0.0, 1.0,
636bf215546Sopenharmony_ci      maxx + 1, maxy + 1, 0.0, 1.0,
637bf215546Sopenharmony_ci   };
638bf215546Sopenharmony_ci
639bf215546Sopenharmony_ci   mali_ptr dst_coords =
640bf215546Sopenharmony_ci      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
641bf215546Sopenharmony_ci                              sizeof(dst_rect), 64);
642bf215546Sopenharmony_ci
643bf215546Sopenharmony_ci   /* TODO: don't force preloads of dst resources if unneeded */
644bf215546Sopenharmony_ci
645bf215546Sopenharmony_ci   unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
646bf215546Sopenharmony_ci   unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
647bf215546Sopenharmony_ci   cmdbuf->state.fb.crc_valid[0] = false;
648bf215546Sopenharmony_ci   *fbinfo = (struct pan_fb_info){
649bf215546Sopenharmony_ci      .width = width,
650bf215546Sopenharmony_ci      .height = height,
651bf215546Sopenharmony_ci      .extent.minx = minx & ~31,
652bf215546Sopenharmony_ci      .extent.miny = miny & ~31,
653bf215546Sopenharmony_ci      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
654bf215546Sopenharmony_ci      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
655bf215546Sopenharmony_ci      .nr_samples = dst->pimage.layout.nr_samples,
656bf215546Sopenharmony_ci      .rt_count = 1,
657bf215546Sopenharmony_ci      .rts[0].view = &dstview,
658bf215546Sopenharmony_ci      .rts[0].preload = true,
659bf215546Sopenharmony_ci      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
660bf215546Sopenharmony_ci   };
661bf215546Sopenharmony_ci
662bf215546Sopenharmony_ci   mali_ptr texture =
663bf215546Sopenharmony_ci      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
664bf215546Sopenharmony_ci   mali_ptr sampler =
665bf215546Sopenharmony_ci      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);
666bf215546Sopenharmony_ci
667bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
668bf215546Sopenharmony_ci
669bf215546Sopenharmony_ci   minx = MAX2(region->srcOffset.x, 0);
670bf215546Sopenharmony_ci   miny = MAX2(region->srcOffset.y, 0);
671bf215546Sopenharmony_ci   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
672bf215546Sopenharmony_ci   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
673bf215546Sopenharmony_ci   assert(region->dstOffset.z >= 0);
674bf215546Sopenharmony_ci
675bf215546Sopenharmony_ci   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
676bf215546Sopenharmony_ci   unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
677bf215546Sopenharmony_ci   unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
678bf215546Sopenharmony_ci   for (unsigned l = 0; l < nlayers; l++) {
679bf215546Sopenharmony_ci      unsigned src_l = l + first_src_layer;
680bf215546Sopenharmony_ci      float src_rect[] = {
681bf215546Sopenharmony_ci         minx, miny, src_l, 1.0,
682bf215546Sopenharmony_ci         maxx + 1, miny, src_l, 1.0,
683bf215546Sopenharmony_ci         minx, maxy + 1, src_l, 1.0,
684bf215546Sopenharmony_ci         maxx + 1, maxy + 1, src_l, 1.0,
685bf215546Sopenharmony_ci      };
686bf215546Sopenharmony_ci
687bf215546Sopenharmony_ci      mali_ptr src_coords =
688bf215546Sopenharmony_ci         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
689bf215546Sopenharmony_ci                                 sizeof(src_rect), 64);
690bf215546Sopenharmony_ci
691bf215546Sopenharmony_ci      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
692bf215546Sopenharmony_ci
693bf215546Sopenharmony_ci      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
694bf215546Sopenharmony_ci      batch->blit.src = src->pimage.data.bo;
695bf215546Sopenharmony_ci      batch->blit.dst = dst->pimage.data.bo;
696bf215546Sopenharmony_ci      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
697bf215546Sopenharmony_ci      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
698bf215546Sopenharmony_ci      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
699bf215546Sopenharmony_ci
700bf215546Sopenharmony_ci      mali_ptr tsd, tiler;
701bf215546Sopenharmony_ci
702bf215546Sopenharmony_ci      tsd = batch->tls.gpu;
703bf215546Sopenharmony_ci      tiler = batch->tiler.descs.gpu;
704bf215546Sopenharmony_ci
705bf215546Sopenharmony_ci      struct panfrost_ptr job;
706bf215546Sopenharmony_ci
707bf215546Sopenharmony_ci      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
708bf215546Sopenharmony_ci                                           &batch->scoreboard,
709bf215546Sopenharmony_ci                                           src_coords, dst_coords,
710bf215546Sopenharmony_ci                                           texture, sampler, 0,
711bf215546Sopenharmony_ci                                           vpd, rsd, tsd, tiler);
712bf215546Sopenharmony_ci
713bf215546Sopenharmony_ci      util_dynarray_append(&batch->jobs, void *, job.cpu);
714bf215546Sopenharmony_ci      panvk_per_arch(cmd_close_batch)(cmdbuf);
715bf215546Sopenharmony_ci   }
716bf215546Sopenharmony_ci}
717bf215546Sopenharmony_ci
718bf215546Sopenharmony_cistatic void
719bf215546Sopenharmony_cipanvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
720bf215546Sopenharmony_ci{
721bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);
722bf215546Sopenharmony_ci
723bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
724bf215546Sopenharmony_ci      for (unsigned texdim = 1; texdim <= 3; texdim++) {
725bf215546Sopenharmony_ci         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
726bf215546Sopenharmony_ci         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
727bf215546Sopenharmony_ci
728bf215546Sopenharmony_ci         /* No MSAA on 3D textures */
729bf215546Sopenharmony_ci         if (texdim == 3 && is_ms) continue;
730bf215546Sopenharmony_ci
731bf215546Sopenharmony_ci         struct pan_shader_info shader_info;
732bf215546Sopenharmony_ci         mali_ptr shader =
733bf215546Sopenharmony_ci            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
734bf215546Sopenharmony_ci                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
735bf215546Sopenharmony_ci                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
736bf215546Sopenharmony_ci                                           panvk_meta_copy_img2img_fmts[i].dstmask,
737bf215546Sopenharmony_ci                                           texdim, false, is_ms, &shader_info);
738bf215546Sopenharmony_ci         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
739bf215546Sopenharmony_ci            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
740bf215546Sopenharmony_ci                                            shader, &shader_info,
741bf215546Sopenharmony_ci                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
742bf215546Sopenharmony_ci                                            panvk_meta_copy_img2img_fmts[i].dstmask,
743bf215546Sopenharmony_ci                                            true);
744bf215546Sopenharmony_ci         if (texdim == 3)
745bf215546Sopenharmony_ci            continue;
746bf215546Sopenharmony_ci
747bf215546Sopenharmony_ci         memset(&shader_info, 0, sizeof(shader_info));
748bf215546Sopenharmony_ci         texdimidx = panvk_meta_copy_tex_type(texdim, true);
749bf215546Sopenharmony_ci         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
750bf215546Sopenharmony_ci         shader =
751bf215546Sopenharmony_ci            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
752bf215546Sopenharmony_ci                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
753bf215546Sopenharmony_ci                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
754bf215546Sopenharmony_ci                                           panvk_meta_copy_img2img_fmts[i].dstmask,
755bf215546Sopenharmony_ci                                           texdim, true, is_ms, &shader_info);
756bf215546Sopenharmony_ci         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
757bf215546Sopenharmony_ci            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
758bf215546Sopenharmony_ci                                            shader, &shader_info,
759bf215546Sopenharmony_ci                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
760bf215546Sopenharmony_ci                                            panvk_meta_copy_img2img_fmts[i].dstmask,
761bf215546Sopenharmony_ci                                            true);
762bf215546Sopenharmony_ci      }
763bf215546Sopenharmony_ci   }
764bf215546Sopenharmony_ci}
765bf215546Sopenharmony_ci
766bf215546Sopenharmony_civoid
767bf215546Sopenharmony_cipanvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
768bf215546Sopenharmony_ci                              const VkCopyImageInfo2 *pCopyImageInfo)
769bf215546Sopenharmony_ci{
770bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
771bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
772bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);
773bf215546Sopenharmony_ci
774bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
775bf215546Sopenharmony_ci      panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
776bf215546Sopenharmony_ci   }
777bf215546Sopenharmony_ci}
778bf215546Sopenharmony_ci
779bf215546Sopenharmony_cistatic unsigned
780bf215546Sopenharmony_cipanvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
781bf215546Sopenharmony_ci{
782bf215546Sopenharmony_ci   unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
783bf215546Sopenharmony_ci   unsigned nbufcomps = util_bitcount(mask);
784bf215546Sopenharmony_ci
785bf215546Sopenharmony_ci   if (nbufcomps == util_format_get_nr_components(imgfmt))
786bf215546Sopenharmony_ci      return imgtexelsz;
787bf215546Sopenharmony_ci
788bf215546Sopenharmony_ci   /* Special case for Z24 buffers which are not tightly packed */
789bf215546Sopenharmony_ci   if (mask == 7 && imgtexelsz == 4)
790bf215546Sopenharmony_ci      return 4;
791bf215546Sopenharmony_ci
792bf215546Sopenharmony_ci   /* Special case for S8 extraction from Z32_S8X24 */
793bf215546Sopenharmony_ci   if (mask == 2 && imgtexelsz == 8)
794bf215546Sopenharmony_ci      return 1;
795bf215546Sopenharmony_ci
796bf215546Sopenharmony_ci   unsigned compsz =
797bf215546Sopenharmony_ci      util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
798bf215546Sopenharmony_ci
799bf215546Sopenharmony_ci   assert(!(compsz % 8));
800bf215546Sopenharmony_ci
801bf215546Sopenharmony_ci   return nbufcomps * compsz / 8;
802bf215546Sopenharmony_ci}
803bf215546Sopenharmony_ci
804bf215546Sopenharmony_cistatic enum pipe_format
805bf215546Sopenharmony_cipanvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
806bf215546Sopenharmony_ci{
807bf215546Sopenharmony_ci   /* Pick blendable formats when we can, and the FLOAT variant matching the
808bf215546Sopenharmony_ci    * texelsize otherwise.
809bf215546Sopenharmony_ci    */
810bf215546Sopenharmony_ci   switch (util_format_get_blocksize(imgfmt)) {
811bf215546Sopenharmony_ci   case 1: return PIPE_FORMAT_R8_UNORM;
812bf215546Sopenharmony_ci   /* AFBC stores things differently for RGB565,
813bf215546Sopenharmony_ci    * we can't simply map to R8G8 in that case */
814bf215546Sopenharmony_ci   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
815bf215546Sopenharmony_ci                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
816bf215546Sopenharmony_ci                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
817bf215546Sopenharmony_ci   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
818bf215546Sopenharmony_ci   case 6: return PIPE_FORMAT_R16G16B16_UINT;
819bf215546Sopenharmony_ci   case 8: return PIPE_FORMAT_R32G32_UINT;
820bf215546Sopenharmony_ci   case 12: return PIPE_FORMAT_R32G32B32_UINT;
821bf215546Sopenharmony_ci   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
822bf215546Sopenharmony_ci   default: unreachable("Invalid format\n");
823bf215546Sopenharmony_ci   }
824bf215546Sopenharmony_ci}
825bf215546Sopenharmony_ci
826bf215546Sopenharmony_cistruct panvk_meta_copy_format_info {
827bf215546Sopenharmony_ci   enum pipe_format imgfmt;
828bf215546Sopenharmony_ci   unsigned mask;
829bf215546Sopenharmony_ci} PACKED;
830bf215546Sopenharmony_ci
831bf215546Sopenharmony_cistatic const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
832bf215546Sopenharmony_ci   { PIPE_FORMAT_R8_UNORM, 0x1 },
833bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8_UNORM, 0x3 },
834bf215546Sopenharmony_ci   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
835bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
836bf215546Sopenharmony_ci   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
837bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, 0x3 },
838bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
839bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
840bf215546Sopenharmony_ci   /* S8 -> Z24S8 */
841bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
842bf215546Sopenharmony_ci   /* S8 -> Z32_S8X24 */
843bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, 0x2 },
844bf215546Sopenharmony_ci   /* Z24X8 -> Z24S8 */
845bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
846bf215546Sopenharmony_ci   /* Z32 -> Z32_S8X24 */
847bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, 0x1 },
848bf215546Sopenharmony_ci};
849bf215546Sopenharmony_ci
850bf215546Sopenharmony_cistruct panvk_meta_copy_buf2img_info {
851bf215546Sopenharmony_ci   struct {
852bf215546Sopenharmony_ci      mali_ptr ptr;
853bf215546Sopenharmony_ci      struct {
854bf215546Sopenharmony_ci         unsigned line;
855bf215546Sopenharmony_ci         unsigned surf;
856bf215546Sopenharmony_ci      } stride;
857bf215546Sopenharmony_ci   } buf;
858bf215546Sopenharmony_ci} PACKED;
859bf215546Sopenharmony_ci
860bf215546Sopenharmony_ci#define panvk_meta_copy_buf2img_get_info_field(b, field) \
861bf215546Sopenharmony_ci        nir_load_push_constant((b), 1, \
862bf215546Sopenharmony_ci                     sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
863bf215546Sopenharmony_ci                     nir_imm_int(b, 0), \
864bf215546Sopenharmony_ci                     .base = offsetof(struct panvk_meta_copy_buf2img_info, field), \
865bf215546Sopenharmony_ci                     .range = ~0)
866bf215546Sopenharmony_ci
867bf215546Sopenharmony_cistatic mali_ptr
868bf215546Sopenharmony_cipanvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
869bf215546Sopenharmony_ci                               struct pan_pool *bin_pool,
870bf215546Sopenharmony_ci                               struct panvk_meta_copy_format_info key,
871bf215546Sopenharmony_ci                               struct pan_shader_info *shader_info)
872bf215546Sopenharmony_ci{
873bf215546Sopenharmony_ci   nir_builder b =
874bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
875bf215546Sopenharmony_ci                                     GENX(pan_shader_get_compiler_options)(),
876bf215546Sopenharmony_ci                                     "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
877bf215546Sopenharmony_ci                                     util_format_name(key.imgfmt),
878bf215546Sopenharmony_ci                                     key.mask);
879bf215546Sopenharmony_ci
880bf215546Sopenharmony_ci   nir_variable *coord_var =
881bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_in,
882bf215546Sopenharmony_ci                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
883bf215546Sopenharmony_ci                          "coord");
884bf215546Sopenharmony_ci   coord_var->data.location = VARYING_SLOT_VAR0;
885bf215546Sopenharmony_ci   nir_ssa_def *coord = nir_load_var(&b, coord_var);
886bf215546Sopenharmony_ci
887bf215546Sopenharmony_ci   coord = nir_f2u32(&b, coord);
888bf215546Sopenharmony_ci
889bf215546Sopenharmony_ci   nir_ssa_def *bufptr =
890bf215546Sopenharmony_ci      panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
891bf215546Sopenharmony_ci   nir_ssa_def *buflinestride =
892bf215546Sopenharmony_ci      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
893bf215546Sopenharmony_ci   nir_ssa_def *bufsurfstride =
894bf215546Sopenharmony_ci      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);
895bf215546Sopenharmony_ci
896bf215546Sopenharmony_ci   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
897bf215546Sopenharmony_ci   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
898bf215546Sopenharmony_ci   unsigned writemask = key.mask;
899bf215546Sopenharmony_ci
900bf215546Sopenharmony_ci   nir_ssa_def *offset =
901bf215546Sopenharmony_ci      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
902bf215546Sopenharmony_ci   offset = nir_iadd(&b, offset,
903bf215546Sopenharmony_ci                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
904bf215546Sopenharmony_ci   offset = nir_iadd(&b, offset,
905bf215546Sopenharmony_ci                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
906bf215546Sopenharmony_ci   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
907bf215546Sopenharmony_ci
908bf215546Sopenharmony_ci   unsigned imgcompsz =
909bf215546Sopenharmony_ci      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
910bf215546Sopenharmony_ci      1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
911bf215546Sopenharmony_ci
912bf215546Sopenharmony_ci   unsigned nimgcomps = imgtexelsz / imgcompsz;
913bf215546Sopenharmony_ci   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
914bf215546Sopenharmony_ci   unsigned nbufcomps = buftexelsz / bufcompsz;
915bf215546Sopenharmony_ci
916bf215546Sopenharmony_ci   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
917bf215546Sopenharmony_ci   assert(nbufcomps <= 4 && nimgcomps <= 4);
918bf215546Sopenharmony_ci
919bf215546Sopenharmony_ci   nir_ssa_def *texel =
920bf215546Sopenharmony_ci      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);
921bf215546Sopenharmony_ci
922bf215546Sopenharmony_ci   enum glsl_base_type basetype;
923bf215546Sopenharmony_ci   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
924bf215546Sopenharmony_ci      texel = nir_vec3(&b,
925bf215546Sopenharmony_ci                       nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
926bf215546Sopenharmony_ci                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
927bf215546Sopenharmony_ci                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
928bf215546Sopenharmony_ci      texel = nir_fmul(&b,
929bf215546Sopenharmony_ci                       nir_u2f32(&b, texel),
930bf215546Sopenharmony_ci                       nir_vec3(&b,
931bf215546Sopenharmony_ci                                nir_imm_float(&b, 1.0f / 31),
932bf215546Sopenharmony_ci                                nir_imm_float(&b, 1.0f / 63),
933bf215546Sopenharmony_ci                                nir_imm_float(&b, 1.0f / 31)));
934bf215546Sopenharmony_ci      nimgcomps = 3;
935bf215546Sopenharmony_ci      basetype = GLSL_TYPE_FLOAT;
936bf215546Sopenharmony_ci   } else if (imgcompsz == 1) {
937bf215546Sopenharmony_ci      assert(bufcompsz == 1);
938bf215546Sopenharmony_ci      /* Blendable formats are unorm and the fixed-function blend unit
939bf215546Sopenharmony_ci       * takes float values.
940bf215546Sopenharmony_ci       */
941bf215546Sopenharmony_ci      texel = nir_fmul(&b, nir_u2f32(&b, texel),
942bf215546Sopenharmony_ci                       nir_imm_float(&b, 1.0f / 255));
943bf215546Sopenharmony_ci      basetype = GLSL_TYPE_FLOAT;
944bf215546Sopenharmony_ci   } else {
945bf215546Sopenharmony_ci      texel = nir_u2uN(&b, texel, imgcompsz * 8);
946bf215546Sopenharmony_ci      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
947bf215546Sopenharmony_ci   }
948bf215546Sopenharmony_ci
949bf215546Sopenharmony_ci   /* We always pass the texel using 32-bit regs for now */
950bf215546Sopenharmony_ci   nir_variable *out =
951bf215546Sopenharmony_ci      nir_variable_create(b.shader, nir_var_shader_out,
952bf215546Sopenharmony_ci                          glsl_vector_type(basetype, nimgcomps),
953bf215546Sopenharmony_ci                          "out");
954bf215546Sopenharmony_ci   out->data.location = FRAG_RESULT_DATA0;
955bf215546Sopenharmony_ci
956bf215546Sopenharmony_ci   uint16_t fullmask = (1 << nimgcomps) - 1;
957bf215546Sopenharmony_ci
958bf215546Sopenharmony_ci   assert(fullmask >= writemask);
959bf215546Sopenharmony_ci
960bf215546Sopenharmony_ci   if (fullmask != writemask) {
961bf215546Sopenharmony_ci      unsigned first_written_comp = ffs(writemask) - 1;
962bf215546Sopenharmony_ci      nir_ssa_def *oldtexel = NULL;
963bf215546Sopenharmony_ci      if (imgcompsz > 1)
964bf215546Sopenharmony_ci         oldtexel = nir_load_var(&b, out);
965bf215546Sopenharmony_ci
966bf215546Sopenharmony_ci      nir_ssa_def *texel_comps[4];
967bf215546Sopenharmony_ci      for (unsigned i = 0; i < nimgcomps; i++) {
968bf215546Sopenharmony_ci         if (writemask & BITFIELD_BIT(i))
969bf215546Sopenharmony_ci            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
970bf215546Sopenharmony_ci         else if (imgcompsz > 1)
971bf215546Sopenharmony_ci            texel_comps[i] = nir_channel(&b, oldtexel, i);
972bf215546Sopenharmony_ci         else
973bf215546Sopenharmony_ci            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
974bf215546Sopenharmony_ci      }
975bf215546Sopenharmony_ci
976bf215546Sopenharmony_ci      texel = nir_vec(&b, texel_comps, nimgcomps);
977bf215546Sopenharmony_ci   }
978bf215546Sopenharmony_ci
979bf215546Sopenharmony_ci   nir_store_var(&b, out, texel, 0xff);
980bf215546Sopenharmony_ci
981bf215546Sopenharmony_ci   struct panfrost_compile_inputs inputs = {
982bf215546Sopenharmony_ci      .gpu_id = pdev->gpu_id,
983bf215546Sopenharmony_ci      .is_blit = true,
984bf215546Sopenharmony_ci      .no_ubo_to_push = true,
985bf215546Sopenharmony_ci   };
986bf215546Sopenharmony_ci
987bf215546Sopenharmony_ci   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
988bf215546Sopenharmony_ci      cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
989bf215546Sopenharmony_ci      cfg.register_format = imgcompsz == 2 ?
990bf215546Sopenharmony_ci                            MALI_REGISTER_FILE_FORMAT_U16 :
991bf215546Sopenharmony_ci                            MALI_REGISTER_FILE_FORMAT_U32;
992bf215546Sopenharmony_ci   }
993bf215546Sopenharmony_ci   inputs.bifrost.static_rt_conv = true;
994bf215546Sopenharmony_ci
995bf215546Sopenharmony_ci   struct util_dynarray binary;
996bf215546Sopenharmony_ci
997bf215546Sopenharmony_ci   util_dynarray_init(&binary, NULL);
998bf215546Sopenharmony_ci   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
999bf215546Sopenharmony_ci   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);
1000bf215546Sopenharmony_ci
1001bf215546Sopenharmony_ci   mali_ptr shader =
1002bf215546Sopenharmony_ci      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
1003bf215546Sopenharmony_ci
1004bf215546Sopenharmony_ci   util_dynarray_fini(&binary);
1005bf215546Sopenharmony_ci   ralloc_free(b.shader);
1006bf215546Sopenharmony_ci
1007bf215546Sopenharmony_ci   return shader;
1008bf215546Sopenharmony_ci}
1009bf215546Sopenharmony_ci
1010bf215546Sopenharmony_cistatic unsigned
1011bf215546Sopenharmony_cipanvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
1012bf215546Sopenharmony_ci{
1013bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1014bf215546Sopenharmony_ci      if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
1015bf215546Sopenharmony_ci         return i;
1016bf215546Sopenharmony_ci   }
1017bf215546Sopenharmony_ci
1018bf215546Sopenharmony_ci   unreachable("Invalid image format\n");
1019bf215546Sopenharmony_ci}
1020bf215546Sopenharmony_ci
1021bf215546Sopenharmony_cistatic void
1022bf215546Sopenharmony_cipanvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
1023bf215546Sopenharmony_ci                        const struct panvk_buffer *buf,
1024bf215546Sopenharmony_ci                        const struct panvk_image *img,
1025bf215546Sopenharmony_ci                        const VkBufferImageCopy2 *region)
1026bf215546Sopenharmony_ci{
1027bf215546Sopenharmony_ci   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
1028bf215546Sopenharmony_ci   unsigned minx = MAX2(region->imageOffset.x, 0);
1029bf215546Sopenharmony_ci   unsigned miny = MAX2(region->imageOffset.y, 0);
1030bf215546Sopenharmony_ci   unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
1031bf215546Sopenharmony_ci   unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
1032bf215546Sopenharmony_ci
1033bf215546Sopenharmony_ci   mali_ptr vpd =
1034bf215546Sopenharmony_ci      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
1035bf215546Sopenharmony_ci                                         minx, miny, maxx, maxy);
1036bf215546Sopenharmony_ci
1037bf215546Sopenharmony_ci   float dst_rect[] = {
1038bf215546Sopenharmony_ci      minx, miny, 0.0, 1.0,
1039bf215546Sopenharmony_ci      maxx + 1, miny, 0.0, 1.0,
1040bf215546Sopenharmony_ci      minx, maxy + 1, 0.0, 1.0,
1041bf215546Sopenharmony_ci      maxx + 1, maxy + 1, 0.0, 1.0,
1042bf215546Sopenharmony_ci   };
1043bf215546Sopenharmony_ci   mali_ptr dst_coords =
1044bf215546Sopenharmony_ci      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
1045bf215546Sopenharmony_ci                              sizeof(dst_rect), 64);
1046bf215546Sopenharmony_ci
1047bf215546Sopenharmony_ci   struct panvk_meta_copy_format_info key = {
1048bf215546Sopenharmony_ci      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
1049bf215546Sopenharmony_ci      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
1050bf215546Sopenharmony_ci                                       region->imageSubresource.aspectMask),
1051bf215546Sopenharmony_ci   };
1052bf215546Sopenharmony_ci
1053bf215546Sopenharmony_ci   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);
1054bf215546Sopenharmony_ci
1055bf215546Sopenharmony_ci   mali_ptr rsd =
1056bf215546Sopenharmony_ci      cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;
1057bf215546Sopenharmony_ci
1058bf215546Sopenharmony_ci   const struct vk_image_buffer_layout buflayout =
1059bf215546Sopenharmony_ci      vk_image_buffer_copy_layout(&img->vk, region);
1060bf215546Sopenharmony_ci   struct panvk_meta_copy_buf2img_info info = {
1061bf215546Sopenharmony_ci      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
1062bf215546Sopenharmony_ci      .buf.stride.line = buflayout.row_stride_B,
1063bf215546Sopenharmony_ci      .buf.stride.surf = buflayout.image_stride_B,
1064bf215546Sopenharmony_ci   };
1065bf215546Sopenharmony_ci
1066bf215546Sopenharmony_ci   mali_ptr pushconsts =
1067bf215546Sopenharmony_ci      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1068bf215546Sopenharmony_ci
1069bf215546Sopenharmony_ci   struct pan_image_view view = {
1070bf215546Sopenharmony_ci      .format = key.imgfmt,
1071bf215546Sopenharmony_ci      .dim = MALI_TEXTURE_DIMENSION_2D,
1072bf215546Sopenharmony_ci      .image = &img->pimage,
1073bf215546Sopenharmony_ci      .nr_samples = img->pimage.layout.nr_samples,
1074bf215546Sopenharmony_ci      .first_level = region->imageSubresource.mipLevel,
1075bf215546Sopenharmony_ci      .last_level = region->imageSubresource.mipLevel,
1076bf215546Sopenharmony_ci      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
1077bf215546Sopenharmony_ci   };
1078bf215546Sopenharmony_ci
1079bf215546Sopenharmony_ci   /* TODO: don't force preloads of dst resources if unneeded */
1080bf215546Sopenharmony_ci   cmdbuf->state.fb.crc_valid[0] = false;
1081bf215546Sopenharmony_ci   *fbinfo = (struct pan_fb_info){
1082bf215546Sopenharmony_ci      .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
1083bf215546Sopenharmony_ci      .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
1084bf215546Sopenharmony_ci      .extent.minx = minx,
1085bf215546Sopenharmony_ci      .extent.maxx = maxx,
1086bf215546Sopenharmony_ci      .extent.miny = miny,
1087bf215546Sopenharmony_ci      .extent.maxy = maxy,
1088bf215546Sopenharmony_ci      .nr_samples = 1,
1089bf215546Sopenharmony_ci      .rt_count = 1,
1090bf215546Sopenharmony_ci      .rts[0].view = &view,
1091bf215546Sopenharmony_ci      .rts[0].preload = true,
1092bf215546Sopenharmony_ci      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
1093bf215546Sopenharmony_ci   };
1094bf215546Sopenharmony_ci
1095bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1096bf215546Sopenharmony_ci
1097bf215546Sopenharmony_ci   assert(region->imageSubresource.layerCount == 1 ||
1098bf215546Sopenharmony_ci          region->imageExtent.depth == 1);
1099bf215546Sopenharmony_ci   assert(region->imageOffset.z >= 0);
1100bf215546Sopenharmony_ci   unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
1101bf215546Sopenharmony_ci   unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
1102bf215546Sopenharmony_ci   for (unsigned l = 0; l < nlayers; l++) {
1103bf215546Sopenharmony_ci      float src_rect[] = {
1104bf215546Sopenharmony_ci         0, 0, l, 1.0,
1105bf215546Sopenharmony_ci         region->imageExtent.width, 0, l, 1.0,
1106bf215546Sopenharmony_ci         0, region->imageExtent.height, l, 1.0,
1107bf215546Sopenharmony_ci         region->imageExtent.width, region->imageExtent.height, l, 1.0,
1108bf215546Sopenharmony_ci      };
1109bf215546Sopenharmony_ci
1110bf215546Sopenharmony_ci      mali_ptr src_coords =
1111bf215546Sopenharmony_ci         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
1112bf215546Sopenharmony_ci                                 sizeof(src_rect), 64);
1113bf215546Sopenharmony_ci
1114bf215546Sopenharmony_ci      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1115bf215546Sopenharmony_ci
1116bf215546Sopenharmony_ci      view.first_layer = view.last_layer = l + first_layer;
1117bf215546Sopenharmony_ci      batch->blit.src = buf->bo;
1118bf215546Sopenharmony_ci      batch->blit.dst = img->pimage.data.bo;
1119bf215546Sopenharmony_ci      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
1120bf215546Sopenharmony_ci      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
1121bf215546Sopenharmony_ci      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);
1122bf215546Sopenharmony_ci
1123bf215546Sopenharmony_ci      mali_ptr tsd, tiler;
1124bf215546Sopenharmony_ci
1125bf215546Sopenharmony_ci      tsd = batch->tls.gpu;
1126bf215546Sopenharmony_ci      tiler = batch->tiler.descs.gpu;
1127bf215546Sopenharmony_ci
1128bf215546Sopenharmony_ci      struct panfrost_ptr job;
1129bf215546Sopenharmony_ci
1130bf215546Sopenharmony_ci      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
1131bf215546Sopenharmony_ci                                           &batch->scoreboard,
1132bf215546Sopenharmony_ci                                           src_coords, dst_coords,
1133bf215546Sopenharmony_ci                                           0, 0, pushconsts,
1134bf215546Sopenharmony_ci                                           vpd, rsd, tsd, tiler);
1135bf215546Sopenharmony_ci
1136bf215546Sopenharmony_ci      util_dynarray_append(&batch->jobs, void *, job.cpu);
1137bf215546Sopenharmony_ci      panvk_per_arch(cmd_close_batch)(cmdbuf);
1138bf215546Sopenharmony_ci   }
1139bf215546Sopenharmony_ci}
1140bf215546Sopenharmony_ci
1141bf215546Sopenharmony_cistatic void
1142bf215546Sopenharmony_cipanvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
1143bf215546Sopenharmony_ci{
1144bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);
1145bf215546Sopenharmony_ci
1146bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
1147bf215546Sopenharmony_ci      struct pan_shader_info shader_info;
1148bf215546Sopenharmony_ci      mali_ptr shader =
1149bf215546Sopenharmony_ci         panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
1150bf215546Sopenharmony_ci                                        panvk_meta_copy_buf2img_fmts[i],
1151bf215546Sopenharmony_ci                                        &shader_info);
1152bf215546Sopenharmony_ci      dev->meta.copy.buf2img[i].rsd =
1153bf215546Sopenharmony_ci         panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1154bf215546Sopenharmony_ci                                         shader, &shader_info,
1155bf215546Sopenharmony_ci                                         panvk_meta_copy_buf2img_fmts[i].imgfmt,
1156bf215546Sopenharmony_ci                                         panvk_meta_copy_buf2img_fmts[i].mask,
1157bf215546Sopenharmony_ci                                         false);
1158bf215546Sopenharmony_ci   }
1159bf215546Sopenharmony_ci}
1160bf215546Sopenharmony_ci
1161bf215546Sopenharmony_civoid
1162bf215546Sopenharmony_cipanvk_per_arch(CmdCopyBufferToImage2)(VkCommandBuffer commandBuffer,
1163bf215546Sopenharmony_ci                                      const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
1164bf215546Sopenharmony_ci{
1165bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1166bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
1167bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);
1168bf215546Sopenharmony_ci
1169bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
1170bf215546Sopenharmony_ci      panvk_meta_copy_buf2img(cmdbuf, buf, img, &pCopyBufferToImageInfo->pRegions[i]);
1171bf215546Sopenharmony_ci   }
1172bf215546Sopenharmony_ci}
1173bf215546Sopenharmony_ci
1174bf215546Sopenharmony_cistatic const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
1175bf215546Sopenharmony_ci   { PIPE_FORMAT_R8_UINT, 0x1 },
1176bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8_UINT, 0x3 },
1177bf215546Sopenharmony_ci   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
1178bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
1179bf215546Sopenharmony_ci   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
1180bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, 0x3 },
1181bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
1182bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
1183bf215546Sopenharmony_ci   /* S8 -> Z24S8 */
1184bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
1185bf215546Sopenharmony_ci   /* S8 -> Z32_S8X24 */
1186bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, 0x2 },
1187bf215546Sopenharmony_ci   /* Z24X8 -> Z24S8 */
1188bf215546Sopenharmony_ci   { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
1189bf215546Sopenharmony_ci   /* Z32 -> Z32_S8X24 */
1190bf215546Sopenharmony_ci   { PIPE_FORMAT_R32G32_UINT, 0x1 },
1191bf215546Sopenharmony_ci};
1192bf215546Sopenharmony_ci
1193bf215546Sopenharmony_cistatic enum pipe_format
1194bf215546Sopenharmony_cipanvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
1195bf215546Sopenharmony_ci{
1196bf215546Sopenharmony_ci   /* Pick blendable formats when we can, and the FLOAT variant matching the
1197bf215546Sopenharmony_ci    * texelsize otherwise.
1198bf215546Sopenharmony_ci    */
1199bf215546Sopenharmony_ci   switch (util_format_get_blocksize(imgfmt)) {
1200bf215546Sopenharmony_ci   case 1: return PIPE_FORMAT_R8_UINT;
1201bf215546Sopenharmony_ci   /* AFBC stores things differently for RGB565,
1202bf215546Sopenharmony_ci    * we can't simply map to R8G8 in that case */
1203bf215546Sopenharmony_ci   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
1204bf215546Sopenharmony_ci                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
1205bf215546Sopenharmony_ci                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
1206bf215546Sopenharmony_ci   case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
1207bf215546Sopenharmony_ci   case 6: return PIPE_FORMAT_R16G16B16_UINT;
1208bf215546Sopenharmony_ci   case 8: return PIPE_FORMAT_R32G32_UINT;
1209bf215546Sopenharmony_ci   case 12: return PIPE_FORMAT_R32G32B32_UINT;
1210bf215546Sopenharmony_ci   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
1211bf215546Sopenharmony_ci   default: unreachable("Invalid format\n");
1212bf215546Sopenharmony_ci   }
1213bf215546Sopenharmony_ci}
1214bf215546Sopenharmony_ci
1215bf215546Sopenharmony_cistruct panvk_meta_copy_img2buf_info {
1216bf215546Sopenharmony_ci   struct {
1217bf215546Sopenharmony_ci      mali_ptr ptr;
1218bf215546Sopenharmony_ci      struct {
1219bf215546Sopenharmony_ci         unsigned line;
1220bf215546Sopenharmony_ci         unsigned surf;
1221bf215546Sopenharmony_ci      } stride;
1222bf215546Sopenharmony_ci   } buf;
1223bf215546Sopenharmony_ci   struct {
1224bf215546Sopenharmony_ci      struct {
1225bf215546Sopenharmony_ci         unsigned x, y, z;
1226bf215546Sopenharmony_ci      } offset;
1227bf215546Sopenharmony_ci      struct {
1228bf215546Sopenharmony_ci         unsigned minx, miny, maxx, maxy;
1229bf215546Sopenharmony_ci      } extent;
1230bf215546Sopenharmony_ci   } img;
1231bf215546Sopenharmony_ci} PACKED;
1232bf215546Sopenharmony_ci
/* Emit a push-constant load of one scalar field of
 * struct panvk_meta_copy_img2buf_info.
 *
 * @b:     pointer to the nir_builder
 * @field: member path inside the struct (e.g. buf.stride.line)
 *
 * The load is one component wide, its bit size is the size of the field, and
 * its base is the byte offset of the field inside the struct. @b is
 * parenthesized at every use so any pointer expression can be passed.
 */
#define panvk_meta_copy_img2buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                     sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
                     nir_imm_int((b), 0), \
                     .base = offsetof(struct panvk_meta_copy_img2buf_info, field), \
                     .range = ~0)
1239bf215546Sopenharmony_ci
1240bf215546Sopenharmony_cistatic mali_ptr
1241bf215546Sopenharmony_cipanvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
1242bf215546Sopenharmony_ci                               struct pan_pool *bin_pool,
1243bf215546Sopenharmony_ci                               struct panvk_meta_copy_format_info key,
1244bf215546Sopenharmony_ci                               unsigned texdim, unsigned texisarray,
1245bf215546Sopenharmony_ci                               struct pan_shader_info *shader_info)
1246bf215546Sopenharmony_ci{
1247bf215546Sopenharmony_ci   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
1248bf215546Sopenharmony_ci   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
1249bf215546Sopenharmony_ci
1250bf215546Sopenharmony_ci   /* FIXME: Won't work on compute queues, but we can't do that with
1251bf215546Sopenharmony_ci    * a compute shader if the destination is an AFBC surface.
1252bf215546Sopenharmony_ci    */
1253bf215546Sopenharmony_ci   nir_builder b =
1254bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1255bf215546Sopenharmony_ci                                     GENX(pan_shader_get_compiler_options)(),
1256bf215546Sopenharmony_ci                                     "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
1257bf215546Sopenharmony_ci                                     texdim, texisarray ? "[]" : "",
1258bf215546Sopenharmony_ci                                     util_format_name(key.imgfmt),
1259bf215546Sopenharmony_ci                                     key.mask);
1260bf215546Sopenharmony_ci
1261bf215546Sopenharmony_ci   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1262bf215546Sopenharmony_ci   nir_ssa_def *bufptr =
1263bf215546Sopenharmony_ci      panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
1264bf215546Sopenharmony_ci   nir_ssa_def *buflinestride =
1265bf215546Sopenharmony_ci      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
1266bf215546Sopenharmony_ci   nir_ssa_def *bufsurfstride =
1267bf215546Sopenharmony_ci      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);
1268bf215546Sopenharmony_ci
1269bf215546Sopenharmony_ci   nir_ssa_def *imgminx =
1270bf215546Sopenharmony_ci      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
1271bf215546Sopenharmony_ci   nir_ssa_def *imgminy =
1272bf215546Sopenharmony_ci      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
1273bf215546Sopenharmony_ci   nir_ssa_def *imgmaxx =
1274bf215546Sopenharmony_ci      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
1275bf215546Sopenharmony_ci   nir_ssa_def *imgmaxy =
1276bf215546Sopenharmony_ci      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);
1277bf215546Sopenharmony_ci
1278bf215546Sopenharmony_ci   nir_ssa_def *imgcoords, *inbounds;
1279bf215546Sopenharmony_ci
1280bf215546Sopenharmony_ci   switch (texdim + texisarray) {
1281bf215546Sopenharmony_ci   case 1:
1282bf215546Sopenharmony_ci      imgcoords =
1283bf215546Sopenharmony_ci         nir_iadd(&b,
1284bf215546Sopenharmony_ci                  nir_channel(&b, coord, 0),
1285bf215546Sopenharmony_ci                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
1286bf215546Sopenharmony_ci      inbounds =
1287bf215546Sopenharmony_ci         nir_iand(&b,
1288bf215546Sopenharmony_ci                  nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1289bf215546Sopenharmony_ci                  nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
1290bf215546Sopenharmony_ci      break;
1291bf215546Sopenharmony_ci   case 2:
1292bf215546Sopenharmony_ci      imgcoords =
1293bf215546Sopenharmony_ci         nir_vec2(&b,
1294bf215546Sopenharmony_ci                  nir_iadd(&b,
1295bf215546Sopenharmony_ci                           nir_channel(&b, coord, 0),
1296bf215546Sopenharmony_ci                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1297bf215546Sopenharmony_ci                  nir_iadd(&b,
1298bf215546Sopenharmony_ci                           nir_channel(&b, coord, 1),
1299bf215546Sopenharmony_ci                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1300bf215546Sopenharmony_ci      inbounds =
1301bf215546Sopenharmony_ci         nir_iand(&b,
1302bf215546Sopenharmony_ci                  nir_iand(&b,
1303bf215546Sopenharmony_ci                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1304bf215546Sopenharmony_ci                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1305bf215546Sopenharmony_ci                  nir_iand(&b,
1306bf215546Sopenharmony_ci                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1307bf215546Sopenharmony_ci                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1308bf215546Sopenharmony_ci      break;
1309bf215546Sopenharmony_ci   case 3:
1310bf215546Sopenharmony_ci      imgcoords =
1311bf215546Sopenharmony_ci         nir_vec3(&b,
1312bf215546Sopenharmony_ci                  nir_iadd(&b,
1313bf215546Sopenharmony_ci                           nir_channel(&b, coord, 0),
1314bf215546Sopenharmony_ci                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
1315bf215546Sopenharmony_ci                  nir_iadd(&b,
1316bf215546Sopenharmony_ci                           nir_channel(&b, coord, 1),
1317bf215546Sopenharmony_ci                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
1318bf215546Sopenharmony_ci                  nir_iadd(&b,
1319bf215546Sopenharmony_ci                           nir_channel(&b, coord, 2),
1320bf215546Sopenharmony_ci                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
1321bf215546Sopenharmony_ci      inbounds =
1322bf215546Sopenharmony_ci         nir_iand(&b,
1323bf215546Sopenharmony_ci                  nir_iand(&b,
1324bf215546Sopenharmony_ci                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
1325bf215546Sopenharmony_ci                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
1326bf215546Sopenharmony_ci                  nir_iand(&b,
1327bf215546Sopenharmony_ci                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
1328bf215546Sopenharmony_ci                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
1329bf215546Sopenharmony_ci      break;
1330bf215546Sopenharmony_ci   default:
1331bf215546Sopenharmony_ci      unreachable("Invalid texture dimension\n");
1332bf215546Sopenharmony_ci   }
1333bf215546Sopenharmony_ci
1334bf215546Sopenharmony_ci   nir_push_if(&b, inbounds);
1335bf215546Sopenharmony_ci
1336bf215546Sopenharmony_ci   /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
1337bf215546Sopenharmony_ci    * blocks instead of 16x16 texels in that case, and there's nothing we can
1338bf215546Sopenharmony_ci    * do to force the tile size to 4x4 in the render path.
1339bf215546Sopenharmony_ci    * This being said, compressed textures are not compatible with AFBC, so we
1340bf215546Sopenharmony_ci    * could use a compute shader arranging the blocks properly.
1341bf215546Sopenharmony_ci    */
1342bf215546Sopenharmony_ci   nir_ssa_def *offset =
1343bf215546Sopenharmony_ci      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
1344bf215546Sopenharmony_ci   offset = nir_iadd(&b, offset,
1345bf215546Sopenharmony_ci                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
1346bf215546Sopenharmony_ci   offset = nir_iadd(&b, offset,
1347bf215546Sopenharmony_ci                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
1348bf215546Sopenharmony_ci   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));
1349bf215546Sopenharmony_ci
1350bf215546Sopenharmony_ci   unsigned imgcompsz = imgtexelsz <= 4 ?
1351bf215546Sopenharmony_ci                        1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
1352bf215546Sopenharmony_ci   unsigned nimgcomps = imgtexelsz / imgcompsz;
1353bf215546Sopenharmony_ci   assert(nimgcomps <= 4);
1354bf215546Sopenharmony_ci
1355bf215546Sopenharmony_ci   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
1356bf215546Sopenharmony_ci   tex->op = nir_texop_txf;
1357bf215546Sopenharmony_ci   tex->texture_index = 0;
1358bf215546Sopenharmony_ci   tex->is_array = texisarray;
1359bf215546Sopenharmony_ci   tex->dest_type = util_format_is_unorm(key.imgfmt) ?
1360bf215546Sopenharmony_ci                    nir_type_float32 : nir_type_uint32;
1361bf215546Sopenharmony_ci
1362bf215546Sopenharmony_ci   switch (texdim) {
1363bf215546Sopenharmony_ci   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
1364bf215546Sopenharmony_ci   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
1365bf215546Sopenharmony_ci   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
1366bf215546Sopenharmony_ci   default: unreachable("Invalid texture dimension");
1367bf215546Sopenharmony_ci   }
1368bf215546Sopenharmony_ci
1369bf215546Sopenharmony_ci   tex->src[0].src_type = nir_tex_src_coord;
1370bf215546Sopenharmony_ci   tex->src[0].src = nir_src_for_ssa(imgcoords);
1371bf215546Sopenharmony_ci   tex->coord_components = texdim + texisarray;
1372bf215546Sopenharmony_ci   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
1373bf215546Sopenharmony_ci                     nir_alu_type_get_type_size(tex->dest_type), NULL);
1374bf215546Sopenharmony_ci   nir_builder_instr_insert(&b, &tex->instr);
1375bf215546Sopenharmony_ci
1376bf215546Sopenharmony_ci   nir_ssa_def *texel = &tex->dest.ssa;
1377bf215546Sopenharmony_ci
1378bf215546Sopenharmony_ci   unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
1379bf215546Sopenharmony_ci   unsigned nbufcomps = util_bitcount(fullmask);
1380bf215546Sopenharmony_ci   if (key.mask != fullmask) {
1381bf215546Sopenharmony_ci      nir_ssa_def *bufcomps[4];
1382bf215546Sopenharmony_ci      nbufcomps = 0;
1383bf215546Sopenharmony_ci      for (unsigned i = 0; i < nimgcomps; i++) {
1384bf215546Sopenharmony_ci         if (key.mask & BITFIELD_BIT(i))
1385bf215546Sopenharmony_ci            bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
1386bf215546Sopenharmony_ci      }
1387bf215546Sopenharmony_ci
1388bf215546Sopenharmony_ci      texel = nir_vec(&b, bufcomps, nbufcomps);
1389bf215546Sopenharmony_ci   }
1390bf215546Sopenharmony_ci
1391bf215546Sopenharmony_ci   unsigned bufcompsz = buftexelsz / nbufcomps;
1392bf215546Sopenharmony_ci
1393bf215546Sopenharmony_ci   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
1394bf215546Sopenharmony_ci      texel = nir_fmul(&b, texel,
1395bf215546Sopenharmony_ci                       nir_vec3(&b,
1396bf215546Sopenharmony_ci                                nir_imm_float(&b, 31),
1397bf215546Sopenharmony_ci                                nir_imm_float(&b, 63),
1398bf215546Sopenharmony_ci                                nir_imm_float(&b, 31)));
1399bf215546Sopenharmony_ci      texel = nir_f2u16(&b, texel);
1400bf215546Sopenharmony_ci      texel = nir_ior(&b, nir_channel(&b, texel, 0),
1401bf215546Sopenharmony_ci                      nir_ior(&b,
1402bf215546Sopenharmony_ci                              nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
1403bf215546Sopenharmony_ci                              nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
1404bf215546Sopenharmony_ci      imgcompsz = 2;
1405bf215546Sopenharmony_ci      bufcompsz = 2;
1406bf215546Sopenharmony_ci      nbufcomps = 1;
1407bf215546Sopenharmony_ci      nimgcomps = 1;
1408bf215546Sopenharmony_ci   } else if (imgcompsz == 1) {
1409bf215546Sopenharmony_ci      nir_ssa_def *packed = nir_channel(&b, texel, 0);
1410bf215546Sopenharmony_ci      for (unsigned i = 1; i < nbufcomps; i++) {
1411bf215546Sopenharmony_ci         packed = nir_ior(&b, packed,
1412bf215546Sopenharmony_ci                          nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
1413bf215546Sopenharmony_ci                                   nir_imm_int(&b, i * 8)));
1414bf215546Sopenharmony_ci      }
1415bf215546Sopenharmony_ci      texel = packed;
1416bf215546Sopenharmony_ci
1417bf215546Sopenharmony_ci      bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
1418bf215546Sopenharmony_ci      nbufcomps = 1;
1419bf215546Sopenharmony_ci   }
1420bf215546Sopenharmony_ci
1421bf215546Sopenharmony_ci   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
1422bf215546Sopenharmony_ci   assert(nbufcomps <= 4 && nimgcomps <= 4);
1423bf215546Sopenharmony_ci   texel = nir_u2uN(&b, texel, bufcompsz * 8);
1424bf215546Sopenharmony_ci
1425bf215546Sopenharmony_ci   nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
1426bf215546Sopenharmony_ci   nir_pop_if(&b, NULL);
1427bf215546Sopenharmony_ci
1428bf215546Sopenharmony_ci   struct panfrost_compile_inputs inputs = {
1429bf215546Sopenharmony_ci      .gpu_id = pdev->gpu_id,
1430bf215546Sopenharmony_ci      .is_blit = true,
1431bf215546Sopenharmony_ci      .no_ubo_to_push = true,
1432bf215546Sopenharmony_ci   };
1433bf215546Sopenharmony_ci
1434bf215546Sopenharmony_ci   struct util_dynarray binary;
1435bf215546Sopenharmony_ci
1436bf215546Sopenharmony_ci   util_dynarray_init(&binary, NULL);
1437bf215546Sopenharmony_ci   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1438bf215546Sopenharmony_ci
1439bf215546Sopenharmony_ci   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);
1440bf215546Sopenharmony_ci
1441bf215546Sopenharmony_ci   mali_ptr shader =
1442bf215546Sopenharmony_ci      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
1443bf215546Sopenharmony_ci
1444bf215546Sopenharmony_ci   util_dynarray_fini(&binary);
1445bf215546Sopenharmony_ci   ralloc_free(b.shader);
1446bf215546Sopenharmony_ci
1447bf215546Sopenharmony_ci   return shader;
1448bf215546Sopenharmony_ci}
1449bf215546Sopenharmony_ci
1450bf215546Sopenharmony_cistatic unsigned
1451bf215546Sopenharmony_cipanvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
1452bf215546Sopenharmony_ci{
1453bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1454bf215546Sopenharmony_ci      if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
1455bf215546Sopenharmony_ci         return i;
1456bf215546Sopenharmony_ci   }
1457bf215546Sopenharmony_ci
1458bf215546Sopenharmony_ci   unreachable("Invalid texel size\n");
1459bf215546Sopenharmony_ci}
1460bf215546Sopenharmony_ci
1461bf215546Sopenharmony_cistatic void
1462bf215546Sopenharmony_cipanvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
1463bf215546Sopenharmony_ci                        const struct panvk_buffer *buf,
1464bf215546Sopenharmony_ci                        const struct panvk_image *img,
1465bf215546Sopenharmony_ci                        const VkBufferImageCopy2 *region)
1466bf215546Sopenharmony_ci{
1467bf215546Sopenharmony_ci   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
1468bf215546Sopenharmony_ci   struct panvk_meta_copy_format_info key = {
1469bf215546Sopenharmony_ci      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
1470bf215546Sopenharmony_ci      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
1471bf215546Sopenharmony_ci                                       region->imageSubresource.aspectMask),
1472bf215546Sopenharmony_ci   };
1473bf215546Sopenharmony_ci   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
1474bf215546Sopenharmony_ci   unsigned texdimidx =
1475bf215546Sopenharmony_ci      panvk_meta_copy_tex_type(img->pimage.layout.dim,
1476bf215546Sopenharmony_ci                               img->pimage.layout.array_size > 1);
1477bf215546Sopenharmony_ci   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);
1478bf215546Sopenharmony_ci
1479bf215546Sopenharmony_ci   mali_ptr rsd =
1480bf215546Sopenharmony_ci      cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;
1481bf215546Sopenharmony_ci
1482bf215546Sopenharmony_ci   struct panvk_meta_copy_img2buf_info info = {
1483bf215546Sopenharmony_ci      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
1484bf215546Sopenharmony_ci      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
1485bf215546Sopenharmony_ci      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
1486bf215546Sopenharmony_ci      .img.extent.minx = MAX2(region->imageOffset.x, 0),
1487bf215546Sopenharmony_ci      .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
1488bf215546Sopenharmony_ci   };
1489bf215546Sopenharmony_ci
1490bf215546Sopenharmony_ci   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
1491bf215546Sopenharmony_ci      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
1492bf215546Sopenharmony_ci   } else {
1493bf215546Sopenharmony_ci      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
1494bf215546Sopenharmony_ci      info.img.offset.z = MAX2(region->imageOffset.z, 0);
1495bf215546Sopenharmony_ci      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
1496bf215546Sopenharmony_ci      info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
1497bf215546Sopenharmony_ci   }
1498bf215546Sopenharmony_ci
1499bf215546Sopenharmony_ci   info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
1500bf215546Sopenharmony_ci                          info.buf.stride.line;
1501bf215546Sopenharmony_ci
1502bf215546Sopenharmony_ci   mali_ptr pushconsts =
1503bf215546Sopenharmony_ci      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1504bf215546Sopenharmony_ci
1505bf215546Sopenharmony_ci   struct pan_image_view view = {
1506bf215546Sopenharmony_ci      .format = key.imgfmt,
1507bf215546Sopenharmony_ci      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
1508bf215546Sopenharmony_ci             MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
1509bf215546Sopenharmony_ci      .image = &img->pimage,
1510bf215546Sopenharmony_ci      .nr_samples = img->pimage.layout.nr_samples,
1511bf215546Sopenharmony_ci      .first_level = region->imageSubresource.mipLevel,
1512bf215546Sopenharmony_ci      .last_level = region->imageSubresource.mipLevel,
1513bf215546Sopenharmony_ci      .first_layer = region->imageSubresource.baseArrayLayer,
1514bf215546Sopenharmony_ci      .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
1515bf215546Sopenharmony_ci      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
1516bf215546Sopenharmony_ci   };
1517bf215546Sopenharmony_ci
1518bf215546Sopenharmony_ci   mali_ptr texture =
1519bf215546Sopenharmony_ci      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
1520bf215546Sopenharmony_ci   mali_ptr sampler =
1521bf215546Sopenharmony_ci      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);
1522bf215546Sopenharmony_ci
1523bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1524bf215546Sopenharmony_ci
1525bf215546Sopenharmony_ci   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1526bf215546Sopenharmony_ci
1527bf215546Sopenharmony_ci   struct pan_tls_info tlsinfo = { 0 };
1528bf215546Sopenharmony_ci
1529bf215546Sopenharmony_ci   batch->blit.src = img->pimage.data.bo;
1530bf215546Sopenharmony_ci   batch->blit.dst = buf->bo;
1531bf215546Sopenharmony_ci   batch->tls =
1532bf215546Sopenharmony_ci      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
1533bf215546Sopenharmony_ci   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);
1534bf215546Sopenharmony_ci
1535bf215546Sopenharmony_ci   mali_ptr tsd = batch->tls.gpu;
1536bf215546Sopenharmony_ci
1537bf215546Sopenharmony_ci   struct pan_compute_dim wg_sz = {
1538bf215546Sopenharmony_ci      16,
1539bf215546Sopenharmony_ci      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
1540bf215546Sopenharmony_ci      1,
1541bf215546Sopenharmony_ci   };
1542bf215546Sopenharmony_ci
1543bf215546Sopenharmony_ci   struct pan_compute_dim num_wg = {
1544bf215546Sopenharmony_ci     (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
1545bf215546Sopenharmony_ci     img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
1546bf215546Sopenharmony_ci        region->imageSubresource.layerCount :
1547bf215546Sopenharmony_ci        (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
1548bf215546Sopenharmony_ci     img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
1549bf215546Sopenharmony_ci        MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
1550bf215546Sopenharmony_ci   };
1551bf215546Sopenharmony_ci
1552bf215546Sopenharmony_ci   struct panfrost_ptr job =
1553bf215546Sopenharmony_ci      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
1554bf215546Sopenharmony_ci                                       &batch->scoreboard, &num_wg, &wg_sz,
1555bf215546Sopenharmony_ci                                       texture, sampler,
1556bf215546Sopenharmony_ci                                       pushconsts, rsd, tsd);
1557bf215546Sopenharmony_ci
1558bf215546Sopenharmony_ci   util_dynarray_append(&batch->jobs, void *, job.cpu);
1559bf215546Sopenharmony_ci
1560bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1561bf215546Sopenharmony_ci}
1562bf215546Sopenharmony_ci
1563bf215546Sopenharmony_cistatic void
1564bf215546Sopenharmony_cipanvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
1565bf215546Sopenharmony_ci{
1566bf215546Sopenharmony_ci   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS);
1567bf215546Sopenharmony_ci
1568bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
1569bf215546Sopenharmony_ci      for (unsigned texdim = 1; texdim <= 3; texdim++) {
1570bf215546Sopenharmony_ci         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
1571bf215546Sopenharmony_ci         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1572bf215546Sopenharmony_ci
1573bf215546Sopenharmony_ci         struct pan_shader_info shader_info;
1574bf215546Sopenharmony_ci         mali_ptr shader =
1575bf215546Sopenharmony_ci            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1576bf215546Sopenharmony_ci                                           panvk_meta_copy_img2buf_fmts[i],
1577bf215546Sopenharmony_ci                                           texdim, false, &shader_info);
1578bf215546Sopenharmony_ci         dev->meta.copy.img2buf[texdimidx][i].rsd =
1579bf215546Sopenharmony_ci            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1580bf215546Sopenharmony_ci                                            &dev->meta.desc_pool.base,
1581bf215546Sopenharmony_ci                                            shader, &shader_info, true);
1582bf215546Sopenharmony_ci
1583bf215546Sopenharmony_ci         if (texdim == 3)
1584bf215546Sopenharmony_ci            continue;
1585bf215546Sopenharmony_ci
1586bf215546Sopenharmony_ci         memset(&shader_info, 0, sizeof(shader_info));
1587bf215546Sopenharmony_ci         texdimidx = panvk_meta_copy_tex_type(texdim, true);
1588bf215546Sopenharmony_ci         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
1589bf215546Sopenharmony_ci         shader =
1590bf215546Sopenharmony_ci            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1591bf215546Sopenharmony_ci                                           panvk_meta_copy_img2buf_fmts[i],
1592bf215546Sopenharmony_ci                                           texdim, true, &shader_info);
1593bf215546Sopenharmony_ci         dev->meta.copy.img2buf[texdimidx][i].rsd =
1594bf215546Sopenharmony_ci            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
1595bf215546Sopenharmony_ci                                            &dev->meta.desc_pool.base,
1596bf215546Sopenharmony_ci                                            shader, &shader_info, true);
1597bf215546Sopenharmony_ci      }
1598bf215546Sopenharmony_ci   }
1599bf215546Sopenharmony_ci}
1600bf215546Sopenharmony_ci
1601bf215546Sopenharmony_civoid
1602bf215546Sopenharmony_cipanvk_per_arch(CmdCopyImageToBuffer2)(VkCommandBuffer commandBuffer,
1603bf215546Sopenharmony_ci                                      const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
1604bf215546Sopenharmony_ci{
1605bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1606bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
1607bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);
1608bf215546Sopenharmony_ci
1609bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
1610bf215546Sopenharmony_ci      panvk_meta_copy_img2buf(cmdbuf, buf, img, &pCopyImageToBufferInfo->pRegions[i]);
1611bf215546Sopenharmony_ci   }
1612bf215546Sopenharmony_ci}
1613bf215546Sopenharmony_ci
1614bf215546Sopenharmony_cistruct panvk_meta_copy_buf2buf_info {
1615bf215546Sopenharmony_ci   mali_ptr src;
1616bf215546Sopenharmony_ci   mali_ptr dst;
1617bf215546Sopenharmony_ci} PACKED;
1618bf215546Sopenharmony_ci
/*
 * Emit a NIR push-constant load for one field of
 * struct panvk_meta_copy_buf2buf_info.
 *
 * The load has a single component whose bit size is the field's size in
 * bits, a constant zero offset, and .base set to the field's byte offset
 * within the struct.
 */
#define panvk_meta_copy_buf2buf_get_info_field(b, field)                     \
   nir_load_push_constant(                                                   \
      (b), 1,                                                                \
      sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8,         \
      nir_imm_int((b), 0),                                                   \
      .base = offsetof(struct panvk_meta_copy_buf2buf_info, field),          \
      .range = ~0)
1625bf215546Sopenharmony_ci
1626bf215546Sopenharmony_cistatic mali_ptr
1627bf215546Sopenharmony_cipanvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
1628bf215546Sopenharmony_ci                               struct pan_pool *bin_pool,
1629bf215546Sopenharmony_ci                               unsigned blksz,
1630bf215546Sopenharmony_ci                               struct pan_shader_info *shader_info)
1631bf215546Sopenharmony_ci{
1632bf215546Sopenharmony_ci   /* FIXME: Won't work on compute queues, but we can't do that with
1633bf215546Sopenharmony_ci    * a compute shader if the destination is an AFBC surface.
1634bf215546Sopenharmony_ci    */
1635bf215546Sopenharmony_ci   nir_builder b =
1636bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1637bf215546Sopenharmony_ci                                     GENX(pan_shader_get_compiler_options)(),
1638bf215546Sopenharmony_ci                                     "panvk_meta_copy_buf2buf(blksz=%d)",
1639bf215546Sopenharmony_ci                                     blksz);
1640bf215546Sopenharmony_ci
1641bf215546Sopenharmony_ci   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1642bf215546Sopenharmony_ci
1643bf215546Sopenharmony_ci   nir_ssa_def *offset =
1644bf215546Sopenharmony_ci      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
1645bf215546Sopenharmony_ci   nir_ssa_def *srcptr =
1646bf215546Sopenharmony_ci      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
1647bf215546Sopenharmony_ci   nir_ssa_def *dstptr =
1648bf215546Sopenharmony_ci      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);
1649bf215546Sopenharmony_ci
1650bf215546Sopenharmony_ci   unsigned compsz = blksz < 4 ? blksz : 4;
1651bf215546Sopenharmony_ci   unsigned ncomps = blksz / compsz;
1652bf215546Sopenharmony_ci   nir_store_global(&b, dstptr, blksz,
1653bf215546Sopenharmony_ci                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
1654bf215546Sopenharmony_ci                    (1 << ncomps) - 1);
1655bf215546Sopenharmony_ci
1656bf215546Sopenharmony_ci   struct panfrost_compile_inputs inputs = {
1657bf215546Sopenharmony_ci      .gpu_id = pdev->gpu_id,
1658bf215546Sopenharmony_ci      .is_blit = true,
1659bf215546Sopenharmony_ci      .no_ubo_to_push = true,
1660bf215546Sopenharmony_ci   };
1661bf215546Sopenharmony_ci
1662bf215546Sopenharmony_ci   struct util_dynarray binary;
1663bf215546Sopenharmony_ci
1664bf215546Sopenharmony_ci   util_dynarray_init(&binary, NULL);
1665bf215546Sopenharmony_ci   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1666bf215546Sopenharmony_ci
1667bf215546Sopenharmony_ci   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);
1668bf215546Sopenharmony_ci
1669bf215546Sopenharmony_ci   mali_ptr shader =
1670bf215546Sopenharmony_ci      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
1671bf215546Sopenharmony_ci
1672bf215546Sopenharmony_ci   util_dynarray_fini(&binary);
1673bf215546Sopenharmony_ci   ralloc_free(b.shader);
1674bf215546Sopenharmony_ci
1675bf215546Sopenharmony_ci   return shader;
1676bf215546Sopenharmony_ci}
1677bf215546Sopenharmony_ci
1678bf215546Sopenharmony_cistatic void
1679bf215546Sopenharmony_cipanvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
1680bf215546Sopenharmony_ci{
1681bf215546Sopenharmony_ci   for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
1682bf215546Sopenharmony_ci      struct pan_shader_info shader_info;
1683bf215546Sopenharmony_ci      mali_ptr shader =
1684bf215546Sopenharmony_ci         panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
1685bf215546Sopenharmony_ci                                        1 << i, &shader_info);
1686bf215546Sopenharmony_ci      dev->meta.copy.buf2buf[i].rsd =
1687bf215546Sopenharmony_ci         panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
1688bf215546Sopenharmony_ci                                         shader, &shader_info, false);
1689bf215546Sopenharmony_ci   }
1690bf215546Sopenharmony_ci}
1691bf215546Sopenharmony_ci
1692bf215546Sopenharmony_cistatic void
1693bf215546Sopenharmony_cipanvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
1694bf215546Sopenharmony_ci                        const struct panvk_buffer *src,
1695bf215546Sopenharmony_ci                        const struct panvk_buffer *dst,
1696bf215546Sopenharmony_ci                        const VkBufferCopy2 *region)
1697bf215546Sopenharmony_ci{
1698bf215546Sopenharmony_ci   struct panvk_meta_copy_buf2buf_info info = {
1699bf215546Sopenharmony_ci      .src = panvk_buffer_gpu_ptr(src, region->srcOffset),
1700bf215546Sopenharmony_ci      .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset),
1701bf215546Sopenharmony_ci   };
1702bf215546Sopenharmony_ci
1703bf215546Sopenharmony_ci   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
1704bf215546Sopenharmony_ci   unsigned log2blksz = alignment ? alignment - 1 : 4;
1705bf215546Sopenharmony_ci
1706bf215546Sopenharmony_ci   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
1707bf215546Sopenharmony_ci   mali_ptr rsd =
1708bf215546Sopenharmony_ci      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
1709bf215546Sopenharmony_ci
1710bf215546Sopenharmony_ci   mali_ptr pushconsts =
1711bf215546Sopenharmony_ci      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1712bf215546Sopenharmony_ci
1713bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1714bf215546Sopenharmony_ci
1715bf215546Sopenharmony_ci   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1716bf215546Sopenharmony_ci
1717bf215546Sopenharmony_ci   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
1718bf215546Sopenharmony_ci
1719bf215546Sopenharmony_ci   mali_ptr tsd = batch->tls.gpu;
1720bf215546Sopenharmony_ci
1721bf215546Sopenharmony_ci   unsigned nblocks = region->size >> log2blksz;
1722bf215546Sopenharmony_ci   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
1723bf215546Sopenharmony_ci   struct pan_compute_dim wg_sz = { 1, 1, 1};
1724bf215546Sopenharmony_ci   struct panfrost_ptr job =
1725bf215546Sopenharmony_ci     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
1726bf215546Sopenharmony_ci                                      &batch->scoreboard,
1727bf215546Sopenharmony_ci                                      &num_wg, &wg_sz,
1728bf215546Sopenharmony_ci                                      0, 0, pushconsts, rsd, tsd);
1729bf215546Sopenharmony_ci
1730bf215546Sopenharmony_ci   util_dynarray_append(&batch->jobs, void *, job.cpu);
1731bf215546Sopenharmony_ci
1732bf215546Sopenharmony_ci   batch->blit.src = src->bo;
1733bf215546Sopenharmony_ci   batch->blit.dst = dst->bo;
1734bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1735bf215546Sopenharmony_ci}
1736bf215546Sopenharmony_ci
1737bf215546Sopenharmony_civoid
1738bf215546Sopenharmony_cipanvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
1739bf215546Sopenharmony_ci                               const VkCopyBufferInfo2 *pCopyBufferInfo)
1740bf215546Sopenharmony_ci{
1741bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1742bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
1743bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);
1744bf215546Sopenharmony_ci
1745bf215546Sopenharmony_ci   for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
1746bf215546Sopenharmony_ci      panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
1747bf215546Sopenharmony_ci   }
1748bf215546Sopenharmony_ci}
1749bf215546Sopenharmony_ci
1750bf215546Sopenharmony_cistruct panvk_meta_fill_buf_info {
1751bf215546Sopenharmony_ci   mali_ptr start;
1752bf215546Sopenharmony_ci   uint32_t val;
1753bf215546Sopenharmony_ci} PACKED;
1754bf215546Sopenharmony_ci
/*
 * Emit a NIR push-constant load for one field of
 * struct panvk_meta_fill_buf_info.
 *
 * The load has a single component whose bit size is the field's size in
 * bits, a constant zero offset, and .base set to the field's byte offset
 * within the struct.
 */
#define panvk_meta_fill_buf_get_info_field(b, field)                         \
   nir_load_push_constant(                                                   \
      (b), 1,                                                                \
      sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8,             \
      nir_imm_int((b), 0),                                                   \
      .base = offsetof(struct panvk_meta_fill_buf_info, field),              \
      .range = ~0)
1761bf215546Sopenharmony_ci
1762bf215546Sopenharmony_cistatic mali_ptr
1763bf215546Sopenharmony_cipanvk_meta_fill_buf_shader(struct panfrost_device *pdev,
1764bf215546Sopenharmony_ci                           struct pan_pool *bin_pool,
1765bf215546Sopenharmony_ci                           struct pan_shader_info *shader_info)
1766bf215546Sopenharmony_ci{
1767bf215546Sopenharmony_ci   /* FIXME: Won't work on compute queues, but we can't do that with
1768bf215546Sopenharmony_ci    * a compute shader if the destination is an AFBC surface.
1769bf215546Sopenharmony_ci    */
1770bf215546Sopenharmony_ci   nir_builder b =
1771bf215546Sopenharmony_ci      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
1772bf215546Sopenharmony_ci                                     GENX(pan_shader_get_compiler_options)(),
1773bf215546Sopenharmony_ci                                     "panvk_meta_fill_buf()");
1774bf215546Sopenharmony_ci
1775bf215546Sopenharmony_ci   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
1776bf215546Sopenharmony_ci
1777bf215546Sopenharmony_ci   nir_ssa_def *offset =
1778bf215546Sopenharmony_ci      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t))));
1779bf215546Sopenharmony_ci   nir_ssa_def *ptr =
1780bf215546Sopenharmony_ci      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
1781bf215546Sopenharmony_ci   nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);
1782bf215546Sopenharmony_ci
1783bf215546Sopenharmony_ci   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);
1784bf215546Sopenharmony_ci
1785bf215546Sopenharmony_ci   struct panfrost_compile_inputs inputs = {
1786bf215546Sopenharmony_ci      .gpu_id = pdev->gpu_id,
1787bf215546Sopenharmony_ci      .is_blit = true,
1788bf215546Sopenharmony_ci      .no_ubo_to_push = true,
1789bf215546Sopenharmony_ci   };
1790bf215546Sopenharmony_ci
1791bf215546Sopenharmony_ci   struct util_dynarray binary;
1792bf215546Sopenharmony_ci
1793bf215546Sopenharmony_ci   util_dynarray_init(&binary, NULL);
1794bf215546Sopenharmony_ci   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
1795bf215546Sopenharmony_ci
1796bf215546Sopenharmony_ci   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);
1797bf215546Sopenharmony_ci
1798bf215546Sopenharmony_ci   mali_ptr shader =
1799bf215546Sopenharmony_ci      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);
1800bf215546Sopenharmony_ci
1801bf215546Sopenharmony_ci   util_dynarray_fini(&binary);
1802bf215546Sopenharmony_ci   ralloc_free(b.shader);
1803bf215546Sopenharmony_ci
1804bf215546Sopenharmony_ci   return shader;
1805bf215546Sopenharmony_ci}
1806bf215546Sopenharmony_ci
1807bf215546Sopenharmony_cistatic mali_ptr
1808bf215546Sopenharmony_cipanvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
1809bf215546Sopenharmony_ci                             struct pan_pool *bin_pool,
1810bf215546Sopenharmony_ci                             struct pan_pool *desc_pool)
1811bf215546Sopenharmony_ci{
1812bf215546Sopenharmony_ci   struct pan_shader_info shader_info;
1813bf215546Sopenharmony_ci
1814bf215546Sopenharmony_ci   mali_ptr shader =
1815bf215546Sopenharmony_ci      panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);
1816bf215546Sopenharmony_ci
1817bf215546Sopenharmony_ci   struct panfrost_ptr rsd_ptr =
1818bf215546Sopenharmony_ci      pan_pool_alloc_desc_aggregate(desc_pool,
1819bf215546Sopenharmony_ci                                    PAN_DESC(RENDERER_STATE));
1820bf215546Sopenharmony_ci
1821bf215546Sopenharmony_ci   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
1822bf215546Sopenharmony_ci      pan_shader_prepare_rsd(&shader_info, shader, &cfg);
1823bf215546Sopenharmony_ci   }
1824bf215546Sopenharmony_ci
1825bf215546Sopenharmony_ci   return rsd_ptr.gpu;
1826bf215546Sopenharmony_ci}
1827bf215546Sopenharmony_ci
1828bf215546Sopenharmony_cistatic void
1829bf215546Sopenharmony_cipanvk_meta_fill_buf_init(struct panvk_physical_device *dev)
1830bf215546Sopenharmony_ci{
1831bf215546Sopenharmony_ci   dev->meta.copy.fillbuf.rsd =
1832bf215546Sopenharmony_ci      panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
1833bf215546Sopenharmony_ci                                   &dev->meta.desc_pool.base);
1834bf215546Sopenharmony_ci}
1835bf215546Sopenharmony_ci
1836bf215546Sopenharmony_cistatic void
1837bf215546Sopenharmony_cipanvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
1838bf215546Sopenharmony_ci                    const struct panvk_buffer *dst,
1839bf215546Sopenharmony_ci                    VkDeviceSize size, VkDeviceSize offset,
1840bf215546Sopenharmony_ci                    uint32_t val)
1841bf215546Sopenharmony_ci{
1842bf215546Sopenharmony_ci   struct panvk_meta_fill_buf_info info = {
1843bf215546Sopenharmony_ci      .start = panvk_buffer_gpu_ptr(dst, offset),
1844bf215546Sopenharmony_ci      .val = val,
1845bf215546Sopenharmony_ci   };
1846bf215546Sopenharmony_ci   size = panvk_buffer_range(dst, offset, size);
1847bf215546Sopenharmony_ci
1848bf215546Sopenharmony_ci   /* From the Vulkan spec:
1849bf215546Sopenharmony_ci    *
1850bf215546Sopenharmony_ci    *    "size is the number of bytes to fill, and must be either a multiple
1851bf215546Sopenharmony_ci    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
1852bf215546Sopenharmony_ci    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
1853bf215546Sopenharmony_ci    *    buffer is not a multiple of 4, then the nearest smaller multiple is
1854bf215546Sopenharmony_ci    *    used."
1855bf215546Sopenharmony_ci    */
1856bf215546Sopenharmony_ci   size &= ~3ull;
1857bf215546Sopenharmony_ci
1858bf215546Sopenharmony_ci   assert(!(offset & 3) && !(size & 3));
1859bf215546Sopenharmony_ci
1860bf215546Sopenharmony_ci   unsigned nwords = size / sizeof(uint32_t);
1861bf215546Sopenharmony_ci   mali_ptr rsd =
1862bf215546Sopenharmony_ci      cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;
1863bf215546Sopenharmony_ci
1864bf215546Sopenharmony_ci   mali_ptr pushconsts =
1865bf215546Sopenharmony_ci      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1866bf215546Sopenharmony_ci
1867bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1868bf215546Sopenharmony_ci
1869bf215546Sopenharmony_ci   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1870bf215546Sopenharmony_ci
1871bf215546Sopenharmony_ci   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
1872bf215546Sopenharmony_ci
1873bf215546Sopenharmony_ci   mali_ptr tsd = batch->tls.gpu;
1874bf215546Sopenharmony_ci
1875bf215546Sopenharmony_ci   struct pan_compute_dim num_wg = { nwords, 1, 1 };
1876bf215546Sopenharmony_ci   struct pan_compute_dim wg_sz = { 1, 1, 1};
1877bf215546Sopenharmony_ci   struct panfrost_ptr job =
1878bf215546Sopenharmony_ci     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
1879bf215546Sopenharmony_ci                                      &batch->scoreboard,
1880bf215546Sopenharmony_ci                                      &num_wg, &wg_sz,
1881bf215546Sopenharmony_ci                                      0, 0, pushconsts, rsd, tsd);
1882bf215546Sopenharmony_ci
1883bf215546Sopenharmony_ci   util_dynarray_append(&batch->jobs, void *, job.cpu);
1884bf215546Sopenharmony_ci
1885bf215546Sopenharmony_ci   batch->blit.dst = dst->bo;
1886bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1887bf215546Sopenharmony_ci}
1888bf215546Sopenharmony_ci
1889bf215546Sopenharmony_civoid
1890bf215546Sopenharmony_cipanvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
1891bf215546Sopenharmony_ci                              VkBuffer dstBuffer,
1892bf215546Sopenharmony_ci                              VkDeviceSize dstOffset,
1893bf215546Sopenharmony_ci                              VkDeviceSize fillSize,
1894bf215546Sopenharmony_ci                              uint32_t data)
1895bf215546Sopenharmony_ci{
1896bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1897bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
1898bf215546Sopenharmony_ci
1899bf215546Sopenharmony_ci   panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
1900bf215546Sopenharmony_ci}
1901bf215546Sopenharmony_ci
1902bf215546Sopenharmony_cistatic void
1903bf215546Sopenharmony_cipanvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
1904bf215546Sopenharmony_ci                      const struct panvk_buffer *dst, VkDeviceSize offset,
1905bf215546Sopenharmony_ci                      VkDeviceSize size, const void *data)
1906bf215546Sopenharmony_ci{
1907bf215546Sopenharmony_ci   struct panvk_meta_copy_buf2buf_info info = {
1908bf215546Sopenharmony_ci      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
1909bf215546Sopenharmony_ci      .dst = panvk_buffer_gpu_ptr(dst, offset),
1910bf215546Sopenharmony_ci   };
1911bf215546Sopenharmony_ci
1912bf215546Sopenharmony_ci   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;
1913bf215546Sopenharmony_ci
1914bf215546Sopenharmony_ci   mali_ptr rsd =
1915bf215546Sopenharmony_ci      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;
1916bf215546Sopenharmony_ci
1917bf215546Sopenharmony_ci   mali_ptr pushconsts =
1918bf215546Sopenharmony_ci      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);
1919bf215546Sopenharmony_ci
1920bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1921bf215546Sopenharmony_ci
1922bf215546Sopenharmony_ci   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);
1923bf215546Sopenharmony_ci
1924bf215546Sopenharmony_ci   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);
1925bf215546Sopenharmony_ci
1926bf215546Sopenharmony_ci   mali_ptr tsd = batch->tls.gpu;
1927bf215546Sopenharmony_ci
1928bf215546Sopenharmony_ci   unsigned nblocks = size >> log2blksz;
1929bf215546Sopenharmony_ci   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
1930bf215546Sopenharmony_ci   struct pan_compute_dim wg_sz = { 1, 1, 1};
1931bf215546Sopenharmony_ci   struct panfrost_ptr job =
1932bf215546Sopenharmony_ci     panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
1933bf215546Sopenharmony_ci                                      &batch->scoreboard,
1934bf215546Sopenharmony_ci                                      &num_wg, &wg_sz,
1935bf215546Sopenharmony_ci                                      0, 0, pushconsts, rsd, tsd);
1936bf215546Sopenharmony_ci
1937bf215546Sopenharmony_ci   util_dynarray_append(&batch->jobs, void *, job.cpu);
1938bf215546Sopenharmony_ci
1939bf215546Sopenharmony_ci   batch->blit.dst = dst->bo;
1940bf215546Sopenharmony_ci   panvk_per_arch(cmd_close_batch)(cmdbuf);
1941bf215546Sopenharmony_ci}
1942bf215546Sopenharmony_ci
1943bf215546Sopenharmony_civoid
1944bf215546Sopenharmony_cipanvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
1945bf215546Sopenharmony_ci                                VkBuffer dstBuffer,
1946bf215546Sopenharmony_ci                                VkDeviceSize dstOffset,
1947bf215546Sopenharmony_ci                                VkDeviceSize dataSize,
1948bf215546Sopenharmony_ci                                const void *pData)
1949bf215546Sopenharmony_ci{
1950bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
1951bf215546Sopenharmony_ci   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);
1952bf215546Sopenharmony_ci
1953bf215546Sopenharmony_ci   panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
1954bf215546Sopenharmony_ci}
1955bf215546Sopenharmony_ci
1956bf215546Sopenharmony_civoid
1957bf215546Sopenharmony_cipanvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
1958bf215546Sopenharmony_ci{
1959bf215546Sopenharmony_ci   panvk_meta_copy_img2img_init(dev, false);
1960bf215546Sopenharmony_ci   panvk_meta_copy_img2img_init(dev, true);
1961bf215546Sopenharmony_ci   panvk_meta_copy_buf2img_init(dev);
1962bf215546Sopenharmony_ci   panvk_meta_copy_img2buf_init(dev);
1963bf215546Sopenharmony_ci   panvk_meta_copy_buf2buf_init(dev);
1964bf215546Sopenharmony_ci   panvk_meta_fill_buf_init(dev);
1965bf215546Sopenharmony_ci}
1966