1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2021 Collabora Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21bf215546Sopenharmony_ci * DEALINGS IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "gen_macros.h" 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "nir/nir_builder.h" 27bf215546Sopenharmony_ci#include "pan_encoder.h" 28bf215546Sopenharmony_ci#include "pan_shader.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci#include "panvk_private.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_cistatic mali_ptr 33bf215546Sopenharmony_cipanvk_meta_copy_img_emit_texture(struct panfrost_device *pdev, 34bf215546Sopenharmony_ci struct pan_pool *desc_pool, 35bf215546Sopenharmony_ci const struct pan_image_view *view) 36bf215546Sopenharmony_ci{ 37bf215546Sopenharmony_ci struct panfrost_ptr texture = 38bf215546Sopenharmony_ci pan_pool_alloc_desc(desc_pool, TEXTURE); 39bf215546Sopenharmony_ci size_t payload_size = 40bf215546Sopenharmony_ci GENX(panfrost_estimate_texture_payload_size)(view); 41bf215546Sopenharmony_ci struct panfrost_ptr surfaces = 42bf215546Sopenharmony_ci pan_pool_alloc_aligned(desc_pool, payload_size, 43bf215546Sopenharmony_ci pan_alignment(SURFACE_WITH_STRIDE)); 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ci GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces); 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ci return texture.gpu; 48bf215546Sopenharmony_ci} 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_cistatic mali_ptr 51bf215546Sopenharmony_cipanvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev, 52bf215546Sopenharmony_ci struct pan_pool *desc_pool) 53bf215546Sopenharmony_ci{ 54bf215546Sopenharmony_ci struct panfrost_ptr sampler = 55bf215546Sopenharmony_ci pan_pool_alloc_desc(desc_pool, SAMPLER); 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci pan_pack(sampler.cpu, SAMPLER, cfg) { 58bf215546Sopenharmony_ci cfg.seamless_cube_map = false; 59bf215546Sopenharmony_ci cfg.normalized_coordinates = false; 60bf215546Sopenharmony_ci cfg.minify_nearest = true; 61bf215546Sopenharmony_ci cfg.magnify_nearest = true; 62bf215546Sopenharmony_ci } 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci return sampler.gpu; 65bf215546Sopenharmony_ci} 66bf215546Sopenharmony_ci 67bf215546Sopenharmony_cistatic void 68bf215546Sopenharmony_cipanvk_meta_copy_emit_varying(struct pan_pool *pool, 69bf215546Sopenharmony_ci mali_ptr coordinates, 70bf215546Sopenharmony_ci mali_ptr *varying_bufs, 71bf215546Sopenharmony_ci mali_ptr *varyings) 72bf215546Sopenharmony_ci{ 73bf215546Sopenharmony_ci struct panfrost_ptr varying = 74bf215546Sopenharmony_ci pan_pool_alloc_desc(pool, ATTRIBUTE); 75bf215546Sopenharmony_ci struct panfrost_ptr varying_buffer = 76bf215546Sopenharmony_ci pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER); 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) { 79bf215546Sopenharmony_ci cfg.pointer = coordinates; 80bf215546Sopenharmony_ci cfg.stride = 4 * sizeof(uint32_t); 81bf215546Sopenharmony_ci cfg.size = cfg.stride * 4; 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci /* Bifrost needs an empty desc to mark end of prefetching */ 85bf215546Sopenharmony_ci pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER), 86bf215546Sopenharmony_ci ATTRIBUTE_BUFFER, cfg); 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_ci pan_pack(varying.cpu, ATTRIBUTE, cfg) { 89bf215546Sopenharmony_ci cfg.buffer_index = 0; 90bf215546Sopenharmony_ci cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw; 91bf215546Sopenharmony_ci } 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci *varyings = varying.gpu; 94bf215546Sopenharmony_ci *varying_bufs = varying_buffer.gpu; 95bf215546Sopenharmony_ci} 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_cistatic void 98bf215546Sopenharmony_cipanvk_meta_copy_emit_dcd(struct pan_pool *pool, 99bf215546Sopenharmony_ci mali_ptr src_coords, mali_ptr dst_coords, 100bf215546Sopenharmony_ci mali_ptr texture, mali_ptr sampler, 101bf215546Sopenharmony_ci mali_ptr vpd, mali_ptr tsd, mali_ptr rsd, 102bf215546Sopenharmony_ci mali_ptr push_constants, void *out) 103bf215546Sopenharmony_ci{ 104bf215546Sopenharmony_ci pan_pack(out, DRAW, cfg) { 105bf215546Sopenharmony_ci cfg.thread_storage = tsd; 106bf215546Sopenharmony_ci cfg.state = rsd; 107bf215546Sopenharmony_ci cfg.push_uniforms = push_constants; 108bf215546Sopenharmony_ci cfg.position = dst_coords; 109bf215546Sopenharmony_ci if (src_coords) { 110bf215546Sopenharmony_ci panvk_meta_copy_emit_varying(pool, src_coords, 111bf215546Sopenharmony_ci &cfg.varying_buffers, 112bf215546Sopenharmony_ci &cfg.varyings); 113bf215546Sopenharmony_ci } 114bf215546Sopenharmony_ci cfg.viewport = vpd; 115bf215546Sopenharmony_ci cfg.textures = texture; 116bf215546Sopenharmony_ci cfg.samplers = sampler; 117bf215546Sopenharmony_ci } 118bf215546Sopenharmony_ci} 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_cistatic struct panfrost_ptr 121bf215546Sopenharmony_cipanvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool, 122bf215546Sopenharmony_ci struct pan_scoreboard *scoreboard, 123bf215546Sopenharmony_ci mali_ptr src_coords, mali_ptr dst_coords, 124bf215546Sopenharmony_ci mali_ptr texture, mali_ptr sampler, 125bf215546Sopenharmony_ci mali_ptr push_constants, 126bf215546Sopenharmony_ci mali_ptr vpd, mali_ptr rsd, 127bf215546Sopenharmony_ci mali_ptr tsd, mali_ptr tiler) 128bf215546Sopenharmony_ci{ 129bf215546Sopenharmony_ci struct panfrost_ptr job = 130bf215546Sopenharmony_ci pan_pool_alloc_desc(desc_pool, TILER_JOB); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords, 133bf215546Sopenharmony_ci texture, sampler, vpd, tsd, rsd, push_constants, 134bf215546Sopenharmony_ci pan_section_ptr(job.cpu, TILER_JOB, DRAW)); 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) { 137bf215546Sopenharmony_ci cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP; 138bf215546Sopenharmony_ci cfg.index_count = 4; 139bf215546Sopenharmony_ci cfg.job_task_split = 6; 140bf215546Sopenharmony_ci } 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) { 143bf215546Sopenharmony_ci cfg.constant = 1.0f; 144bf215546Sopenharmony_ci } 145bf215546Sopenharmony_ci 146bf215546Sopenharmony_ci void *invoc = pan_section_ptr(job.cpu, 147bf215546Sopenharmony_ci TILER_JOB, 148bf215546Sopenharmony_ci INVOCATION); 149bf215546Sopenharmony_ci panfrost_pack_work_groups_compute(invoc, 1, 4, 150bf215546Sopenharmony_ci 1, 1, 1, 1, true, false); 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg); 153bf215546Sopenharmony_ci pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) { 154bf215546Sopenharmony_ci cfg.address = tiler; 155bf215546Sopenharmony_ci } 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER, 158bf215546Sopenharmony_ci false, false, 0, 0, &job, false); 159bf215546Sopenharmony_ci return job; 160bf215546Sopenharmony_ci} 161bf215546Sopenharmony_ci 162bf215546Sopenharmony_cistatic struct panfrost_ptr 163bf215546Sopenharmony_cipanvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool, 164bf215546Sopenharmony_ci struct pan_scoreboard *scoreboard, 165bf215546Sopenharmony_ci const struct pan_compute_dim *num_wg, 166bf215546Sopenharmony_ci const struct pan_compute_dim *wg_sz, 167bf215546Sopenharmony_ci mali_ptr texture, mali_ptr sampler, 168bf215546Sopenharmony_ci mali_ptr push_constants, 169bf215546Sopenharmony_ci mali_ptr rsd, mali_ptr tsd) 170bf215546Sopenharmony_ci{ 171bf215546Sopenharmony_ci struct panfrost_ptr job = 172bf215546Sopenharmony_ci pan_pool_alloc_desc(desc_pool, COMPUTE_JOB); 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci void *invoc = pan_section_ptr(job.cpu, 175bf215546Sopenharmony_ci COMPUTE_JOB, 176bf215546Sopenharmony_ci INVOCATION); 177bf215546Sopenharmony_ci panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z, 178bf215546Sopenharmony_ci wg_sz->x, wg_sz->y, wg_sz->z, 179bf215546Sopenharmony_ci false, false); 180bf215546Sopenharmony_ci 181bf215546Sopenharmony_ci pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) { 182bf215546Sopenharmony_ci cfg.job_task_split = 8; 183bf215546Sopenharmony_ci } 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_ci panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler, 186bf215546Sopenharmony_ci 0, tsd, rsd, push_constants, 187bf215546Sopenharmony_ci pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW)); 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_ci panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE, 190bf215546Sopenharmony_ci false, false, 0, 0, &job, false); 191bf215546Sopenharmony_ci return job; 192bf215546Sopenharmony_ci} 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_ci 195bf215546Sopenharmony_cistatic uint32_t 196bf215546Sopenharmony_cipanvk_meta_copy_img_bifrost_raw_format(unsigned texelsize) 197bf215546Sopenharmony_ci{ 198bf215546Sopenharmony_ci switch (texelsize) { 199bf215546Sopenharmony_ci case 6: return MALI_RGB16UI << 12; 200bf215546Sopenharmony_ci case 8: return MALI_RG32UI << 12; 201bf215546Sopenharmony_ci case 12: return MALI_RGB32UI << 12; 202bf215546Sopenharmony_ci case 16: return MALI_RGBA32UI << 12; 203bf215546Sopenharmony_ci default: unreachable("Invalid texel size\n"); 204bf215546Sopenharmony_ci } 205bf215546Sopenharmony_ci} 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_cistatic mali_ptr 208bf215546Sopenharmony_cipanvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev, 209bf215546Sopenharmony_ci struct pan_pool *desc_pool, 210bf215546Sopenharmony_ci mali_ptr shader, 211bf215546Sopenharmony_ci const struct pan_shader_info *shader_info, 212bf215546Sopenharmony_ci enum pipe_format fmt, unsigned wrmask, 213bf215546Sopenharmony_ci bool from_img) 214bf215546Sopenharmony_ci{ 215bf215546Sopenharmony_ci struct panfrost_ptr rsd_ptr = 216bf215546Sopenharmony_ci pan_pool_alloc_desc_aggregate(desc_pool, 217bf215546Sopenharmony_ci PAN_DESC(RENDERER_STATE), 218bf215546Sopenharmony_ci PAN_DESC_ARRAY(1, BLEND)); 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci bool raw = util_format_get_blocksize(fmt) > 4; 221bf215546Sopenharmony_ci unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1; 222bf215546Sopenharmony_ci bool partialwrite = fullmask != wrmask && !raw; 223bf215546Sopenharmony_ci bool readstb = fullmask != wrmask && raw; 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { 226bf215546Sopenharmony_ci pan_shader_prepare_rsd(shader_info, shader, &cfg); 227bf215546Sopenharmony_ci if (from_img) { 228bf215546Sopenharmony_ci cfg.shader.varying_count = 1; 229bf215546Sopenharmony_ci cfg.shader.texture_count = 1; 230bf215546Sopenharmony_ci cfg.shader.sampler_count = 1; 231bf215546Sopenharmony_ci } 232bf215546Sopenharmony_ci cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION; 233bf215546Sopenharmony_ci cfg.multisample_misc.sample_mask = UINT16_MAX; 234bf215546Sopenharmony_ci cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS; 235bf215546Sopenharmony_ci cfg.stencil_mask_misc.stencil_mask_front = 0xFF; 236bf215546Sopenharmony_ci cfg.stencil_mask_misc.stencil_mask_back = 0xFF; 237bf215546Sopenharmony_ci cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS; 238bf215546Sopenharmony_ci cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE; 239bf215546Sopenharmony_ci cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE; 240bf215546Sopenharmony_ci cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE; 241bf215546Sopenharmony_ci cfg.stencil_front.mask = 0xFF; 242bf215546Sopenharmony_ci cfg.stencil_back = cfg.stencil_front; 243bf215546Sopenharmony_ci 244bf215546Sopenharmony_ci cfg.properties.allow_forward_pixel_to_be_killed = true; 245bf215546Sopenharmony_ci cfg.properties.allow_forward_pixel_to_kill = 246bf215546Sopenharmony_ci !partialwrite && !readstb; 247bf215546Sopenharmony_ci cfg.properties.zs_update_operation = 248bf215546Sopenharmony_ci MALI_PIXEL_KILL_STRONG_EARLY; 249bf215546Sopenharmony_ci cfg.properties.pixel_kill_operation = 250bf215546Sopenharmony_ci MALI_PIXEL_KILL_FORCE_EARLY; 251bf215546Sopenharmony_ci } 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_ci pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) { 254bf215546Sopenharmony_ci cfg.round_to_fb_precision = true; 255bf215546Sopenharmony_ci cfg.load_destination = partialwrite; 256bf215546Sopenharmony_ci cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC; 257bf215546Sopenharmony_ci cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC; 258bf215546Sopenharmony_ci cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO; 259bf215546Sopenharmony_ci cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC; 260bf215546Sopenharmony_ci cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC; 261bf215546Sopenharmony_ci cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO; 262bf215546Sopenharmony_ci cfg.internal.mode = 263bf215546Sopenharmony_ci partialwrite ? 264bf215546Sopenharmony_ci MALI_BLEND_MODE_FIXED_FUNCTION : 265bf215546Sopenharmony_ci MALI_BLEND_MODE_OPAQUE; 266bf215546Sopenharmony_ci cfg.equation.color_mask = partialwrite ? wrmask : 0xf; 267bf215546Sopenharmony_ci cfg.internal.fixed_function.num_comps = 4; 268bf215546Sopenharmony_ci if (!raw) { 269bf215546Sopenharmony_ci cfg.internal.fixed_function.conversion.memory_format = 270bf215546Sopenharmony_ci panfrost_format_to_bifrost_blend(pdev, fmt, false); 271bf215546Sopenharmony_ci cfg.internal.fixed_function.conversion.register_format = 272bf215546Sopenharmony_ci MALI_REGISTER_FILE_FORMAT_F32; 273bf215546Sopenharmony_ci } else { 274bf215546Sopenharmony_ci unsigned imgtexelsz = util_format_get_blocksize(fmt); 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci cfg.internal.fixed_function.conversion.memory_format = 277bf215546Sopenharmony_ci panvk_meta_copy_img_bifrost_raw_format(imgtexelsz); 278bf215546Sopenharmony_ci cfg.internal.fixed_function.conversion.register_format = 279bf215546Sopenharmony_ci (imgtexelsz & 2) ? 280bf215546Sopenharmony_ci MALI_REGISTER_FILE_FORMAT_U16 : 281bf215546Sopenharmony_ci MALI_REGISTER_FILE_FORMAT_U32; 282bf215546Sopenharmony_ci } 283bf215546Sopenharmony_ci } 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci return rsd_ptr.gpu; 286bf215546Sopenharmony_ci} 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_cistatic mali_ptr 289bf215546Sopenharmony_cipanvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev, 290bf215546Sopenharmony_ci struct pan_pool *desc_pool, 291bf215546Sopenharmony_ci mali_ptr shader, 292bf215546Sopenharmony_ci const struct pan_shader_info *shader_info, 293bf215546Sopenharmony_ci bool from_img) 294bf215546Sopenharmony_ci{ 295bf215546Sopenharmony_ci struct panfrost_ptr rsd_ptr = 296bf215546Sopenharmony_ci pan_pool_alloc_desc_aggregate(desc_pool, 297bf215546Sopenharmony_ci PAN_DESC(RENDERER_STATE)); 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_ci pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { 300bf215546Sopenharmony_ci pan_shader_prepare_rsd(shader_info, shader, &cfg); 301bf215546Sopenharmony_ci if (from_img) { 302bf215546Sopenharmony_ci cfg.shader.texture_count = 1; 303bf215546Sopenharmony_ci cfg.shader.sampler_count = 1; 304bf215546Sopenharmony_ci } 305bf215546Sopenharmony_ci } 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci return rsd_ptr.gpu; 308bf215546Sopenharmony_ci} 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_cistatic mali_ptr 311bf215546Sopenharmony_cipanvk_meta_copy_img2img_shader(struct panfrost_device *pdev, 312bf215546Sopenharmony_ci struct pan_pool *bin_pool, 313bf215546Sopenharmony_ci enum pipe_format srcfmt, 314bf215546Sopenharmony_ci enum pipe_format dstfmt, unsigned dstmask, 315bf215546Sopenharmony_ci unsigned texdim, bool texisarray, bool is_ms, 316bf215546Sopenharmony_ci struct pan_shader_info *shader_info) 317bf215546Sopenharmony_ci{ 318bf215546Sopenharmony_ci nir_builder b = 319bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 320bf215546Sopenharmony_ci GENX(pan_shader_get_compiler_options)(), 321bf215546Sopenharmony_ci "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)", 322bf215546Sopenharmony_ci util_format_name(srcfmt), util_format_name(dstfmt), 323bf215546Sopenharmony_ci texdim, texisarray ? "[]" : "", is_ms ? ",ms" : ""); 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci nir_variable *coord_var = 326bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_in, 327bf215546Sopenharmony_ci glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray), 328bf215546Sopenharmony_ci "coord"); 329bf215546Sopenharmony_ci coord_var->data.location = VARYING_SLOT_VAR0; 330bf215546Sopenharmony_ci nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var)); 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1); 333bf215546Sopenharmony_ci tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf; 334bf215546Sopenharmony_ci tex->texture_index = 0; 335bf215546Sopenharmony_ci tex->is_array = texisarray; 336bf215546Sopenharmony_ci tex->dest_type = util_format_is_unorm(srcfmt) ? 337bf215546Sopenharmony_ci nir_type_float32 : nir_type_uint32; 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci switch (texdim) { 340bf215546Sopenharmony_ci case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break; 341bf215546Sopenharmony_ci case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break; 342bf215546Sopenharmony_ci case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break; 343bf215546Sopenharmony_ci default: unreachable("Invalid texture dimension"); 344bf215546Sopenharmony_ci } 345bf215546Sopenharmony_ci 346bf215546Sopenharmony_ci tex->src[0].src_type = nir_tex_src_coord; 347bf215546Sopenharmony_ci tex->src[0].src = nir_src_for_ssa(coord); 348bf215546Sopenharmony_ci tex->coord_components = texdim + texisarray; 349bf215546Sopenharmony_ci 350bf215546Sopenharmony_ci if (is_ms) { 351bf215546Sopenharmony_ci tex->src[1].src_type = nir_tex_src_ms_index; 352bf215546Sopenharmony_ci tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b)); 353bf215546Sopenharmony_ci } 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 356bf215546Sopenharmony_ci nir_alu_type_get_type_size(tex->dest_type), NULL); 357bf215546Sopenharmony_ci nir_builder_instr_insert(&b, &tex->instr); 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci nir_ssa_def *texel = &tex->dest.ssa; 360bf215546Sopenharmony_ci 361bf215546Sopenharmony_ci unsigned dstcompsz = 362bf215546Sopenharmony_ci util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0); 363bf215546Sopenharmony_ci unsigned ndstcomps = util_format_get_nr_components(dstfmt); 364bf215546Sopenharmony_ci const struct glsl_type *outtype = NULL; 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) { 367bf215546Sopenharmony_ci nir_ssa_def *rgb = 368bf215546Sopenharmony_ci nir_f2u32(&b, nir_fmul(&b, texel, 369bf215546Sopenharmony_ci nir_vec3(&b, 370bf215546Sopenharmony_ci nir_imm_float(&b, 31), 371bf215546Sopenharmony_ci nir_imm_float(&b, 63), 372bf215546Sopenharmony_ci nir_imm_float(&b, 31)))); 373bf215546Sopenharmony_ci nir_ssa_def *rg = 374bf215546Sopenharmony_ci nir_vec2(&b, 375bf215546Sopenharmony_ci nir_ior(&b, nir_channel(&b, rgb, 0), 376bf215546Sopenharmony_ci nir_ishl(&b, nir_channel(&b, rgb, 1), 377bf215546Sopenharmony_ci nir_imm_int(&b, 5))), 378bf215546Sopenharmony_ci nir_ior(&b, 379bf215546Sopenharmony_ci nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3), 380bf215546Sopenharmony_ci nir_ishl(&b, nir_channel(&b, rgb, 2), 381bf215546Sopenharmony_ci nir_imm_int(&b, 3)))); 382bf215546Sopenharmony_ci rg = nir_iand_imm(&b, rg, 255); 383bf215546Sopenharmony_ci texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255); 384bf215546Sopenharmony_ci outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2); 385bf215546Sopenharmony_ci } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) { 386bf215546Sopenharmony_ci nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255)); 387bf215546Sopenharmony_ci nir_ssa_def *rgb = 388bf215546Sopenharmony_ci nir_vec3(&b, 389bf215546Sopenharmony_ci nir_channel(&b, rg, 0), 390bf215546Sopenharmony_ci nir_ior(&b, 391bf215546Sopenharmony_ci nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5), 392bf215546Sopenharmony_ci nir_ishl(&b, nir_channel(&b, rg, 1), 393bf215546Sopenharmony_ci nir_imm_int(&b, 3))), 394bf215546Sopenharmony_ci nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3)); 395bf215546Sopenharmony_ci rgb = nir_iand(&b, rgb, 396bf215546Sopenharmony_ci nir_vec3(&b, 397bf215546Sopenharmony_ci nir_imm_int(&b, 31), 398bf215546Sopenharmony_ci nir_imm_int(&b, 63), 399bf215546Sopenharmony_ci nir_imm_int(&b, 31))); 400bf215546Sopenharmony_ci texel = nir_fmul(&b, nir_u2f32(&b, rgb), 401bf215546Sopenharmony_ci nir_vec3(&b, 402bf215546Sopenharmony_ci nir_imm_float(&b, 1.0 / 31), 403bf215546Sopenharmony_ci nir_imm_float(&b, 1.0 / 63), 404bf215546Sopenharmony_ci nir_imm_float(&b, 1.0 / 31))); 405bf215546Sopenharmony_ci outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3); 406bf215546Sopenharmony_ci } else { 407bf215546Sopenharmony_ci assert(srcfmt == dstfmt); 408bf215546Sopenharmony_ci enum glsl_base_type basetype; 409bf215546Sopenharmony_ci if (util_format_is_unorm(dstfmt)) { 410bf215546Sopenharmony_ci basetype = GLSL_TYPE_FLOAT; 411bf215546Sopenharmony_ci } else if (dstcompsz == 16) { 412bf215546Sopenharmony_ci basetype = GLSL_TYPE_UINT16; 413bf215546Sopenharmony_ci } else { 414bf215546Sopenharmony_ci assert(dstcompsz == 32); 415bf215546Sopenharmony_ci basetype = GLSL_TYPE_UINT; 416bf215546Sopenharmony_ci } 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci if (dstcompsz == 16) 419bf215546Sopenharmony_ci texel = nir_u2u16(&b, texel); 420bf215546Sopenharmony_ci 421bf215546Sopenharmony_ci texel = nir_channels(&b, texel, (1 << ndstcomps) - 1); 422bf215546Sopenharmony_ci outtype = glsl_vector_type(basetype, ndstcomps); 423bf215546Sopenharmony_ci } 424bf215546Sopenharmony_ci 425bf215546Sopenharmony_ci nir_variable *out = 426bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_out, outtype, "out"); 427bf215546Sopenharmony_ci out->data.location = FRAG_RESULT_DATA0; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci unsigned fullmask = (1 << ndstcomps) - 1; 430bf215546Sopenharmony_ci if (dstcompsz > 8 && dstmask != fullmask) { 431bf215546Sopenharmony_ci nir_ssa_def *oldtexel = nir_load_var(&b, out); 432bf215546Sopenharmony_ci nir_ssa_def *dstcomps[4]; 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci for (unsigned i = 0; i < ndstcomps; i++) { 435bf215546Sopenharmony_ci if (dstmask & BITFIELD_BIT(i)) 436bf215546Sopenharmony_ci dstcomps[i] = nir_channel(&b, texel, i); 437bf215546Sopenharmony_ci else 438bf215546Sopenharmony_ci dstcomps[i] = nir_channel(&b, oldtexel, i); 439bf215546Sopenharmony_ci } 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_ci texel = nir_vec(&b, dstcomps, ndstcomps); 442bf215546Sopenharmony_ci } 443bf215546Sopenharmony_ci 444bf215546Sopenharmony_ci nir_store_var(&b, out, texel, 0xff); 445bf215546Sopenharmony_ci 446bf215546Sopenharmony_ci struct panfrost_compile_inputs inputs = { 447bf215546Sopenharmony_ci .gpu_id = pdev->gpu_id, 448bf215546Sopenharmony_ci .is_blit = true, 449bf215546Sopenharmony_ci .no_ubo_to_push = true, 450bf215546Sopenharmony_ci }; 451bf215546Sopenharmony_ci 452bf215546Sopenharmony_ci pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) { 453bf215546Sopenharmony_ci cfg.memory_format = (dstcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12; 454bf215546Sopenharmony_ci cfg.register_format = dstcompsz == 2 ? 455bf215546Sopenharmony_ci MALI_REGISTER_FILE_FORMAT_U16 : 456bf215546Sopenharmony_ci MALI_REGISTER_FILE_FORMAT_U32; 457bf215546Sopenharmony_ci } 458bf215546Sopenharmony_ci inputs.bifrost.static_rt_conv = true; 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci struct util_dynarray binary; 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci util_dynarray_init(&binary, NULL); 463bf215546Sopenharmony_ci GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 464bf215546Sopenharmony_ci 465bf215546Sopenharmony_ci shader_info->fs.sample_shading = is_ms; 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci mali_ptr shader = 468bf215546Sopenharmony_ci pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); 469bf215546Sopenharmony_ci 470bf215546Sopenharmony_ci util_dynarray_fini(&binary); 471bf215546Sopenharmony_ci ralloc_free(b.shader); 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci return shader; 474bf215546Sopenharmony_ci} 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_cistatic enum pipe_format 477bf215546Sopenharmony_cipanvk_meta_copy_img_format(enum pipe_format fmt) 478bf215546Sopenharmony_ci{ 479bf215546Sopenharmony_ci /* We can't use a non-compressed format when handling a tiled/AFBC 480bf215546Sopenharmony_ci * compressed format because the tile size differ (4x4 blocks for 481bf215546Sopenharmony_ci * compressed formats and 16x16 texels for non-compressed ones). 482bf215546Sopenharmony_ci */ 483bf215546Sopenharmony_ci assert(!util_format_is_compressed(fmt)); 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci /* Pick blendable formats when we can, otherwise pick the UINT variant 486bf215546Sopenharmony_ci * matching the texel size. 487bf215546Sopenharmony_ci */ 488bf215546Sopenharmony_ci switch (util_format_get_blocksize(fmt)) { 489bf215546Sopenharmony_ci case 16: return PIPE_FORMAT_R32G32B32A32_UINT; 490bf215546Sopenharmony_ci case 12: return PIPE_FORMAT_R32G32B32_UINT; 491bf215546Sopenharmony_ci case 8: return PIPE_FORMAT_R32G32_UINT; 492bf215546Sopenharmony_ci case 6: return PIPE_FORMAT_R16G16B16_UINT; 493bf215546Sopenharmony_ci case 4: return PIPE_FORMAT_R8G8B8A8_UNORM; 494bf215546Sopenharmony_ci case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM || 495bf215546Sopenharmony_ci fmt == PIPE_FORMAT_B5G6R5_UNORM) ? 496bf215546Sopenharmony_ci PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM; 497bf215546Sopenharmony_ci case 1: return PIPE_FORMAT_R8_UNORM; 498bf215546Sopenharmony_ci default: unreachable("Unsupported format\n"); 499bf215546Sopenharmony_ci } 500bf215546Sopenharmony_ci} 501bf215546Sopenharmony_ci 502bf215546Sopenharmony_cistruct panvk_meta_copy_img2img_format_info { 503bf215546Sopenharmony_ci enum pipe_format srcfmt; 504bf215546Sopenharmony_ci enum pipe_format dstfmt; 505bf215546Sopenharmony_ci unsigned dstmask; 506bf215546Sopenharmony_ci} PACKED; 507bf215546Sopenharmony_ci 508bf215546Sopenharmony_cistatic const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = { 509bf215546Sopenharmony_ci { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1}, 510bf215546Sopenharmony_ci { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7}, 511bf215546Sopenharmony_ci { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3}, 512bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7}, 513bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3}, 514bf215546Sopenharmony_ci /* Z24S8(depth) */ 515bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 }, 516bf215546Sopenharmony_ci /* Z24S8(stencil) */ 517bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 }, 518bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf }, 519bf215546Sopenharmony_ci { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 }, 520bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 }, 521bf215546Sopenharmony_ci /* Z32S8X24(depth) */ 522bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 }, 523bf215546Sopenharmony_ci /* Z32S8X24(stencil) */ 524bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 }, 525bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 }, 526bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf }, 527bf215546Sopenharmony_ci}; 528bf215546Sopenharmony_ci 529bf215546Sopenharmony_cistatic unsigned 530bf215546Sopenharmony_cipanvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key) 531bf215546Sopenharmony_ci{ 532bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS); 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) { 535bf215546Sopenharmony_ci if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key))) 536bf215546Sopenharmony_ci return i; 537bf215546Sopenharmony_ci } 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci unreachable("Invalid image format\n"); 540bf215546Sopenharmony_ci} 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_cistatic unsigned 543bf215546Sopenharmony_cipanvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask) 544bf215546Sopenharmony_ci{ 545bf215546Sopenharmony_ci if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT && 546bf215546Sopenharmony_ci aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) { 547bf215546Sopenharmony_ci enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt); 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_ci return (1 << util_format_get_nr_components(outfmt)) - 1; 550bf215546Sopenharmony_ci } 551bf215546Sopenharmony_ci 552bf215546Sopenharmony_ci switch (imgfmt) { 553bf215546Sopenharmony_ci case PIPE_FORMAT_S8_UINT: 554bf215546Sopenharmony_ci return 1; 555bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM: 556bf215546Sopenharmony_ci return 3; 557bf215546Sopenharmony_ci case PIPE_FORMAT_Z16_UNORM_S8_UINT: 558bf215546Sopenharmony_ci return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8; 559bf215546Sopenharmony_ci case PIPE_FORMAT_Z24_UNORM_S8_UINT: 560bf215546Sopenharmony_ci return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8; 561bf215546Sopenharmony_ci case PIPE_FORMAT_Z24X8_UNORM: 562bf215546Sopenharmony_ci assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT); 563bf215546Sopenharmony_ci return 7; 564bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT: 565bf215546Sopenharmony_ci return 0xf; 566bf215546Sopenharmony_ci case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT: 567bf215546Sopenharmony_ci return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2; 568bf215546Sopenharmony_ci default: 569bf215546Sopenharmony_ci unreachable("Invalid depth format\n"); 570bf215546Sopenharmony_ci } 571bf215546Sopenharmony_ci} 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_cistatic void 574bf215546Sopenharmony_cipanvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf, 575bf215546Sopenharmony_ci const struct panvk_image *src, 576bf215546Sopenharmony_ci const struct panvk_image *dst, 577bf215546Sopenharmony_ci const VkImageCopy2 *region) 578bf215546Sopenharmony_ci{ 579bf215546Sopenharmony_ci struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 580bf215546Sopenharmony_ci struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; 581bf215546Sopenharmony_ci struct panvk_meta_copy_img2img_format_info key = { 582bf215546Sopenharmony_ci .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format), 583bf215546Sopenharmony_ci .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format), 584bf215546Sopenharmony_ci .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format, 585bf215546Sopenharmony_ci region->dstSubresource.aspectMask), 586bf215546Sopenharmony_ci }; 587bf215546Sopenharmony_ci 588bf215546Sopenharmony_ci assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples); 589bf215546Sopenharmony_ci 590bf215546Sopenharmony_ci unsigned texdimidx = 591bf215546Sopenharmony_ci panvk_meta_copy_tex_type(src->pimage.layout.dim, 592bf215546Sopenharmony_ci src->pimage.layout.array_size > 1); 593bf215546Sopenharmony_ci unsigned fmtidx = 594bf215546Sopenharmony_ci panvk_meta_copy_img2img_format_idx(key); 595bf215546Sopenharmony_ci unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0; 596bf215546Sopenharmony_ci 597bf215546Sopenharmony_ci mali_ptr rsd = 598bf215546Sopenharmony_ci cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd; 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci struct pan_image_view srcview = { 601bf215546Sopenharmony_ci .format = key.srcfmt, 602bf215546Sopenharmony_ci .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ? 603bf215546Sopenharmony_ci MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim, 604bf215546Sopenharmony_ci .image = &src->pimage, 605bf215546Sopenharmony_ci .nr_samples = src->pimage.layout.nr_samples, 606bf215546Sopenharmony_ci .first_level = region->srcSubresource.mipLevel, 607bf215546Sopenharmony_ci .last_level = region->srcSubresource.mipLevel, 608bf215546Sopenharmony_ci .first_layer = region->srcSubresource.baseArrayLayer, 609bf215546Sopenharmony_ci .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1, 610bf215546Sopenharmony_ci .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 611bf215546Sopenharmony_ci }; 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_ci struct pan_image_view dstview = { 614bf215546Sopenharmony_ci .format = key.dstfmt, 615bf215546Sopenharmony_ci .dim = MALI_TEXTURE_DIMENSION_2D, 616bf215546Sopenharmony_ci .image = &dst->pimage, 617bf215546Sopenharmony_ci .nr_samples = dst->pimage.layout.nr_samples, 618bf215546Sopenharmony_ci .first_level = region->dstSubresource.mipLevel, 619bf215546Sopenharmony_ci .last_level = region->dstSubresource.mipLevel, 620bf215546Sopenharmony_ci .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 621bf215546Sopenharmony_ci }; 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci unsigned minx = MAX2(region->dstOffset.x, 0); 624bf215546Sopenharmony_ci unsigned miny = MAX2(region->dstOffset.y, 0); 625bf215546Sopenharmony_ci unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0); 626bf215546Sopenharmony_ci unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0); 627bf215546Sopenharmony_ci 628bf215546Sopenharmony_ci mali_ptr vpd = 629bf215546Sopenharmony_ci panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base, 630bf215546Sopenharmony_ci minx, miny, maxx, maxy); 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_ci float dst_rect[] = { 633bf215546Sopenharmony_ci minx, miny, 0.0, 1.0, 634bf215546Sopenharmony_ci maxx + 1, miny, 0.0, 1.0, 635bf215546Sopenharmony_ci minx, maxy + 1, 0.0, 1.0, 636bf215546Sopenharmony_ci maxx + 1, maxy + 1, 0.0, 1.0, 637bf215546Sopenharmony_ci }; 638bf215546Sopenharmony_ci 639bf215546Sopenharmony_ci mali_ptr dst_coords = 640bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect, 641bf215546Sopenharmony_ci sizeof(dst_rect), 64); 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci /* TODO: don't force preloads of dst resources if unneeded */ 644bf215546Sopenharmony_ci 645bf215546Sopenharmony_ci unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel); 646bf215546Sopenharmony_ci unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel); 647bf215546Sopenharmony_ci cmdbuf->state.fb.crc_valid[0] = false; 648bf215546Sopenharmony_ci *fbinfo = (struct pan_fb_info){ 649bf215546Sopenharmony_ci .width = width, 650bf215546Sopenharmony_ci .height = height, 651bf215546Sopenharmony_ci .extent.minx = minx & ~31, 652bf215546Sopenharmony_ci .extent.miny = miny & ~31, 653bf215546Sopenharmony_ci .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1, 654bf215546Sopenharmony_ci .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1, 655bf215546Sopenharmony_ci .nr_samples = dst->pimage.layout.nr_samples, 656bf215546Sopenharmony_ci .rt_count = 1, 657bf215546Sopenharmony_ci .rts[0].view = &dstview, 658bf215546Sopenharmony_ci .rts[0].preload = true, 659bf215546Sopenharmony_ci .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0], 660bf215546Sopenharmony_ci }; 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci mali_ptr texture = 663bf215546Sopenharmony_ci panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview); 664bf215546Sopenharmony_ci mali_ptr sampler = 665bf215546Sopenharmony_ci panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base); 666bf215546Sopenharmony_ci 667bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 668bf215546Sopenharmony_ci 669bf215546Sopenharmony_ci minx = MAX2(region->srcOffset.x, 0); 670bf215546Sopenharmony_ci miny = MAX2(region->srcOffset.y, 0); 671bf215546Sopenharmony_ci maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0); 672bf215546Sopenharmony_ci maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0); 673bf215546Sopenharmony_ci assert(region->dstOffset.z >= 0); 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci unsigned first_src_layer = MAX2(0, region->srcOffset.z); 676bf215546Sopenharmony_ci unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z); 677bf215546Sopenharmony_ci unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth); 678bf215546Sopenharmony_ci for (unsigned l = 0; l < nlayers; l++) { 679bf215546Sopenharmony_ci unsigned src_l = l + first_src_layer; 680bf215546Sopenharmony_ci float src_rect[] = { 681bf215546Sopenharmony_ci minx, miny, src_l, 1.0, 682bf215546Sopenharmony_ci maxx + 1, miny, src_l, 1.0, 683bf215546Sopenharmony_ci minx, maxy + 1, src_l, 1.0, 684bf215546Sopenharmony_ci maxx + 1, maxy + 1, src_l, 1.0, 685bf215546Sopenharmony_ci }; 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci mali_ptr src_coords = 688bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect, 689bf215546Sopenharmony_ci sizeof(src_rect), 64); 690bf215546Sopenharmony_ci 691bf215546Sopenharmony_ci struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci dstview.first_layer = dstview.last_layer = l + first_dst_layer; 694bf215546Sopenharmony_ci batch->blit.src = src->pimage.data.bo; 695bf215546Sopenharmony_ci batch->blit.dst = dst->pimage.data.bo; 696bf215546Sopenharmony_ci panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); 697bf215546Sopenharmony_ci panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); 698bf215546Sopenharmony_ci panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf); 699bf215546Sopenharmony_ci 700bf215546Sopenharmony_ci mali_ptr tsd, tiler; 701bf215546Sopenharmony_ci 702bf215546Sopenharmony_ci tsd = batch->tls.gpu; 703bf215546Sopenharmony_ci tiler = batch->tiler.descs.gpu; 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci struct panfrost_ptr job; 706bf215546Sopenharmony_ci 707bf215546Sopenharmony_ci job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, 708bf215546Sopenharmony_ci &batch->scoreboard, 709bf215546Sopenharmony_ci src_coords, dst_coords, 710bf215546Sopenharmony_ci texture, sampler, 0, 711bf215546Sopenharmony_ci vpd, rsd, tsd, tiler); 712bf215546Sopenharmony_ci 713bf215546Sopenharmony_ci util_dynarray_append(&batch->jobs, void *, job.cpu); 714bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 715bf215546Sopenharmony_ci } 716bf215546Sopenharmony_ci} 717bf215546Sopenharmony_ci 718bf215546Sopenharmony_cistatic void 719bf215546Sopenharmony_cipanvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms) 720bf215546Sopenharmony_ci{ 721bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS); 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) { 724bf215546Sopenharmony_ci for (unsigned texdim = 1; texdim <= 3; texdim++) { 725bf215546Sopenharmony_ci unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false); 726bf215546Sopenharmony_ci assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0])); 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci /* No MSAA on 3D textures */ 729bf215546Sopenharmony_ci if (texdim == 3 && is_ms) continue; 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci struct pan_shader_info shader_info; 732bf215546Sopenharmony_ci mali_ptr shader = 733bf215546Sopenharmony_ci panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base, 734bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].srcfmt, 735bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstfmt, 736bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstmask, 737bf215546Sopenharmony_ci texdim, false, is_ms, &shader_info); 738bf215546Sopenharmony_ci dev->meta.copy.img2img[is_ms][texdimidx][i].rsd = 739bf215546Sopenharmony_ci panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 740bf215546Sopenharmony_ci shader, &shader_info, 741bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstfmt, 742bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstmask, 743bf215546Sopenharmony_ci true); 744bf215546Sopenharmony_ci if (texdim == 3) 745bf215546Sopenharmony_ci continue; 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci memset(&shader_info, 0, sizeof(shader_info)); 748bf215546Sopenharmony_ci texdimidx = panvk_meta_copy_tex_type(texdim, true); 749bf215546Sopenharmony_ci assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0])); 750bf215546Sopenharmony_ci shader = 751bf215546Sopenharmony_ci panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base, 752bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].srcfmt, 753bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstfmt, 754bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstmask, 755bf215546Sopenharmony_ci texdim, true, is_ms, &shader_info); 756bf215546Sopenharmony_ci dev->meta.copy.img2img[is_ms][texdimidx][i].rsd = 757bf215546Sopenharmony_ci panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 758bf215546Sopenharmony_ci shader, &shader_info, 759bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstfmt, 760bf215546Sopenharmony_ci panvk_meta_copy_img2img_fmts[i].dstmask, 761bf215546Sopenharmony_ci true); 762bf215546Sopenharmony_ci } 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci} 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_civoid 767bf215546Sopenharmony_cipanvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer, 768bf215546Sopenharmony_ci const VkCopyImageInfo2 *pCopyImageInfo) 769bf215546Sopenharmony_ci{ 770bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 771bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage); 772bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage); 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) { 775bf215546Sopenharmony_ci panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]); 776bf215546Sopenharmony_ci } 777bf215546Sopenharmony_ci} 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_cistatic unsigned 780bf215546Sopenharmony_cipanvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask) 781bf215546Sopenharmony_ci{ 782bf215546Sopenharmony_ci unsigned imgtexelsz = util_format_get_blocksize(imgfmt); 783bf215546Sopenharmony_ci unsigned nbufcomps = util_bitcount(mask); 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_ci if (nbufcomps == util_format_get_nr_components(imgfmt)) 786bf215546Sopenharmony_ci return imgtexelsz; 787bf215546Sopenharmony_ci 788bf215546Sopenharmony_ci /* Special case for Z24 buffers which are not tightly packed */ 789bf215546Sopenharmony_ci if (mask == 7 && imgtexelsz == 4) 790bf215546Sopenharmony_ci return 4; 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci /* Special case for S8 extraction from Z32_S8X24 */ 793bf215546Sopenharmony_ci if (mask == 2 && imgtexelsz == 8) 794bf215546Sopenharmony_ci return 1; 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci unsigned compsz = 797bf215546Sopenharmony_ci util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0); 798bf215546Sopenharmony_ci 799bf215546Sopenharmony_ci assert(!(compsz % 8)); 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci return nbufcomps * compsz / 8; 802bf215546Sopenharmony_ci} 803bf215546Sopenharmony_ci 804bf215546Sopenharmony_cistatic enum pipe_format 805bf215546Sopenharmony_cipanvk_meta_copy_buf2img_format(enum pipe_format imgfmt) 806bf215546Sopenharmony_ci{ 807bf215546Sopenharmony_ci /* Pick blendable formats when we can, and the FLOAT variant matching the 808bf215546Sopenharmony_ci * texelsize otherwise. 809bf215546Sopenharmony_ci */ 810bf215546Sopenharmony_ci switch (util_format_get_blocksize(imgfmt)) { 811bf215546Sopenharmony_ci case 1: return PIPE_FORMAT_R8_UNORM; 812bf215546Sopenharmony_ci /* AFBC stores things differently for RGB565, 813bf215546Sopenharmony_ci * we can't simply map to R8G8 in that case */ 814bf215546Sopenharmony_ci case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM || 815bf215546Sopenharmony_ci imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ? 816bf215546Sopenharmony_ci PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM; 817bf215546Sopenharmony_ci case 4: return PIPE_FORMAT_R8G8B8A8_UNORM; 818bf215546Sopenharmony_ci case 6: return PIPE_FORMAT_R16G16B16_UINT; 819bf215546Sopenharmony_ci case 8: return PIPE_FORMAT_R32G32_UINT; 820bf215546Sopenharmony_ci case 12: return PIPE_FORMAT_R32G32B32_UINT; 821bf215546Sopenharmony_ci case 16: return PIPE_FORMAT_R32G32B32A32_UINT; 822bf215546Sopenharmony_ci default: unreachable("Invalid format\n"); 823bf215546Sopenharmony_ci } 824bf215546Sopenharmony_ci} 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_cistruct panvk_meta_copy_format_info { 827bf215546Sopenharmony_ci enum pipe_format imgfmt; 828bf215546Sopenharmony_ci unsigned mask; 829bf215546Sopenharmony_ci} PACKED; 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_cistatic const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = { 832bf215546Sopenharmony_ci { PIPE_FORMAT_R8_UNORM, 0x1 }, 833bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8_UNORM, 0x3 }, 834bf215546Sopenharmony_ci { PIPE_FORMAT_R5G6B5_UNORM, 0x7 }, 835bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf }, 836bf215546Sopenharmony_ci { PIPE_FORMAT_R16G16B16_UINT, 0x7 }, 837bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, 0x3 }, 838bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32B32_UINT, 0x7 }, 839bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32B32A32_UINT, 0xf }, 840bf215546Sopenharmony_ci /* S8 -> Z24S8 */ 841bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 }, 842bf215546Sopenharmony_ci /* S8 -> Z32_S8X24 */ 843bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, 0x2 }, 844bf215546Sopenharmony_ci /* Z24X8 -> Z24S8 */ 845bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 }, 846bf215546Sopenharmony_ci /* Z32 -> Z32_S8X24 */ 847bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, 0x1 }, 848bf215546Sopenharmony_ci}; 849bf215546Sopenharmony_ci 850bf215546Sopenharmony_cistruct panvk_meta_copy_buf2img_info { 851bf215546Sopenharmony_ci struct { 852bf215546Sopenharmony_ci mali_ptr ptr; 853bf215546Sopenharmony_ci struct { 854bf215546Sopenharmony_ci unsigned line; 855bf215546Sopenharmony_ci unsigned surf; 856bf215546Sopenharmony_ci } stride; 857bf215546Sopenharmony_ci } buf; 858bf215546Sopenharmony_ci} PACKED; 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci#define panvk_meta_copy_buf2img_get_info_field(b, field) \ 861bf215546Sopenharmony_ci nir_load_push_constant((b), 1, \ 862bf215546Sopenharmony_ci sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \ 863bf215546Sopenharmony_ci nir_imm_int(b, 0), \ 864bf215546Sopenharmony_ci .base = offsetof(struct panvk_meta_copy_buf2img_info, field), \ 865bf215546Sopenharmony_ci .range = ~0) 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_cistatic mali_ptr 868bf215546Sopenharmony_cipanvk_meta_copy_buf2img_shader(struct panfrost_device *pdev, 869bf215546Sopenharmony_ci struct pan_pool *bin_pool, 870bf215546Sopenharmony_ci struct panvk_meta_copy_format_info key, 871bf215546Sopenharmony_ci struct pan_shader_info *shader_info) 872bf215546Sopenharmony_ci{ 873bf215546Sopenharmony_ci nir_builder b = 874bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 875bf215546Sopenharmony_ci GENX(pan_shader_get_compiler_options)(), 876bf215546Sopenharmony_ci "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)", 877bf215546Sopenharmony_ci util_format_name(key.imgfmt), 878bf215546Sopenharmony_ci key.mask); 879bf215546Sopenharmony_ci 880bf215546Sopenharmony_ci nir_variable *coord_var = 881bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_in, 882bf215546Sopenharmony_ci glsl_vector_type(GLSL_TYPE_FLOAT, 3), 883bf215546Sopenharmony_ci "coord"); 884bf215546Sopenharmony_ci coord_var->data.location = VARYING_SLOT_VAR0; 885bf215546Sopenharmony_ci nir_ssa_def *coord = nir_load_var(&b, coord_var); 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci coord = nir_f2u32(&b, coord); 888bf215546Sopenharmony_ci 889bf215546Sopenharmony_ci nir_ssa_def *bufptr = 890bf215546Sopenharmony_ci panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr); 891bf215546Sopenharmony_ci nir_ssa_def *buflinestride = 892bf215546Sopenharmony_ci panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line); 893bf215546Sopenharmony_ci nir_ssa_def *bufsurfstride = 894bf215546Sopenharmony_ci panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf); 895bf215546Sopenharmony_ci 896bf215546Sopenharmony_ci unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt); 897bf215546Sopenharmony_ci unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); 898bf215546Sopenharmony_ci unsigned writemask = key.mask; 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci nir_ssa_def *offset = 901bf215546Sopenharmony_ci nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz)); 902bf215546Sopenharmony_ci offset = nir_iadd(&b, offset, 903bf215546Sopenharmony_ci nir_imul(&b, nir_channel(&b, coord, 1), buflinestride)); 904bf215546Sopenharmony_ci offset = nir_iadd(&b, offset, 905bf215546Sopenharmony_ci nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride)); 906bf215546Sopenharmony_ci bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset)); 907bf215546Sopenharmony_ci 908bf215546Sopenharmony_ci unsigned imgcompsz = 909bf215546Sopenharmony_ci (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ? 910bf215546Sopenharmony_ci 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4); 911bf215546Sopenharmony_ci 912bf215546Sopenharmony_ci unsigned nimgcomps = imgtexelsz / imgcompsz; 913bf215546Sopenharmony_ci unsigned bufcompsz = MIN2(buftexelsz, imgcompsz); 914bf215546Sopenharmony_ci unsigned nbufcomps = buftexelsz / bufcompsz; 915bf215546Sopenharmony_ci 916bf215546Sopenharmony_ci assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4); 917bf215546Sopenharmony_ci assert(nbufcomps <= 4 && nimgcomps <= 4); 918bf215546Sopenharmony_ci 919bf215546Sopenharmony_ci nir_ssa_def *texel = 920bf215546Sopenharmony_ci nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8); 921bf215546Sopenharmony_ci 922bf215546Sopenharmony_ci enum glsl_base_type basetype; 923bf215546Sopenharmony_ci if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) { 924bf215546Sopenharmony_ci texel = nir_vec3(&b, 925bf215546Sopenharmony_ci nir_iand_imm(&b, texel, BITFIELD_MASK(5)), 926bf215546Sopenharmony_ci nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)), 927bf215546Sopenharmony_ci nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5))); 928bf215546Sopenharmony_ci texel = nir_fmul(&b, 929bf215546Sopenharmony_ci nir_u2f32(&b, texel), 930bf215546Sopenharmony_ci nir_vec3(&b, 931bf215546Sopenharmony_ci nir_imm_float(&b, 1.0f / 31), 932bf215546Sopenharmony_ci nir_imm_float(&b, 1.0f / 63), 933bf215546Sopenharmony_ci nir_imm_float(&b, 1.0f / 31))); 934bf215546Sopenharmony_ci nimgcomps = 3; 935bf215546Sopenharmony_ci basetype = GLSL_TYPE_FLOAT; 936bf215546Sopenharmony_ci } else if (imgcompsz == 1) { 937bf215546Sopenharmony_ci assert(bufcompsz == 1); 938bf215546Sopenharmony_ci /* Blendable formats are unorm and the fixed-function blend unit 939bf215546Sopenharmony_ci * takes float values. 940bf215546Sopenharmony_ci */ 941bf215546Sopenharmony_ci texel = nir_fmul(&b, nir_u2f32(&b, texel), 942bf215546Sopenharmony_ci nir_imm_float(&b, 1.0f / 255)); 943bf215546Sopenharmony_ci basetype = GLSL_TYPE_FLOAT; 944bf215546Sopenharmony_ci } else { 945bf215546Sopenharmony_ci texel = nir_u2uN(&b, texel, imgcompsz * 8); 946bf215546Sopenharmony_ci basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT; 947bf215546Sopenharmony_ci } 948bf215546Sopenharmony_ci 949bf215546Sopenharmony_ci /* We always pass the texel using 32-bit regs for now */ 950bf215546Sopenharmony_ci nir_variable *out = 951bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_out, 952bf215546Sopenharmony_ci glsl_vector_type(basetype, nimgcomps), 953bf215546Sopenharmony_ci "out"); 954bf215546Sopenharmony_ci out->data.location = FRAG_RESULT_DATA0; 955bf215546Sopenharmony_ci 956bf215546Sopenharmony_ci uint16_t fullmask = (1 << nimgcomps) - 1; 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci assert(fullmask >= writemask); 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci if (fullmask != writemask) { 961bf215546Sopenharmony_ci unsigned first_written_comp = ffs(writemask) - 1; 962bf215546Sopenharmony_ci nir_ssa_def *oldtexel = NULL; 963bf215546Sopenharmony_ci if (imgcompsz > 1) 964bf215546Sopenharmony_ci oldtexel = nir_load_var(&b, out); 965bf215546Sopenharmony_ci 966bf215546Sopenharmony_ci nir_ssa_def *texel_comps[4]; 967bf215546Sopenharmony_ci for (unsigned i = 0; i < nimgcomps; i++) { 968bf215546Sopenharmony_ci if (writemask & BITFIELD_BIT(i)) 969bf215546Sopenharmony_ci texel_comps[i] = nir_channel(&b, texel, i - first_written_comp); 970bf215546Sopenharmony_ci else if (imgcompsz > 1) 971bf215546Sopenharmony_ci texel_comps[i] = nir_channel(&b, oldtexel, i); 972bf215546Sopenharmony_ci else 973bf215546Sopenharmony_ci texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size); 974bf215546Sopenharmony_ci } 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci texel = nir_vec(&b, texel_comps, nimgcomps); 977bf215546Sopenharmony_ci } 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci nir_store_var(&b, out, texel, 0xff); 980bf215546Sopenharmony_ci 981bf215546Sopenharmony_ci struct panfrost_compile_inputs inputs = { 982bf215546Sopenharmony_ci .gpu_id = pdev->gpu_id, 983bf215546Sopenharmony_ci .is_blit = true, 984bf215546Sopenharmony_ci .no_ubo_to_push = true, 985bf215546Sopenharmony_ci }; 986bf215546Sopenharmony_ci 987bf215546Sopenharmony_ci pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) { 988bf215546Sopenharmony_ci cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12; 989bf215546Sopenharmony_ci cfg.register_format = imgcompsz == 2 ? 990bf215546Sopenharmony_ci MALI_REGISTER_FILE_FORMAT_U16 : 991bf215546Sopenharmony_ci MALI_REGISTER_FILE_FORMAT_U32; 992bf215546Sopenharmony_ci } 993bf215546Sopenharmony_ci inputs.bifrost.static_rt_conv = true; 994bf215546Sopenharmony_ci 995bf215546Sopenharmony_ci struct util_dynarray binary; 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci util_dynarray_init(&binary, NULL); 998bf215546Sopenharmony_ci GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 999bf215546Sopenharmony_ci shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4); 1000bf215546Sopenharmony_ci 1001bf215546Sopenharmony_ci mali_ptr shader = 1002bf215546Sopenharmony_ci pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); 1003bf215546Sopenharmony_ci 1004bf215546Sopenharmony_ci util_dynarray_fini(&binary); 1005bf215546Sopenharmony_ci ralloc_free(b.shader); 1006bf215546Sopenharmony_ci 1007bf215546Sopenharmony_ci return shader; 1008bf215546Sopenharmony_ci} 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_cistatic unsigned 1011bf215546Sopenharmony_cipanvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key) 1012bf215546Sopenharmony_ci{ 1013bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) { 1014bf215546Sopenharmony_ci if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key))) 1015bf215546Sopenharmony_ci return i; 1016bf215546Sopenharmony_ci } 1017bf215546Sopenharmony_ci 1018bf215546Sopenharmony_ci unreachable("Invalid image format\n"); 1019bf215546Sopenharmony_ci} 1020bf215546Sopenharmony_ci 1021bf215546Sopenharmony_cistatic void 1022bf215546Sopenharmony_cipanvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf, 1023bf215546Sopenharmony_ci const struct panvk_buffer *buf, 1024bf215546Sopenharmony_ci const struct panvk_image *img, 1025bf215546Sopenharmony_ci const VkBufferImageCopy2 *region) 1026bf215546Sopenharmony_ci{ 1027bf215546Sopenharmony_ci struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info; 1028bf215546Sopenharmony_ci unsigned minx = MAX2(region->imageOffset.x, 0); 1029bf215546Sopenharmony_ci unsigned miny = MAX2(region->imageOffset.y, 0); 1030bf215546Sopenharmony_ci unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0); 1031bf215546Sopenharmony_ci unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0); 1032bf215546Sopenharmony_ci 1033bf215546Sopenharmony_ci mali_ptr vpd = 1034bf215546Sopenharmony_ci panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base, 1035bf215546Sopenharmony_ci minx, miny, maxx, maxy); 1036bf215546Sopenharmony_ci 1037bf215546Sopenharmony_ci float dst_rect[] = { 1038bf215546Sopenharmony_ci minx, miny, 0.0, 1.0, 1039bf215546Sopenharmony_ci maxx + 1, miny, 0.0, 1.0, 1040bf215546Sopenharmony_ci minx, maxy + 1, 0.0, 1.0, 1041bf215546Sopenharmony_ci maxx + 1, maxy + 1, 0.0, 1.0, 1042bf215546Sopenharmony_ci }; 1043bf215546Sopenharmony_ci mali_ptr dst_coords = 1044bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect, 1045bf215546Sopenharmony_ci sizeof(dst_rect), 64); 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci struct panvk_meta_copy_format_info key = { 1048bf215546Sopenharmony_ci .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format), 1049bf215546Sopenharmony_ci .mask = panvk_meta_copy_img_mask(img->pimage.layout.format, 1050bf215546Sopenharmony_ci region->imageSubresource.aspectMask), 1051bf215546Sopenharmony_ci }; 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key); 1054bf215546Sopenharmony_ci 1055bf215546Sopenharmony_ci mali_ptr rsd = 1056bf215546Sopenharmony_ci cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd; 1057bf215546Sopenharmony_ci 1058bf215546Sopenharmony_ci const struct vk_image_buffer_layout buflayout = 1059bf215546Sopenharmony_ci vk_image_buffer_copy_layout(&img->vk, region); 1060bf215546Sopenharmony_ci struct panvk_meta_copy_buf2img_info info = { 1061bf215546Sopenharmony_ci .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset), 1062bf215546Sopenharmony_ci .buf.stride.line = buflayout.row_stride_B, 1063bf215546Sopenharmony_ci .buf.stride.surf = buflayout.image_stride_B, 1064bf215546Sopenharmony_ci }; 1065bf215546Sopenharmony_ci 1066bf215546Sopenharmony_ci mali_ptr pushconsts = 1067bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci struct pan_image_view view = { 1070bf215546Sopenharmony_ci .format = key.imgfmt, 1071bf215546Sopenharmony_ci .dim = MALI_TEXTURE_DIMENSION_2D, 1072bf215546Sopenharmony_ci .image = &img->pimage, 1073bf215546Sopenharmony_ci .nr_samples = img->pimage.layout.nr_samples, 1074bf215546Sopenharmony_ci .first_level = region->imageSubresource.mipLevel, 1075bf215546Sopenharmony_ci .last_level = region->imageSubresource.mipLevel, 1076bf215546Sopenharmony_ci .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 1077bf215546Sopenharmony_ci }; 1078bf215546Sopenharmony_ci 1079bf215546Sopenharmony_ci /* TODO: don't force preloads of dst resources if unneeded */ 1080bf215546Sopenharmony_ci cmdbuf->state.fb.crc_valid[0] = false; 1081bf215546Sopenharmony_ci *fbinfo = (struct pan_fb_info){ 1082bf215546Sopenharmony_ci .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel), 1083bf215546Sopenharmony_ci .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel), 1084bf215546Sopenharmony_ci .extent.minx = minx, 1085bf215546Sopenharmony_ci .extent.maxx = maxx, 1086bf215546Sopenharmony_ci .extent.miny = miny, 1087bf215546Sopenharmony_ci .extent.maxy = maxy, 1088bf215546Sopenharmony_ci .nr_samples = 1, 1089bf215546Sopenharmony_ci .rt_count = 1, 1090bf215546Sopenharmony_ci .rts[0].view = &view, 1091bf215546Sopenharmony_ci .rts[0].preload = true, 1092bf215546Sopenharmony_ci .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0], 1093bf215546Sopenharmony_ci }; 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_ci assert(region->imageSubresource.layerCount == 1 || 1098bf215546Sopenharmony_ci region->imageExtent.depth == 1); 1099bf215546Sopenharmony_ci assert(region->imageOffset.z >= 0); 1100bf215546Sopenharmony_ci unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z); 1101bf215546Sopenharmony_ci unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth); 1102bf215546Sopenharmony_ci for (unsigned l = 0; l < nlayers; l++) { 1103bf215546Sopenharmony_ci float src_rect[] = { 1104bf215546Sopenharmony_ci 0, 0, l, 1.0, 1105bf215546Sopenharmony_ci region->imageExtent.width, 0, l, 1.0, 1106bf215546Sopenharmony_ci 0, region->imageExtent.height, l, 1.0, 1107bf215546Sopenharmony_ci region->imageExtent.width, region->imageExtent.height, l, 1.0, 1108bf215546Sopenharmony_ci }; 1109bf215546Sopenharmony_ci 1110bf215546Sopenharmony_ci mali_ptr src_coords = 1111bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect, 1112bf215546Sopenharmony_ci sizeof(src_rect), 64); 1113bf215546Sopenharmony_ci 1114bf215546Sopenharmony_ci struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1115bf215546Sopenharmony_ci 1116bf215546Sopenharmony_ci view.first_layer = view.last_layer = l + first_layer; 1117bf215546Sopenharmony_ci batch->blit.src = buf->bo; 1118bf215546Sopenharmony_ci batch->blit.dst = img->pimage.data.bo; 1119bf215546Sopenharmony_ci panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true); 1120bf215546Sopenharmony_ci panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf); 1121bf215546Sopenharmony_ci panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf); 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci mali_ptr tsd, tiler; 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_ci tsd = batch->tls.gpu; 1126bf215546Sopenharmony_ci tiler = batch->tiler.descs.gpu; 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci struct panfrost_ptr job; 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base, 1131bf215546Sopenharmony_ci &batch->scoreboard, 1132bf215546Sopenharmony_ci src_coords, dst_coords, 1133bf215546Sopenharmony_ci 0, 0, pushconsts, 1134bf215546Sopenharmony_ci vpd, rsd, tsd, tiler); 1135bf215546Sopenharmony_ci 1136bf215546Sopenharmony_ci util_dynarray_append(&batch->jobs, void *, job.cpu); 1137bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1138bf215546Sopenharmony_ci } 1139bf215546Sopenharmony_ci} 1140bf215546Sopenharmony_ci 1141bf215546Sopenharmony_cistatic void 1142bf215546Sopenharmony_cipanvk_meta_copy_buf2img_init(struct panvk_physical_device *dev) 1143bf215546Sopenharmony_ci{ 1144bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS); 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) { 1147bf215546Sopenharmony_ci struct pan_shader_info shader_info; 1148bf215546Sopenharmony_ci mali_ptr shader = 1149bf215546Sopenharmony_ci panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base, 1150bf215546Sopenharmony_ci panvk_meta_copy_buf2img_fmts[i], 1151bf215546Sopenharmony_ci &shader_info); 1152bf215546Sopenharmony_ci dev->meta.copy.buf2img[i].rsd = 1153bf215546Sopenharmony_ci panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 1154bf215546Sopenharmony_ci shader, &shader_info, 1155bf215546Sopenharmony_ci panvk_meta_copy_buf2img_fmts[i].imgfmt, 1156bf215546Sopenharmony_ci panvk_meta_copy_buf2img_fmts[i].mask, 1157bf215546Sopenharmony_ci false); 1158bf215546Sopenharmony_ci } 1159bf215546Sopenharmony_ci} 1160bf215546Sopenharmony_ci 1161bf215546Sopenharmony_civoid 1162bf215546Sopenharmony_cipanvk_per_arch(CmdCopyBufferToImage2)(VkCommandBuffer commandBuffer, 1163bf215546Sopenharmony_ci const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo) 1164bf215546Sopenharmony_ci{ 1165bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1166bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer); 1167bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage); 1168bf215546Sopenharmony_ci 1169bf215546Sopenharmony_ci for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) { 1170bf215546Sopenharmony_ci panvk_meta_copy_buf2img(cmdbuf, buf, img, &pCopyBufferToImageInfo->pRegions[i]); 1171bf215546Sopenharmony_ci } 1172bf215546Sopenharmony_ci} 1173bf215546Sopenharmony_ci 1174bf215546Sopenharmony_cistatic const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = { 1175bf215546Sopenharmony_ci { PIPE_FORMAT_R8_UINT, 0x1 }, 1176bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8_UINT, 0x3 }, 1177bf215546Sopenharmony_ci { PIPE_FORMAT_R5G6B5_UNORM, 0x7 }, 1178bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UINT, 0xf }, 1179bf215546Sopenharmony_ci { PIPE_FORMAT_R16G16B16_UINT, 0x7 }, 1180bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, 0x3 }, 1181bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32B32_UINT, 0x7 }, 1182bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32B32A32_UINT, 0xf }, 1183bf215546Sopenharmony_ci /* S8 -> Z24S8 */ 1184bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 }, 1185bf215546Sopenharmony_ci /* S8 -> Z32_S8X24 */ 1186bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, 0x2 }, 1187bf215546Sopenharmony_ci /* Z24X8 -> Z24S8 */ 1188bf215546Sopenharmony_ci { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 }, 1189bf215546Sopenharmony_ci /* Z32 -> Z32_S8X24 */ 1190bf215546Sopenharmony_ci { PIPE_FORMAT_R32G32_UINT, 0x1 }, 1191bf215546Sopenharmony_ci}; 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_cistatic enum pipe_format 1194bf215546Sopenharmony_cipanvk_meta_copy_img2buf_format(enum pipe_format imgfmt) 1195bf215546Sopenharmony_ci{ 1196bf215546Sopenharmony_ci /* Pick blendable formats when we can, and the FLOAT variant matching the 1197bf215546Sopenharmony_ci * texelsize otherwise. 1198bf215546Sopenharmony_ci */ 1199bf215546Sopenharmony_ci switch (util_format_get_blocksize(imgfmt)) { 1200bf215546Sopenharmony_ci case 1: return PIPE_FORMAT_R8_UINT; 1201bf215546Sopenharmony_ci /* AFBC stores things differently for RGB565, 1202bf215546Sopenharmony_ci * we can't simply map to R8G8 in that case */ 1203bf215546Sopenharmony_ci case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM || 1204bf215546Sopenharmony_ci imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ? 1205bf215546Sopenharmony_ci PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT; 1206bf215546Sopenharmony_ci case 4: return PIPE_FORMAT_R8G8B8A8_UINT; 1207bf215546Sopenharmony_ci case 6: return PIPE_FORMAT_R16G16B16_UINT; 1208bf215546Sopenharmony_ci case 8: return PIPE_FORMAT_R32G32_UINT; 1209bf215546Sopenharmony_ci case 12: return PIPE_FORMAT_R32G32B32_UINT; 1210bf215546Sopenharmony_ci case 16: return PIPE_FORMAT_R32G32B32A32_UINT; 1211bf215546Sopenharmony_ci default: unreachable("Invalid format\n"); 1212bf215546Sopenharmony_ci } 1213bf215546Sopenharmony_ci} 1214bf215546Sopenharmony_ci 1215bf215546Sopenharmony_cistruct panvk_meta_copy_img2buf_info { 1216bf215546Sopenharmony_ci struct { 1217bf215546Sopenharmony_ci mali_ptr ptr; 1218bf215546Sopenharmony_ci struct { 1219bf215546Sopenharmony_ci unsigned line; 1220bf215546Sopenharmony_ci unsigned surf; 1221bf215546Sopenharmony_ci } stride; 1222bf215546Sopenharmony_ci } buf; 1223bf215546Sopenharmony_ci struct { 1224bf215546Sopenharmony_ci struct { 1225bf215546Sopenharmony_ci unsigned x, y, z; 1226bf215546Sopenharmony_ci } offset; 1227bf215546Sopenharmony_ci struct { 1228bf215546Sopenharmony_ci unsigned minx, miny, maxx, maxy; 1229bf215546Sopenharmony_ci } extent; 1230bf215546Sopenharmony_ci } img; 1231bf215546Sopenharmony_ci} PACKED; 1232bf215546Sopenharmony_ci 1233bf215546Sopenharmony_ci#define panvk_meta_copy_img2buf_get_info_field(b, field) \ 1234bf215546Sopenharmony_ci nir_load_push_constant((b), 1, \ 1235bf215546Sopenharmony_ci sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \ 1236bf215546Sopenharmony_ci nir_imm_int(b, 0), \ 1237bf215546Sopenharmony_ci .base = offsetof(struct panvk_meta_copy_img2buf_info, field), \ 1238bf215546Sopenharmony_ci .range = ~0) 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_cistatic mali_ptr 1241bf215546Sopenharmony_cipanvk_meta_copy_img2buf_shader(struct panfrost_device *pdev, 1242bf215546Sopenharmony_ci struct pan_pool *bin_pool, 1243bf215546Sopenharmony_ci struct panvk_meta_copy_format_info key, 1244bf215546Sopenharmony_ci unsigned texdim, unsigned texisarray, 1245bf215546Sopenharmony_ci struct pan_shader_info *shader_info) 1246bf215546Sopenharmony_ci{ 1247bf215546Sopenharmony_ci unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt); 1248bf215546Sopenharmony_ci unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); 1249bf215546Sopenharmony_ci 1250bf215546Sopenharmony_ci /* FIXME: Won't work on compute queues, but we can't do that with 1251bf215546Sopenharmony_ci * a compute shader if the destination is an AFBC surface. 1252bf215546Sopenharmony_ci */ 1253bf215546Sopenharmony_ci nir_builder b = 1254bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 1255bf215546Sopenharmony_ci GENX(pan_shader_get_compiler_options)(), 1256bf215546Sopenharmony_ci "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)", 1257bf215546Sopenharmony_ci texdim, texisarray ? "[]" : "", 1258bf215546Sopenharmony_ci util_format_name(key.imgfmt), 1259bf215546Sopenharmony_ci key.mask); 1260bf215546Sopenharmony_ci 1261bf215546Sopenharmony_ci nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32); 1262bf215546Sopenharmony_ci nir_ssa_def *bufptr = 1263bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr); 1264bf215546Sopenharmony_ci nir_ssa_def *buflinestride = 1265bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line); 1266bf215546Sopenharmony_ci nir_ssa_def *bufsurfstride = 1267bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf); 1268bf215546Sopenharmony_ci 1269bf215546Sopenharmony_ci nir_ssa_def *imgminx = 1270bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx); 1271bf215546Sopenharmony_ci nir_ssa_def *imgminy = 1272bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny); 1273bf215546Sopenharmony_ci nir_ssa_def *imgmaxx = 1274bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx); 1275bf215546Sopenharmony_ci nir_ssa_def *imgmaxy = 1276bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy); 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci nir_ssa_def *imgcoords, *inbounds; 1279bf215546Sopenharmony_ci 1280bf215546Sopenharmony_ci switch (texdim + texisarray) { 1281bf215546Sopenharmony_ci case 1: 1282bf215546Sopenharmony_ci imgcoords = 1283bf215546Sopenharmony_ci nir_iadd(&b, 1284bf215546Sopenharmony_ci nir_channel(&b, coord, 0), 1285bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)); 1286bf215546Sopenharmony_ci inbounds = 1287bf215546Sopenharmony_ci nir_iand(&b, 1288bf215546Sopenharmony_ci nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), 1289bf215546Sopenharmony_ci nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx)); 1290bf215546Sopenharmony_ci break; 1291bf215546Sopenharmony_ci case 2: 1292bf215546Sopenharmony_ci imgcoords = 1293bf215546Sopenharmony_ci nir_vec2(&b, 1294bf215546Sopenharmony_ci nir_iadd(&b, 1295bf215546Sopenharmony_ci nir_channel(&b, coord, 0), 1296bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)), 1297bf215546Sopenharmony_ci nir_iadd(&b, 1298bf215546Sopenharmony_ci nir_channel(&b, coord, 1), 1299bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y))); 1300bf215546Sopenharmony_ci inbounds = 1301bf215546Sopenharmony_ci nir_iand(&b, 1302bf215546Sopenharmony_ci nir_iand(&b, 1303bf215546Sopenharmony_ci nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), 1304bf215546Sopenharmony_ci nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))), 1305bf215546Sopenharmony_ci nir_iand(&b, 1306bf215546Sopenharmony_ci nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx), 1307bf215546Sopenharmony_ci nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy))); 1308bf215546Sopenharmony_ci break; 1309bf215546Sopenharmony_ci case 3: 1310bf215546Sopenharmony_ci imgcoords = 1311bf215546Sopenharmony_ci nir_vec3(&b, 1312bf215546Sopenharmony_ci nir_iadd(&b, 1313bf215546Sopenharmony_ci nir_channel(&b, coord, 0), 1314bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)), 1315bf215546Sopenharmony_ci nir_iadd(&b, 1316bf215546Sopenharmony_ci nir_channel(&b, coord, 1), 1317bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)), 1318bf215546Sopenharmony_ci nir_iadd(&b, 1319bf215546Sopenharmony_ci nir_channel(&b, coord, 2), 1320bf215546Sopenharmony_ci panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y))); 1321bf215546Sopenharmony_ci inbounds = 1322bf215546Sopenharmony_ci nir_iand(&b, 1323bf215546Sopenharmony_ci nir_iand(&b, 1324bf215546Sopenharmony_ci nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)), 1325bf215546Sopenharmony_ci nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))), 1326bf215546Sopenharmony_ci nir_iand(&b, 1327bf215546Sopenharmony_ci nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx), 1328bf215546Sopenharmony_ci nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy))); 1329bf215546Sopenharmony_ci break; 1330bf215546Sopenharmony_ci default: 1331bf215546Sopenharmony_ci unreachable("Invalid texture dimension\n"); 1332bf215546Sopenharmony_ci } 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_ci nir_push_if(&b, inbounds); 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4 1337bf215546Sopenharmony_ci * blocks instead of 16x16 texels in that case, and there's nothing we can 1338bf215546Sopenharmony_ci * do to force the tile size to 4x4 in the render path. 1339bf215546Sopenharmony_ci * This being said, compressed textures are not compatible with AFBC, so we 1340bf215546Sopenharmony_ci * could use a compute shader arranging the blocks properly. 1341bf215546Sopenharmony_ci */ 1342bf215546Sopenharmony_ci nir_ssa_def *offset = 1343bf215546Sopenharmony_ci nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz)); 1344bf215546Sopenharmony_ci offset = nir_iadd(&b, offset, 1345bf215546Sopenharmony_ci nir_imul(&b, nir_channel(&b, coord, 1), buflinestride)); 1346bf215546Sopenharmony_ci offset = nir_iadd(&b, offset, 1347bf215546Sopenharmony_ci nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride)); 1348bf215546Sopenharmony_ci bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset)); 1349bf215546Sopenharmony_ci 1350bf215546Sopenharmony_ci unsigned imgcompsz = imgtexelsz <= 4 ? 1351bf215546Sopenharmony_ci 1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4); 1352bf215546Sopenharmony_ci unsigned nimgcomps = imgtexelsz / imgcompsz; 1353bf215546Sopenharmony_ci assert(nimgcomps <= 4); 1354bf215546Sopenharmony_ci 1355bf215546Sopenharmony_ci nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1); 1356bf215546Sopenharmony_ci tex->op = nir_texop_txf; 1357bf215546Sopenharmony_ci tex->texture_index = 0; 1358bf215546Sopenharmony_ci tex->is_array = texisarray; 1359bf215546Sopenharmony_ci tex->dest_type = util_format_is_unorm(key.imgfmt) ? 1360bf215546Sopenharmony_ci nir_type_float32 : nir_type_uint32; 1361bf215546Sopenharmony_ci 1362bf215546Sopenharmony_ci switch (texdim) { 1363bf215546Sopenharmony_ci case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break; 1364bf215546Sopenharmony_ci case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break; 1365bf215546Sopenharmony_ci case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break; 1366bf215546Sopenharmony_ci default: unreachable("Invalid texture dimension"); 1367bf215546Sopenharmony_ci } 1368bf215546Sopenharmony_ci 1369bf215546Sopenharmony_ci tex->src[0].src_type = nir_tex_src_coord; 1370bf215546Sopenharmony_ci tex->src[0].src = nir_src_for_ssa(imgcoords); 1371bf215546Sopenharmony_ci tex->coord_components = texdim + texisarray; 1372bf215546Sopenharmony_ci nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 1373bf215546Sopenharmony_ci nir_alu_type_get_type_size(tex->dest_type), NULL); 1374bf215546Sopenharmony_ci nir_builder_instr_insert(&b, &tex->instr); 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci nir_ssa_def *texel = &tex->dest.ssa; 1377bf215546Sopenharmony_ci 1378bf215546Sopenharmony_ci unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1; 1379bf215546Sopenharmony_ci unsigned nbufcomps = util_bitcount(fullmask); 1380bf215546Sopenharmony_ci if (key.mask != fullmask) { 1381bf215546Sopenharmony_ci nir_ssa_def *bufcomps[4]; 1382bf215546Sopenharmony_ci nbufcomps = 0; 1383bf215546Sopenharmony_ci for (unsigned i = 0; i < nimgcomps; i++) { 1384bf215546Sopenharmony_ci if (key.mask & BITFIELD_BIT(i)) 1385bf215546Sopenharmony_ci bufcomps[nbufcomps++] = nir_channel(&b, texel, i); 1386bf215546Sopenharmony_ci } 1387bf215546Sopenharmony_ci 1388bf215546Sopenharmony_ci texel = nir_vec(&b, bufcomps, nbufcomps); 1389bf215546Sopenharmony_ci } 1390bf215546Sopenharmony_ci 1391bf215546Sopenharmony_ci unsigned bufcompsz = buftexelsz / nbufcomps; 1392bf215546Sopenharmony_ci 1393bf215546Sopenharmony_ci if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) { 1394bf215546Sopenharmony_ci texel = nir_fmul(&b, texel, 1395bf215546Sopenharmony_ci nir_vec3(&b, 1396bf215546Sopenharmony_ci nir_imm_float(&b, 31), 1397bf215546Sopenharmony_ci nir_imm_float(&b, 63), 1398bf215546Sopenharmony_ci nir_imm_float(&b, 31))); 1399bf215546Sopenharmony_ci texel = nir_f2u16(&b, texel); 1400bf215546Sopenharmony_ci texel = nir_ior(&b, nir_channel(&b, texel, 0), 1401bf215546Sopenharmony_ci nir_ior(&b, 1402bf215546Sopenharmony_ci nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)), 1403bf215546Sopenharmony_ci nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11)))); 1404bf215546Sopenharmony_ci imgcompsz = 2; 1405bf215546Sopenharmony_ci bufcompsz = 2; 1406bf215546Sopenharmony_ci nbufcomps = 1; 1407bf215546Sopenharmony_ci nimgcomps = 1; 1408bf215546Sopenharmony_ci } else if (imgcompsz == 1) { 1409bf215546Sopenharmony_ci nir_ssa_def *packed = nir_channel(&b, texel, 0); 1410bf215546Sopenharmony_ci for (unsigned i = 1; i < nbufcomps; i++) { 1411bf215546Sopenharmony_ci packed = nir_ior(&b, packed, 1412bf215546Sopenharmony_ci nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff), 1413bf215546Sopenharmony_ci nir_imm_int(&b, i * 8))); 1414bf215546Sopenharmony_ci } 1415bf215546Sopenharmony_ci texel = packed; 1416bf215546Sopenharmony_ci 1417bf215546Sopenharmony_ci bufcompsz = nbufcomps == 3 ? 4 : nbufcomps; 1418bf215546Sopenharmony_ci nbufcomps = 1; 1419bf215546Sopenharmony_ci } 1420bf215546Sopenharmony_ci 1421bf215546Sopenharmony_ci assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4); 1422bf215546Sopenharmony_ci assert(nbufcomps <= 4 && nimgcomps <= 4); 1423bf215546Sopenharmony_ci texel = nir_u2uN(&b, texel, bufcompsz * 8); 1424bf215546Sopenharmony_ci 1425bf215546Sopenharmony_ci nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1); 1426bf215546Sopenharmony_ci nir_pop_if(&b, NULL); 1427bf215546Sopenharmony_ci 1428bf215546Sopenharmony_ci struct panfrost_compile_inputs inputs = { 1429bf215546Sopenharmony_ci .gpu_id = pdev->gpu_id, 1430bf215546Sopenharmony_ci .is_blit = true, 1431bf215546Sopenharmony_ci .no_ubo_to_push = true, 1432bf215546Sopenharmony_ci }; 1433bf215546Sopenharmony_ci 1434bf215546Sopenharmony_ci struct util_dynarray binary; 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_ci util_dynarray_init(&binary, NULL); 1437bf215546Sopenharmony_ci GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 1438bf215546Sopenharmony_ci 1439bf215546Sopenharmony_ci shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4); 1440bf215546Sopenharmony_ci 1441bf215546Sopenharmony_ci mali_ptr shader = 1442bf215546Sopenharmony_ci pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); 1443bf215546Sopenharmony_ci 1444bf215546Sopenharmony_ci util_dynarray_fini(&binary); 1445bf215546Sopenharmony_ci ralloc_free(b.shader); 1446bf215546Sopenharmony_ci 1447bf215546Sopenharmony_ci return shader; 1448bf215546Sopenharmony_ci} 1449bf215546Sopenharmony_ci 1450bf215546Sopenharmony_cistatic unsigned 1451bf215546Sopenharmony_cipanvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key) 1452bf215546Sopenharmony_ci{ 1453bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) { 1454bf215546Sopenharmony_ci if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key))) 1455bf215546Sopenharmony_ci return i; 1456bf215546Sopenharmony_ci } 1457bf215546Sopenharmony_ci 1458bf215546Sopenharmony_ci unreachable("Invalid texel size\n"); 1459bf215546Sopenharmony_ci} 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_cistatic void 1462bf215546Sopenharmony_cipanvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf, 1463bf215546Sopenharmony_ci const struct panvk_buffer *buf, 1464bf215546Sopenharmony_ci const struct panvk_image *img, 1465bf215546Sopenharmony_ci const VkBufferImageCopy2 *region) 1466bf215546Sopenharmony_ci{ 1467bf215546Sopenharmony_ci struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev; 1468bf215546Sopenharmony_ci struct panvk_meta_copy_format_info key = { 1469bf215546Sopenharmony_ci .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format), 1470bf215546Sopenharmony_ci .mask = panvk_meta_copy_img_mask(img->pimage.layout.format, 1471bf215546Sopenharmony_ci region->imageSubresource.aspectMask), 1472bf215546Sopenharmony_ci }; 1473bf215546Sopenharmony_ci unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask); 1474bf215546Sopenharmony_ci unsigned texdimidx = 1475bf215546Sopenharmony_ci panvk_meta_copy_tex_type(img->pimage.layout.dim, 1476bf215546Sopenharmony_ci img->pimage.layout.array_size > 1); 1477bf215546Sopenharmony_ci unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key); 1478bf215546Sopenharmony_ci 1479bf215546Sopenharmony_ci mali_ptr rsd = 1480bf215546Sopenharmony_ci cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd; 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci struct panvk_meta_copy_img2buf_info info = { 1483bf215546Sopenharmony_ci .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset), 1484bf215546Sopenharmony_ci .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz, 1485bf215546Sopenharmony_ci .img.offset.x = MAX2(region->imageOffset.x & ~15, 0), 1486bf215546Sopenharmony_ci .img.extent.minx = MAX2(region->imageOffset.x, 0), 1487bf215546Sopenharmony_ci .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0), 1488bf215546Sopenharmony_ci }; 1489bf215546Sopenharmony_ci 1490bf215546Sopenharmony_ci if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) { 1491bf215546Sopenharmony_ci info.img.extent.maxy = region->imageSubresource.layerCount - 1; 1492bf215546Sopenharmony_ci } else { 1493bf215546Sopenharmony_ci info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0); 1494bf215546Sopenharmony_ci info.img.offset.z = MAX2(region->imageOffset.z, 0); 1495bf215546Sopenharmony_ci info.img.extent.miny = MAX2(region->imageOffset.y, 0); 1496bf215546Sopenharmony_ci info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0); 1497bf215546Sopenharmony_ci } 1498bf215546Sopenharmony_ci 1499bf215546Sopenharmony_ci info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) * 1500bf215546Sopenharmony_ci info.buf.stride.line; 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci mali_ptr pushconsts = 1503bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); 1504bf215546Sopenharmony_ci 1505bf215546Sopenharmony_ci struct pan_image_view view = { 1506bf215546Sopenharmony_ci .format = key.imgfmt, 1507bf215546Sopenharmony_ci .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ? 1508bf215546Sopenharmony_ci MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim, 1509bf215546Sopenharmony_ci .image = &img->pimage, 1510bf215546Sopenharmony_ci .nr_samples = img->pimage.layout.nr_samples, 1511bf215546Sopenharmony_ci .first_level = region->imageSubresource.mipLevel, 1512bf215546Sopenharmony_ci .last_level = region->imageSubresource.mipLevel, 1513bf215546Sopenharmony_ci .first_layer = region->imageSubresource.baseArrayLayer, 1514bf215546Sopenharmony_ci .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1, 1515bf215546Sopenharmony_ci .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W }, 1516bf215546Sopenharmony_ci }; 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci mali_ptr texture = 1519bf215546Sopenharmony_ci panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view); 1520bf215546Sopenharmony_ci mali_ptr sampler = 1521bf215546Sopenharmony_ci panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base); 1522bf215546Sopenharmony_ci 1523bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1524bf215546Sopenharmony_ci 1525bf215546Sopenharmony_ci struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1526bf215546Sopenharmony_ci 1527bf215546Sopenharmony_ci struct pan_tls_info tlsinfo = { 0 }; 1528bf215546Sopenharmony_ci 1529bf215546Sopenharmony_ci batch->blit.src = img->pimage.data.bo; 1530bf215546Sopenharmony_ci batch->blit.dst = buf->bo; 1531bf215546Sopenharmony_ci batch->tls = 1532bf215546Sopenharmony_ci pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE); 1533bf215546Sopenharmony_ci GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu); 1534bf215546Sopenharmony_ci 1535bf215546Sopenharmony_ci mali_ptr tsd = batch->tls.gpu; 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_ci struct pan_compute_dim wg_sz = { 1538bf215546Sopenharmony_ci 16, 1539bf215546Sopenharmony_ci img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16, 1540bf215546Sopenharmony_ci 1, 1541bf215546Sopenharmony_ci }; 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci struct pan_compute_dim num_wg = { 1544bf215546Sopenharmony_ci (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16, 1545bf215546Sopenharmony_ci img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1546bf215546Sopenharmony_ci region->imageSubresource.layerCount : 1547bf215546Sopenharmony_ci (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16, 1548bf215546Sopenharmony_ci img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ? 1549bf215546Sopenharmony_ci MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1, 1550bf215546Sopenharmony_ci }; 1551bf215546Sopenharmony_ci 1552bf215546Sopenharmony_ci struct panfrost_ptr job = 1553bf215546Sopenharmony_ci panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 1554bf215546Sopenharmony_ci &batch->scoreboard, &num_wg, &wg_sz, 1555bf215546Sopenharmony_ci texture, sampler, 1556bf215546Sopenharmony_ci pushconsts, rsd, tsd); 1557bf215546Sopenharmony_ci 1558bf215546Sopenharmony_ci util_dynarray_append(&batch->jobs, void *, job.cpu); 1559bf215546Sopenharmony_ci 1560bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1561bf215546Sopenharmony_ci} 1562bf215546Sopenharmony_ci 1563bf215546Sopenharmony_cistatic void 1564bf215546Sopenharmony_cipanvk_meta_copy_img2buf_init(struct panvk_physical_device *dev) 1565bf215546Sopenharmony_ci{ 1566bf215546Sopenharmony_ci STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS); 1567bf215546Sopenharmony_ci 1568bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) { 1569bf215546Sopenharmony_ci for (unsigned texdim = 1; texdim <= 3; texdim++) { 1570bf215546Sopenharmony_ci unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false); 1571bf215546Sopenharmony_ci assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf)); 1572bf215546Sopenharmony_ci 1573bf215546Sopenharmony_ci struct pan_shader_info shader_info; 1574bf215546Sopenharmony_ci mali_ptr shader = 1575bf215546Sopenharmony_ci panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base, 1576bf215546Sopenharmony_ci panvk_meta_copy_img2buf_fmts[i], 1577bf215546Sopenharmony_ci texdim, false, &shader_info); 1578bf215546Sopenharmony_ci dev->meta.copy.img2buf[texdimidx][i].rsd = 1579bf215546Sopenharmony_ci panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, 1580bf215546Sopenharmony_ci &dev->meta.desc_pool.base, 1581bf215546Sopenharmony_ci shader, &shader_info, true); 1582bf215546Sopenharmony_ci 1583bf215546Sopenharmony_ci if (texdim == 3) 1584bf215546Sopenharmony_ci continue; 1585bf215546Sopenharmony_ci 1586bf215546Sopenharmony_ci memset(&shader_info, 0, sizeof(shader_info)); 1587bf215546Sopenharmony_ci texdimidx = panvk_meta_copy_tex_type(texdim, true); 1588bf215546Sopenharmony_ci assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf)); 1589bf215546Sopenharmony_ci shader = 1590bf215546Sopenharmony_ci panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base, 1591bf215546Sopenharmony_ci panvk_meta_copy_img2buf_fmts[i], 1592bf215546Sopenharmony_ci texdim, true, &shader_info); 1593bf215546Sopenharmony_ci dev->meta.copy.img2buf[texdimidx][i].rsd = 1594bf215546Sopenharmony_ci panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, 1595bf215546Sopenharmony_ci &dev->meta.desc_pool.base, 1596bf215546Sopenharmony_ci shader, &shader_info, true); 1597bf215546Sopenharmony_ci } 1598bf215546Sopenharmony_ci } 1599bf215546Sopenharmony_ci} 1600bf215546Sopenharmony_ci 1601bf215546Sopenharmony_civoid 1602bf215546Sopenharmony_cipanvk_per_arch(CmdCopyImageToBuffer2)(VkCommandBuffer commandBuffer, 1603bf215546Sopenharmony_ci const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo) 1604bf215546Sopenharmony_ci{ 1605bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1606bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer); 1607bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage); 1608bf215546Sopenharmony_ci 1609bf215546Sopenharmony_ci for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) { 1610bf215546Sopenharmony_ci panvk_meta_copy_img2buf(cmdbuf, buf, img, &pCopyImageToBufferInfo->pRegions[i]); 1611bf215546Sopenharmony_ci } 1612bf215546Sopenharmony_ci} 1613bf215546Sopenharmony_ci 1614bf215546Sopenharmony_cistruct panvk_meta_copy_buf2buf_info { 1615bf215546Sopenharmony_ci mali_ptr src; 1616bf215546Sopenharmony_ci mali_ptr dst; 1617bf215546Sopenharmony_ci} PACKED; 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci#define panvk_meta_copy_buf2buf_get_info_field(b, field) \ 1620bf215546Sopenharmony_ci nir_load_push_constant((b), 1, \ 1621bf215546Sopenharmony_ci sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \ 1622bf215546Sopenharmony_ci nir_imm_int(b, 0), \ 1623bf215546Sopenharmony_ci .base = offsetof(struct panvk_meta_copy_buf2buf_info, field), \ 1624bf215546Sopenharmony_ci .range = ~0) 1625bf215546Sopenharmony_ci 1626bf215546Sopenharmony_cistatic mali_ptr 1627bf215546Sopenharmony_cipanvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev, 1628bf215546Sopenharmony_ci struct pan_pool *bin_pool, 1629bf215546Sopenharmony_ci unsigned blksz, 1630bf215546Sopenharmony_ci struct pan_shader_info *shader_info) 1631bf215546Sopenharmony_ci{ 1632bf215546Sopenharmony_ci /* FIXME: Won't work on compute queues, but we can't do that with 1633bf215546Sopenharmony_ci * a compute shader if the destination is an AFBC surface. 1634bf215546Sopenharmony_ci */ 1635bf215546Sopenharmony_ci nir_builder b = 1636bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 1637bf215546Sopenharmony_ci GENX(pan_shader_get_compiler_options)(), 1638bf215546Sopenharmony_ci "panvk_meta_copy_buf2buf(blksz=%d)", 1639bf215546Sopenharmony_ci blksz); 1640bf215546Sopenharmony_ci 1641bf215546Sopenharmony_ci nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32); 1642bf215546Sopenharmony_ci 1643bf215546Sopenharmony_ci nir_ssa_def *offset = 1644bf215546Sopenharmony_ci nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz))); 1645bf215546Sopenharmony_ci nir_ssa_def *srcptr = 1646bf215546Sopenharmony_ci nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset); 1647bf215546Sopenharmony_ci nir_ssa_def *dstptr = 1648bf215546Sopenharmony_ci nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset); 1649bf215546Sopenharmony_ci 1650bf215546Sopenharmony_ci unsigned compsz = blksz < 4 ? blksz : 4; 1651bf215546Sopenharmony_ci unsigned ncomps = blksz / compsz; 1652bf215546Sopenharmony_ci nir_store_global(&b, dstptr, blksz, 1653bf215546Sopenharmony_ci nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8), 1654bf215546Sopenharmony_ci (1 << ncomps) - 1); 1655bf215546Sopenharmony_ci 1656bf215546Sopenharmony_ci struct panfrost_compile_inputs inputs = { 1657bf215546Sopenharmony_ci .gpu_id = pdev->gpu_id, 1658bf215546Sopenharmony_ci .is_blit = true, 1659bf215546Sopenharmony_ci .no_ubo_to_push = true, 1660bf215546Sopenharmony_ci }; 1661bf215546Sopenharmony_ci 1662bf215546Sopenharmony_ci struct util_dynarray binary; 1663bf215546Sopenharmony_ci 1664bf215546Sopenharmony_ci util_dynarray_init(&binary, NULL); 1665bf215546Sopenharmony_ci GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 1666bf215546Sopenharmony_ci 1667bf215546Sopenharmony_ci shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4); 1668bf215546Sopenharmony_ci 1669bf215546Sopenharmony_ci mali_ptr shader = 1670bf215546Sopenharmony_ci pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); 1671bf215546Sopenharmony_ci 1672bf215546Sopenharmony_ci util_dynarray_fini(&binary); 1673bf215546Sopenharmony_ci ralloc_free(b.shader); 1674bf215546Sopenharmony_ci 1675bf215546Sopenharmony_ci return shader; 1676bf215546Sopenharmony_ci} 1677bf215546Sopenharmony_ci 1678bf215546Sopenharmony_cistatic void 1679bf215546Sopenharmony_cipanvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev) 1680bf215546Sopenharmony_ci{ 1681bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) { 1682bf215546Sopenharmony_ci struct pan_shader_info shader_info; 1683bf215546Sopenharmony_ci mali_ptr shader = 1684bf215546Sopenharmony_ci panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base, 1685bf215546Sopenharmony_ci 1 << i, &shader_info); 1686bf215546Sopenharmony_ci dev->meta.copy.buf2buf[i].rsd = 1687bf215546Sopenharmony_ci panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base, 1688bf215546Sopenharmony_ci shader, &shader_info, false); 1689bf215546Sopenharmony_ci } 1690bf215546Sopenharmony_ci} 1691bf215546Sopenharmony_ci 1692bf215546Sopenharmony_cistatic void 1693bf215546Sopenharmony_cipanvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf, 1694bf215546Sopenharmony_ci const struct panvk_buffer *src, 1695bf215546Sopenharmony_ci const struct panvk_buffer *dst, 1696bf215546Sopenharmony_ci const VkBufferCopy2 *region) 1697bf215546Sopenharmony_ci{ 1698bf215546Sopenharmony_ci struct panvk_meta_copy_buf2buf_info info = { 1699bf215546Sopenharmony_ci .src = panvk_buffer_gpu_ptr(src, region->srcOffset), 1700bf215546Sopenharmony_ci .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset), 1701bf215546Sopenharmony_ci }; 1702bf215546Sopenharmony_ci 1703bf215546Sopenharmony_ci unsigned alignment = ffs((info.src | info.dst | region->size) & 15); 1704bf215546Sopenharmony_ci unsigned log2blksz = alignment ? alignment - 1 : 4; 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf)); 1707bf215546Sopenharmony_ci mali_ptr rsd = 1708bf215546Sopenharmony_ci cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd; 1709bf215546Sopenharmony_ci 1710bf215546Sopenharmony_ci mali_ptr pushconsts = 1711bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_ci struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1716bf215546Sopenharmony_ci 1717bf215546Sopenharmony_ci panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); 1718bf215546Sopenharmony_ci 1719bf215546Sopenharmony_ci mali_ptr tsd = batch->tls.gpu; 1720bf215546Sopenharmony_ci 1721bf215546Sopenharmony_ci unsigned nblocks = region->size >> log2blksz; 1722bf215546Sopenharmony_ci struct pan_compute_dim num_wg = { nblocks, 1, 1 }; 1723bf215546Sopenharmony_ci struct pan_compute_dim wg_sz = { 1, 1, 1}; 1724bf215546Sopenharmony_ci struct panfrost_ptr job = 1725bf215546Sopenharmony_ci panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 1726bf215546Sopenharmony_ci &batch->scoreboard, 1727bf215546Sopenharmony_ci &num_wg, &wg_sz, 1728bf215546Sopenharmony_ci 0, 0, pushconsts, rsd, tsd); 1729bf215546Sopenharmony_ci 1730bf215546Sopenharmony_ci util_dynarray_append(&batch->jobs, void *, job.cpu); 1731bf215546Sopenharmony_ci 1732bf215546Sopenharmony_ci batch->blit.src = src->bo; 1733bf215546Sopenharmony_ci batch->blit.dst = dst->bo; 1734bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1735bf215546Sopenharmony_ci} 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_civoid 1738bf215546Sopenharmony_cipanvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer, 1739bf215546Sopenharmony_ci const VkCopyBufferInfo2 *pCopyBufferInfo) 1740bf215546Sopenharmony_ci{ 1741bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1742bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer); 1743bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer); 1744bf215546Sopenharmony_ci 1745bf215546Sopenharmony_ci for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) { 1746bf215546Sopenharmony_ci panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]); 1747bf215546Sopenharmony_ci } 1748bf215546Sopenharmony_ci} 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_cistruct panvk_meta_fill_buf_info { 1751bf215546Sopenharmony_ci mali_ptr start; 1752bf215546Sopenharmony_ci uint32_t val; 1753bf215546Sopenharmony_ci} PACKED; 1754bf215546Sopenharmony_ci 1755bf215546Sopenharmony_ci#define panvk_meta_fill_buf_get_info_field(b, field) \ 1756bf215546Sopenharmony_ci nir_load_push_constant((b), 1, \ 1757bf215546Sopenharmony_ci sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \ 1758bf215546Sopenharmony_ci nir_imm_int(b, 0), \ 1759bf215546Sopenharmony_ci .base = offsetof(struct panvk_meta_fill_buf_info, field), \ 1760bf215546Sopenharmony_ci .range = ~0) 1761bf215546Sopenharmony_ci 1762bf215546Sopenharmony_cistatic mali_ptr 1763bf215546Sopenharmony_cipanvk_meta_fill_buf_shader(struct panfrost_device *pdev, 1764bf215546Sopenharmony_ci struct pan_pool *bin_pool, 1765bf215546Sopenharmony_ci struct pan_shader_info *shader_info) 1766bf215546Sopenharmony_ci{ 1767bf215546Sopenharmony_ci /* FIXME: Won't work on compute queues, but we can't do that with 1768bf215546Sopenharmony_ci * a compute shader if the destination is an AFBC surface. 1769bf215546Sopenharmony_ci */ 1770bf215546Sopenharmony_ci nir_builder b = 1771bf215546Sopenharmony_ci nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, 1772bf215546Sopenharmony_ci GENX(pan_shader_get_compiler_options)(), 1773bf215546Sopenharmony_ci "panvk_meta_fill_buf()"); 1774bf215546Sopenharmony_ci 1775bf215546Sopenharmony_ci nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32); 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci nir_ssa_def *offset = 1778bf215546Sopenharmony_ci nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, sizeof(uint32_t)))); 1779bf215546Sopenharmony_ci nir_ssa_def *ptr = 1780bf215546Sopenharmony_ci nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset); 1781bf215546Sopenharmony_ci nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val); 1782bf215546Sopenharmony_ci 1783bf215546Sopenharmony_ci nir_store_global(&b, ptr, sizeof(uint32_t), val, 1); 1784bf215546Sopenharmony_ci 1785bf215546Sopenharmony_ci struct panfrost_compile_inputs inputs = { 1786bf215546Sopenharmony_ci .gpu_id = pdev->gpu_id, 1787bf215546Sopenharmony_ci .is_blit = true, 1788bf215546Sopenharmony_ci .no_ubo_to_push = true, 1789bf215546Sopenharmony_ci }; 1790bf215546Sopenharmony_ci 1791bf215546Sopenharmony_ci struct util_dynarray binary; 1792bf215546Sopenharmony_ci 1793bf215546Sopenharmony_ci util_dynarray_init(&binary, NULL); 1794bf215546Sopenharmony_ci GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info); 1795bf215546Sopenharmony_ci 1796bf215546Sopenharmony_ci shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4); 1797bf215546Sopenharmony_ci 1798bf215546Sopenharmony_ci mali_ptr shader = 1799bf215546Sopenharmony_ci pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128); 1800bf215546Sopenharmony_ci 1801bf215546Sopenharmony_ci util_dynarray_fini(&binary); 1802bf215546Sopenharmony_ci ralloc_free(b.shader); 1803bf215546Sopenharmony_ci 1804bf215546Sopenharmony_ci return shader; 1805bf215546Sopenharmony_ci} 1806bf215546Sopenharmony_ci 1807bf215546Sopenharmony_cistatic mali_ptr 1808bf215546Sopenharmony_cipanvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev, 1809bf215546Sopenharmony_ci struct pan_pool *bin_pool, 1810bf215546Sopenharmony_ci struct pan_pool *desc_pool) 1811bf215546Sopenharmony_ci{ 1812bf215546Sopenharmony_ci struct pan_shader_info shader_info; 1813bf215546Sopenharmony_ci 1814bf215546Sopenharmony_ci mali_ptr shader = 1815bf215546Sopenharmony_ci panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info); 1816bf215546Sopenharmony_ci 1817bf215546Sopenharmony_ci struct panfrost_ptr rsd_ptr = 1818bf215546Sopenharmony_ci pan_pool_alloc_desc_aggregate(desc_pool, 1819bf215546Sopenharmony_ci PAN_DESC(RENDERER_STATE)); 1820bf215546Sopenharmony_ci 1821bf215546Sopenharmony_ci pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) { 1822bf215546Sopenharmony_ci pan_shader_prepare_rsd(&shader_info, shader, &cfg); 1823bf215546Sopenharmony_ci } 1824bf215546Sopenharmony_ci 1825bf215546Sopenharmony_ci return rsd_ptr.gpu; 1826bf215546Sopenharmony_ci} 1827bf215546Sopenharmony_ci 1828bf215546Sopenharmony_cistatic void 1829bf215546Sopenharmony_cipanvk_meta_fill_buf_init(struct panvk_physical_device *dev) 1830bf215546Sopenharmony_ci{ 1831bf215546Sopenharmony_ci dev->meta.copy.fillbuf.rsd = 1832bf215546Sopenharmony_ci panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base, 1833bf215546Sopenharmony_ci &dev->meta.desc_pool.base); 1834bf215546Sopenharmony_ci} 1835bf215546Sopenharmony_ci 1836bf215546Sopenharmony_cistatic void 1837bf215546Sopenharmony_cipanvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf, 1838bf215546Sopenharmony_ci const struct panvk_buffer *dst, 1839bf215546Sopenharmony_ci VkDeviceSize size, VkDeviceSize offset, 1840bf215546Sopenharmony_ci uint32_t val) 1841bf215546Sopenharmony_ci{ 1842bf215546Sopenharmony_ci struct panvk_meta_fill_buf_info info = { 1843bf215546Sopenharmony_ci .start = panvk_buffer_gpu_ptr(dst, offset), 1844bf215546Sopenharmony_ci .val = val, 1845bf215546Sopenharmony_ci }; 1846bf215546Sopenharmony_ci size = panvk_buffer_range(dst, offset, size); 1847bf215546Sopenharmony_ci 1848bf215546Sopenharmony_ci /* From the Vulkan spec: 1849bf215546Sopenharmony_ci * 1850bf215546Sopenharmony_ci * "size is the number of bytes to fill, and must be either a multiple 1851bf215546Sopenharmony_ci * of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of 1852bf215546Sopenharmony_ci * the buffer. If VK_WHOLE_SIZE is used and the remaining size of the 1853bf215546Sopenharmony_ci * buffer is not a multiple of 4, then the nearest smaller multiple is 1854bf215546Sopenharmony_ci * used." 1855bf215546Sopenharmony_ci */ 1856bf215546Sopenharmony_ci size &= ~3ull; 1857bf215546Sopenharmony_ci 1858bf215546Sopenharmony_ci assert(!(offset & 3) && !(size & 3)); 1859bf215546Sopenharmony_ci 1860bf215546Sopenharmony_ci unsigned nwords = size / sizeof(uint32_t); 1861bf215546Sopenharmony_ci mali_ptr rsd = 1862bf215546Sopenharmony_ci cmdbuf->device->physical_device->meta.copy.fillbuf.rsd; 1863bf215546Sopenharmony_ci 1864bf215546Sopenharmony_ci mali_ptr pushconsts = 1865bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); 1866bf215546Sopenharmony_ci 1867bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1868bf215546Sopenharmony_ci 1869bf215546Sopenharmony_ci struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1870bf215546Sopenharmony_ci 1871bf215546Sopenharmony_ci panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); 1872bf215546Sopenharmony_ci 1873bf215546Sopenharmony_ci mali_ptr tsd = batch->tls.gpu; 1874bf215546Sopenharmony_ci 1875bf215546Sopenharmony_ci struct pan_compute_dim num_wg = { nwords, 1, 1 }; 1876bf215546Sopenharmony_ci struct pan_compute_dim wg_sz = { 1, 1, 1}; 1877bf215546Sopenharmony_ci struct panfrost_ptr job = 1878bf215546Sopenharmony_ci panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 1879bf215546Sopenharmony_ci &batch->scoreboard, 1880bf215546Sopenharmony_ci &num_wg, &wg_sz, 1881bf215546Sopenharmony_ci 0, 0, pushconsts, rsd, tsd); 1882bf215546Sopenharmony_ci 1883bf215546Sopenharmony_ci util_dynarray_append(&batch->jobs, void *, job.cpu); 1884bf215546Sopenharmony_ci 1885bf215546Sopenharmony_ci batch->blit.dst = dst->bo; 1886bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1887bf215546Sopenharmony_ci} 1888bf215546Sopenharmony_ci 1889bf215546Sopenharmony_civoid 1890bf215546Sopenharmony_cipanvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer, 1891bf215546Sopenharmony_ci VkBuffer dstBuffer, 1892bf215546Sopenharmony_ci VkDeviceSize dstOffset, 1893bf215546Sopenharmony_ci VkDeviceSize fillSize, 1894bf215546Sopenharmony_ci uint32_t data) 1895bf215546Sopenharmony_ci{ 1896bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1897bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer); 1898bf215546Sopenharmony_ci 1899bf215546Sopenharmony_ci panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data); 1900bf215546Sopenharmony_ci} 1901bf215546Sopenharmony_ci 1902bf215546Sopenharmony_cistatic void 1903bf215546Sopenharmony_cipanvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf, 1904bf215546Sopenharmony_ci const struct panvk_buffer *dst, VkDeviceSize offset, 1905bf215546Sopenharmony_ci VkDeviceSize size, const void *data) 1906bf215546Sopenharmony_ci{ 1907bf215546Sopenharmony_ci struct panvk_meta_copy_buf2buf_info info = { 1908bf215546Sopenharmony_ci .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4), 1909bf215546Sopenharmony_ci .dst = panvk_buffer_gpu_ptr(dst, offset), 1910bf215546Sopenharmony_ci }; 1911bf215546Sopenharmony_ci 1912bf215546Sopenharmony_ci unsigned log2blksz = ffs(sizeof(uint32_t)) - 1; 1913bf215546Sopenharmony_ci 1914bf215546Sopenharmony_ci mali_ptr rsd = 1915bf215546Sopenharmony_ci cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd; 1916bf215546Sopenharmony_ci 1917bf215546Sopenharmony_ci mali_ptr pushconsts = 1918bf215546Sopenharmony_ci pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16); 1919bf215546Sopenharmony_ci 1920bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1921bf215546Sopenharmony_ci 1922bf215546Sopenharmony_ci struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf); 1923bf215546Sopenharmony_ci 1924bf215546Sopenharmony_ci panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false); 1925bf215546Sopenharmony_ci 1926bf215546Sopenharmony_ci mali_ptr tsd = batch->tls.gpu; 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_ci unsigned nblocks = size >> log2blksz; 1929bf215546Sopenharmony_ci struct pan_compute_dim num_wg = { nblocks, 1, 1 }; 1930bf215546Sopenharmony_ci struct pan_compute_dim wg_sz = { 1, 1, 1}; 1931bf215546Sopenharmony_ci struct panfrost_ptr job = 1932bf215546Sopenharmony_ci panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base, 1933bf215546Sopenharmony_ci &batch->scoreboard, 1934bf215546Sopenharmony_ci &num_wg, &wg_sz, 1935bf215546Sopenharmony_ci 0, 0, pushconsts, rsd, tsd); 1936bf215546Sopenharmony_ci 1937bf215546Sopenharmony_ci util_dynarray_append(&batch->jobs, void *, job.cpu); 1938bf215546Sopenharmony_ci 1939bf215546Sopenharmony_ci batch->blit.dst = dst->bo; 1940bf215546Sopenharmony_ci panvk_per_arch(cmd_close_batch)(cmdbuf); 1941bf215546Sopenharmony_ci} 1942bf215546Sopenharmony_ci 1943bf215546Sopenharmony_civoid 1944bf215546Sopenharmony_cipanvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer, 1945bf215546Sopenharmony_ci VkBuffer dstBuffer, 1946bf215546Sopenharmony_ci VkDeviceSize dstOffset, 1947bf215546Sopenharmony_ci VkDeviceSize dataSize, 1948bf215546Sopenharmony_ci const void *pData) 1949bf215546Sopenharmony_ci{ 1950bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer); 1951bf215546Sopenharmony_ci VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer); 1952bf215546Sopenharmony_ci 1953bf215546Sopenharmony_ci panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData); 1954bf215546Sopenharmony_ci} 1955bf215546Sopenharmony_ci 1956bf215546Sopenharmony_civoid 1957bf215546Sopenharmony_cipanvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev) 1958bf215546Sopenharmony_ci{ 1959bf215546Sopenharmony_ci panvk_meta_copy_img2img_init(dev, false); 1960bf215546Sopenharmony_ci panvk_meta_copy_img2img_init(dev, true); 1961bf215546Sopenharmony_ci panvk_meta_copy_buf2img_init(dev); 1962bf215546Sopenharmony_ci panvk_meta_copy_img2buf_init(dev); 1963bf215546Sopenharmony_ci panvk_meta_copy_buf2buf_init(dev); 1964bf215546Sopenharmony_ci panvk_meta_fill_buf_init(dev); 1965bf215546Sopenharmony_ci} 1966