/*
 * Copyright © 2021 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "gen_macros.h"

#include "nir/nir_builder.h"
#include "pan_encoder.h"
#include "pan_shader.h"

#include "panvk_private.h"

static mali_ptr
panvk_meta_copy_img_emit_texture(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool,
                                 const struct pan_image_view *view)
{
   struct panfrost_ptr texture =
      pan_pool_alloc_desc(desc_pool, TEXTURE);
   size_t payload_size =
      GENX(panfrost_estimate_texture_payload_size)(view);
   struct panfrost_ptr surfaces =
      pan_pool_alloc_aligned(desc_pool, payload_size,
                             pan_alignment(SURFACE_WITH_STRIDE));

   GENX(panfrost_new_texture)(pdev, view, texture.cpu, &surfaces);

   return texture.gpu;
}

static mali_ptr
panvk_meta_copy_img_emit_sampler(struct panfrost_device *pdev,
                                 struct pan_pool *desc_pool)
{
   struct panfrost_ptr sampler =
      pan_pool_alloc_desc(desc_pool, SAMPLER);

   pan_pack(sampler.cpu, SAMPLER, cfg) {
      cfg.seamless_cube_map = false;
      cfg.normalized_coordinates = false;
      cfg.minify_nearest = true;
      cfg.magnify_nearest = true;
   }

   return sampler.gpu;
}

static void
panvk_meta_copy_emit_varying(struct pan_pool *pool,
                             mali_ptr coordinates,
                             mali_ptr *varying_bufs,
                             mali_ptr *varyings)
{
   struct panfrost_ptr varying =
      pan_pool_alloc_desc(pool, ATTRIBUTE);
   struct panfrost_ptr varying_buffer =
      pan_pool_alloc_desc_array(pool, 2, ATTRIBUTE_BUFFER);

   pan_pack(varying_buffer.cpu, ATTRIBUTE_BUFFER, cfg) {
      cfg.pointer = coordinates;
      cfg.stride = 4 * sizeof(uint32_t);
      cfg.size = cfg.stride * 4;
   }

   /* Bifrost needs an empty desc to mark end of prefetching */
   pan_pack(varying_buffer.cpu + pan_size(ATTRIBUTE_BUFFER),
            ATTRIBUTE_BUFFER, cfg);

   pan_pack(varying.cpu, ATTRIBUTE, cfg) {
      cfg.buffer_index = 0;
      cfg.format = pool->dev->formats[PIPE_FORMAT_R32G32B32_FLOAT].hw;
   }

   *varyings = varying.gpu;
   *varying_bufs = varying_buffer.gpu;
}

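/* Emit the Draw Call Descriptor shared by the tiler (copy-as-draw) and
 * compute (copy-as-dispatch) paths. src_coords == 0 means the job sources
 * no varyings, which is the case for the compute-based copies. */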
static void
panvk_meta_copy_emit_dcd(struct pan_pool *pool,
                         mali_ptr src_coords, mali_ptr dst_coords,
                         mali_ptr texture, mali_ptr sampler,
                         mali_ptr vpd, mali_ptr tsd, mali_ptr rsd,
                         mali_ptr push_constants, void *out)
{
   pan_pack(out, DRAW, cfg) {
      cfg.thread_storage = tsd;
      cfg.state = rsd;
      cfg.push_uniforms = push_constants;
      cfg.position = dst_coords;
      if (src_coords) {
         panvk_meta_copy_emit_varying(pool, src_coords,
                                      &cfg.varying_buffers,
                                      &cfg.varyings);
      }
      cfg.viewport = vpd;
      cfg.textures = texture;
      cfg.samplers = sampler;
   }
}

static struct panfrost_ptr
panvk_meta_copy_emit_tiler_job(struct pan_pool *desc_pool,
                               struct pan_scoreboard *scoreboard,
                               mali_ptr src_coords, mali_ptr dst_coords,
                               mali_ptr texture, mali_ptr sampler,
                               mali_ptr push_constants,
                               mali_ptr vpd, mali_ptr rsd,
                               mali_ptr tsd, mali_ptr tiler)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, TILER_JOB);

   panvk_meta_copy_emit_dcd(desc_pool, src_coords, dst_coords,
                            texture, sampler, vpd, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, TILER_JOB, DRAW));

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE, cfg) {
      cfg.draw_mode = MALI_DRAW_MODE_TRIANGLE_STRIP;
      cfg.index_count = 4;
      cfg.job_task_split = 6;
   }

   pan_section_pack(job.cpu, TILER_JOB, PRIMITIVE_SIZE, cfg) {
      cfg.constant = 1.0f;
   }

   void *invoc = pan_section_ptr(job.cpu,
                                 TILER_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, 1, 4,
                                     1, 1, 1, 1, true, false);

   pan_section_pack(job.cpu, TILER_JOB, PADDING, cfg);
   pan_section_pack(job.cpu, TILER_JOB, TILER, cfg) {
      cfg.address = tiler;
   }

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_TILER,
                    false, false, 0, 0, &job, false);
   return job;
}

static struct panfrost_ptr
panvk_meta_copy_emit_compute_job(struct pan_pool *desc_pool,
                                 struct pan_scoreboard *scoreboard,
                                 const struct pan_compute_dim *num_wg,
                                 const struct pan_compute_dim *wg_sz,
                                 mali_ptr texture, mali_ptr sampler,
                                 mali_ptr push_constants,
                                 mali_ptr rsd, mali_ptr tsd)
{
   struct panfrost_ptr job =
      pan_pool_alloc_desc(desc_pool, COMPUTE_JOB);

   void *invoc = pan_section_ptr(job.cpu,
                                 COMPUTE_JOB,
                                 INVOCATION);
   panfrost_pack_work_groups_compute(invoc, num_wg->x, num_wg->y, num_wg->z,
                                     wg_sz->x, wg_sz->y, wg_sz->z,
                                     false, false);

   pan_section_pack(job.cpu, COMPUTE_JOB, PARAMETERS, cfg) {
      cfg.job_task_split = 8;
   }

   panvk_meta_copy_emit_dcd(desc_pool, 0, 0, texture, sampler,
                            0, tsd, rsd, push_constants,
                            pan_section_ptr(job.cpu, COMPUTE_JOB, DRAW));

   panfrost_add_job(desc_pool, scoreboard, MALI_JOB_TYPE_COMPUTE,
                    false, false, 0, 0, &job, false);
   return job;
}


static uint32_t
panvk_meta_copy_img_bifrost_raw_format(unsigned texelsize)
{
   switch (texelsize) {
   case 6: return MALI_RGB16UI << 12;
   case 8: return MALI_RG32UI << 12;
   case 12: return MALI_RGB32UI << 12;
   case 16: return MALI_RGBA32UI << 12;
   default: unreachable("Invalid texel size\n");
   }
}

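/* Renderer state for the copy-to-image paths. Texels wider than 32 bits
 * can't go through the blender's format conversion and are written raw.
 * A partial write mask on a non-raw format is handled with a
 * fixed-function blend that masks out the preserved components, while a
 * partial mask on a raw format makes the shader read back the destination,
 * so forward-pixel-kill has to stay disabled in both cases. */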
static mali_ptr
panvk_meta_copy_to_img_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                enum pipe_format fmt, unsigned wrmask,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE),
                                    PAN_DESC_ARRAY(1, BLEND));

   bool raw = util_format_get_blocksize(fmt) > 4;
   unsigned fullmask = (1 << util_format_get_nr_components(fmt)) - 1;
   bool partialwrite = fullmask != wrmask && !raw;
   bool readstb = fullmask != wrmask && raw;

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.varying_count = 1;
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
      cfg.properties.depth_source = MALI_DEPTH_SOURCE_FIXED_FUNCTION;
      cfg.multisample_misc.sample_mask = UINT16_MAX;
      cfg.multisample_misc.depth_function = MALI_FUNC_ALWAYS;
      cfg.stencil_mask_misc.stencil_mask_front = 0xFF;
      cfg.stencil_mask_misc.stencil_mask_back = 0xFF;
      cfg.stencil_front.compare_function = MALI_FUNC_ALWAYS;
      cfg.stencil_front.stencil_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_fail = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.depth_pass = MALI_STENCIL_OP_REPLACE;
      cfg.stencil_front.mask = 0xFF;
      cfg.stencil_back = cfg.stencil_front;

      cfg.properties.allow_forward_pixel_to_be_killed = true;
      cfg.properties.allow_forward_pixel_to_kill =
         !partialwrite && !readstb;
      cfg.properties.zs_update_operation =
         MALI_PIXEL_KILL_STRONG_EARLY;
      cfg.properties.pixel_kill_operation =
         MALI_PIXEL_KILL_FORCE_EARLY;
   }

   pan_pack(rsd_ptr.cpu + pan_size(RENDERER_STATE), BLEND, cfg) {
      cfg.round_to_fb_precision = true;
      cfg.load_destination = partialwrite;
      cfg.equation.rgb.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.rgb.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.rgb.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.equation.alpha.a = MALI_BLEND_OPERAND_A_SRC;
      cfg.equation.alpha.b = MALI_BLEND_OPERAND_B_SRC;
      cfg.equation.alpha.c = MALI_BLEND_OPERAND_C_ZERO;
      cfg.internal.mode =
         partialwrite ?
         MALI_BLEND_MODE_FIXED_FUNCTION :
         MALI_BLEND_MODE_OPAQUE;
      cfg.equation.color_mask = partialwrite ? wrmask : 0xf;
      cfg.internal.fixed_function.num_comps = 4;
      if (!raw) {
         cfg.internal.fixed_function.conversion.memory_format =
            panfrost_format_to_bifrost_blend(pdev, fmt, false);
         cfg.internal.fixed_function.conversion.register_format =
            MALI_REGISTER_FILE_FORMAT_F32;
      } else {
         unsigned imgtexelsz = util_format_get_blocksize(fmt);

         cfg.internal.fixed_function.conversion.memory_format =
            panvk_meta_copy_img_bifrost_raw_format(imgtexelsz);
         cfg.internal.fixed_function.conversion.register_format =
            (imgtexelsz & 2) ?
            MALI_REGISTER_FILE_FORMAT_U16 :
            MALI_REGISTER_FILE_FORMAT_U32;
      }
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_to_buf_emit_rsd(struct panfrost_device *pdev,
                                struct pan_pool *desc_pool,
                                mali_ptr shader,
                                const struct pan_shader_info *shader_info,
                                bool from_img)
{
   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(shader_info, shader, &cfg);
      if (from_img) {
         cfg.shader.texture_count = 1;
         cfg.shader.sampler_count = 1;
      }
   }

   return rsd_ptr.gpu;
}

static mali_ptr
panvk_meta_copy_img2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               enum pipe_format srcfmt,
                               enum pipe_format dstfmt, unsigned dstmask,
                               unsigned texdim, bool texisarray, bool is_ms,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2img(srcfmt=%s,dstfmt=%s,%dD%s%s)",
                                     util_format_name(srcfmt), util_format_name(dstfmt),
                                     texdim, texisarray ? "[]" : "", is_ms ? ",ms" : "");

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, texdim + texisarray),
                          "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_ssa_def *coord = nir_f2u32(&b, nir_load_var(&b, coord_var));

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, is_ms ? 2 : 1);
   tex->op = is_ms ? nir_texop_txf_ms : nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type = util_format_is_unorm(srcfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(coord);
   tex->coord_components = texdim + texisarray;

   if (is_ms) {
      tex->src[1].src_type = nir_tex_src_ms_index;
      tex->src[1].src = nir_src_for_ssa(nir_load_sample_id(&b));
   }

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   unsigned dstcompsz =
      util_format_get_component_bits(dstfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);
   unsigned ndstcomps = util_format_get_nr_components(dstfmt);
   const struct glsl_type *outtype = NULL;

   if (srcfmt == PIPE_FORMAT_R5G6B5_UNORM && dstfmt == PIPE_FORMAT_R8G8_UNORM) {
      nir_ssa_def *rgb =
         nir_f2u32(&b, nir_fmul(&b, texel,
                                nir_vec3(&b,
                                         nir_imm_float(&b, 31),
                                         nir_imm_float(&b, 63),
                                         nir_imm_float(&b, 31))));
      nir_ssa_def *rg =
         nir_vec2(&b,
                  nir_ior(&b, nir_channel(&b, rgb, 0),
                          nir_ishl(&b, nir_channel(&b, rgb, 1),
                                   nir_imm_int(&b, 5))),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rgb, 1), 3),
                          nir_ishl(&b, nir_channel(&b, rgb, 2),
                                   nir_imm_int(&b, 3))));
      rg = nir_iand_imm(&b, rg, 255);
      texel = nir_fmul_imm(&b, nir_u2f32(&b, rg), 1.0 / 255);
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 2);
   } else if (srcfmt == PIPE_FORMAT_R8G8_UNORM && dstfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      nir_ssa_def *rg = nir_f2u32(&b, nir_fmul_imm(&b, texel, 255));
      nir_ssa_def *rgb =
         nir_vec3(&b,
                  nir_channel(&b, rg, 0),
                  nir_ior(&b,
                          nir_ushr_imm(&b, nir_channel(&b, rg, 0), 5),
                          nir_ishl(&b, nir_channel(&b, rg, 1),
                                   nir_imm_int(&b, 3))),
                  nir_ushr_imm(&b, nir_channel(&b, rg, 1), 3));
      rgb = nir_iand(&b, rgb,
                     nir_vec3(&b,
                              nir_imm_int(&b, 31),
                              nir_imm_int(&b, 63),
                              nir_imm_int(&b, 31)));
      texel = nir_fmul(&b, nir_u2f32(&b, rgb),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0 / 31),
                                nir_imm_float(&b, 1.0 / 63),
                                nir_imm_float(&b, 1.0 / 31)));
      outtype = glsl_vector_type(GLSL_TYPE_FLOAT, 3);
   } else {
      assert(srcfmt == dstfmt);
      enum glsl_base_type basetype;
      if (util_format_is_unorm(dstfmt)) {
         basetype = GLSL_TYPE_FLOAT;
      } else if (dstcompsz == 16) {
         basetype = GLSL_TYPE_UINT16;
      } else {
         assert(dstcompsz == 32);
         basetype = GLSL_TYPE_UINT;
      }

      if (dstcompsz == 16)
         texel = nir_u2u16(&b, texel);

      texel = nir_channels(&b, texel, (1 << ndstcomps) - 1);
      outtype = glsl_vector_type(basetype, ndstcomps);
   }

   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out, outtype, "out");
   out->data.location = FRAG_RESULT_DATA0;

   unsigned fullmask = (1 << ndstcomps) - 1;
   if (dstcompsz > 8 && dstmask != fullmask) {
      nir_ssa_def *oldtexel = nir_load_var(&b, out);
      nir_ssa_def *dstcomps[4];

      for (unsigned i = 0; i < ndstcomps; i++) {
         if (dstmask & BITFIELD_BIT(i))
            dstcomps[i] = nir_channel(&b, texel, i);
         else
            dstcomps[i] = nir_channel(&b, oldtexel, i);
      }

      texel = nir_vec(&b, dstcomps, ndstcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   /* dstcompsz is in bits here: 16-bit components are passed back as u16,
    * everything else as u32. */
   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (dstcompsz == 16 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = dstcompsz == 16 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->fs.sample_shading = is_ms;

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static enum pipe_format
panvk_meta_copy_img_format(enum pipe_format fmt)
{
   /* We can't use a non-compressed format when handling a tiled/AFBC
    * compressed format because the tile sizes differ (4x4 blocks for
    * compressed formats and 16x16 texels for non-compressed ones).
    */
   assert(!util_format_is_compressed(fmt));

   /* Pick blendable formats when we can, otherwise pick the UINT variant
    * matching the texel size.
    */
   switch (util_format_get_blocksize(fmt)) {
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 2: return (fmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   fmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
   case 1: return PIPE_FORMAT_R8_UNORM;
   default: unreachable("Unsupported format\n");
   }
}

struct panvk_meta_copy_img2img_format_info {
   enum pipe_format srcfmt;
   enum pipe_format dstfmt;
   unsigned dstmask;
} PACKED;

static const struct panvk_meta_copy_img2img_format_info panvk_meta_copy_img2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, PIPE_FORMAT_R8_UNORM, 0x1 },
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R5G6B5_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3 },
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8_UNORM, PIPE_FORMAT_R8G8_UNORM, 0x3 },
   /* Z24S8(depth) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z24S8(stencil) */
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x3 },
   /* Z32S8X24(depth) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x1 },
   /* Z32S8X24(stencil) */
   { PIPE_FORMAT_R32G32_UINT, PIPE_FORMAT_R32G32_UINT, 0x2 },
   { PIPE_FORMAT_R32G32B32_UINT, PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
};

static unsigned
panvk_meta_copy_img2img_format_idx(struct panvk_meta_copy_img2img_format_info key)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static unsigned
panvk_meta_copy_img_mask(enum pipe_format imgfmt, VkImageAspectFlags aspectMask)
{
   if (aspectMask != VK_IMAGE_ASPECT_DEPTH_BIT &&
       aspectMask != VK_IMAGE_ASPECT_STENCIL_BIT) {
      enum pipe_format outfmt = panvk_meta_copy_img_format(imgfmt);

      return (1 << util_format_get_nr_components(outfmt)) - 1;
   }

   switch (imgfmt) {
   case PIPE_FORMAT_S8_UINT:
      return 1;
   case PIPE_FORMAT_Z16_UNORM:
      return 3;
   case PIPE_FORMAT_Z16_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 3 : 8;
   case PIPE_FORMAT_Z24_UNORM_S8_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 7 : 8;
   case PIPE_FORMAT_Z24X8_UNORM:
      assert(aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT);
      return 7;
   case PIPE_FORMAT_Z32_FLOAT:
      return 0xf;
   case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
      return aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT ? 1 : 2;
   default:
      unreachable("Invalid depth format\n");
   }
}

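/* vkCmdCopyImage implementation: the destination level is bound as the
 * sole render target, the source is sampled as a texture, and each
 * layer/slice of the region is copied with one quad draw. */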
static void
panvk_meta_copy_img2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_image *src,
                        const struct panvk_image *dst,
                        const VkImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   struct panvk_meta_copy_img2img_format_info key = {
      .srcfmt = panvk_meta_copy_img_format(src->pimage.layout.format),
      .dstfmt = panvk_meta_copy_img_format(dst->pimage.layout.format),
      .dstmask = panvk_meta_copy_img_mask(dst->pimage.layout.format,
                                          region->dstSubresource.aspectMask),
   };

   assert(src->pimage.layout.nr_samples == dst->pimage.layout.nr_samples);

   unsigned texdimidx =
      panvk_meta_copy_tex_type(src->pimage.layout.dim,
                               src->pimage.layout.array_size > 1);
   unsigned fmtidx =
      panvk_meta_copy_img2img_format_idx(key);
   unsigned ms = dst->pimage.layout.nr_samples > 1 ? 1 : 0;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2img[ms][texdimidx][fmtidx].rsd;

   struct pan_image_view srcview = {
      .format = key.srcfmt,
      .dim = src->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : src->pimage.layout.dim,
      .image = &src->pimage,
      .nr_samples = src->pimage.layout.nr_samples,
      .first_level = region->srcSubresource.mipLevel,
      .last_level = region->srcSubresource.mipLevel,
      .first_layer = region->srcSubresource.baseArrayLayer,
      .last_layer = region->srcSubresource.baseArrayLayer + region->srcSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   struct pan_image_view dstview = {
      .format = key.dstfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &dst->pimage,
      .nr_samples = dst->pimage.layout.nr_samples,
      .first_level = region->dstSubresource.mipLevel,
      .last_level = region->dstSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   unsigned minx = MAX2(region->dstOffset.x, 0);
   unsigned miny = MAX2(region->dstOffset.y, 0);
   unsigned maxx = MAX2(region->dstOffset.x + region->extent.width - 1, 0);
   unsigned maxy = MAX2(region->dstOffset.y + region->extent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };

   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   /* TODO: don't force preloads of dst resources if unneeded */

   unsigned width = u_minify(dst->pimage.layout.width, region->dstSubresource.mipLevel);
   unsigned height = u_minify(dst->pimage.layout.height, region->dstSubresource.mipLevel);
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = width,
      .height = height,
      .extent.minx = minx & ~31,
      .extent.miny = miny & ~31,
      .extent.maxx = MIN2(ALIGN_POT(maxx + 1, 32), width) - 1,
      .extent.maxy = MIN2(ALIGN_POT(maxy + 1, 32), height) - 1,
      .nr_samples = dst->pimage.layout.nr_samples,
      .rt_count = 1,
      .rts[0].view = &dstview,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &srcview);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   minx = MAX2(region->srcOffset.x, 0);
   miny = MAX2(region->srcOffset.y, 0);
   maxx = MAX2(region->srcOffset.x + region->extent.width - 1, 0);
   maxy = MAX2(region->srcOffset.y + region->extent.height - 1, 0);
   assert(region->dstOffset.z >= 0);

   unsigned first_src_layer = MAX2(0, region->srcOffset.z);
   unsigned first_dst_layer = MAX2(region->dstSubresource.baseArrayLayer, region->dstOffset.z);
   unsigned nlayers = MAX2(region->dstSubresource.layerCount, region->extent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      unsigned src_l = l + first_src_layer;
      float src_rect[] = {
         minx, miny, src_l, 1.0,
         maxx + 1, miny, src_l, 1.0,
         minx, maxy + 1, src_l, 1.0,
         maxx + 1, maxy + 1, src_l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      dstview.first_layer = dstview.last_layer = l + first_dst_layer;
      batch->blit.src = src->pimage.data.bo;
      batch->blit.dst = dst->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           texture, sampler, 0,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_img2img_init(struct panvk_physical_device *dev, bool is_ms)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2img_fmts) == PANVK_META_COPY_IMG2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2img_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));

         /* No MSAA on 3D textures */
         if (texdim == 3 && is_ms) continue;

         struct pan_shader_info shader_info;
         mali_ptr shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, false, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2img[0]));
         shader =
            panvk_meta_copy_img2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2img_fmts[i].srcfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstfmt,
                                           panvk_meta_copy_img2img_fmts[i].dstmask,
                                           texdim, true, is_ms, &shader_info);
         dev->meta.copy.img2img[is_ms][texdimidx][i].rsd =
            panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                            shader, &shader_info,
                                            panvk_meta_copy_img2img_fmts[i].dstfmt,
                                            panvk_meta_copy_img2img_fmts[i].dstmask,
                                            true);
      }
   }
}

void
panvk_per_arch(CmdCopyImage2)(VkCommandBuffer commandBuffer,
                              const VkCopyImageInfo2 *pCopyImageInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_image, dst, pCopyImageInfo->dstImage);
   VK_FROM_HANDLE(panvk_image, src, pCopyImageInfo->srcImage);

   for (unsigned i = 0; i < pCopyImageInfo->regionCount; i++) {
      panvk_meta_copy_img2img(cmdbuf, src, dst, &pCopyImageInfo->pRegions[i]);
   }
}

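/* Number of bytes one texel occupies on the buffer side. This matches the
 * image texel size except when only some components transit through the
 * buffer (e.g. one aspect of a packed depth-stencil format). */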
static unsigned
panvk_meta_copy_buf_texelsize(enum pipe_format imgfmt, unsigned mask)
{
   unsigned imgtexelsz = util_format_get_blocksize(imgfmt);
   unsigned nbufcomps = util_bitcount(mask);

   if (nbufcomps == util_format_get_nr_components(imgfmt))
      return imgtexelsz;

   /* Special case for Z24 buffers which are not tightly packed */
   if (mask == 7 && imgtexelsz == 4)
      return 4;

   /* Special case for S8 extraction from Z32_S8X24 */
   if (mask == 2 && imgtexelsz == 8)
      return 1;

   unsigned compsz =
      util_format_get_component_bits(imgfmt, UTIL_FORMAT_COLORSPACE_RGB, 0);

   assert(!(compsz % 8));

   return nbufcomps * compsz / 8;
}

static enum pipe_format
panvk_meta_copy_buf2img_format(enum pipe_format imgfmt)
{
   /* Pick blendable formats when we can, and the UINT variant matching
    * the texelsize otherwise.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1: return PIPE_FORMAT_R8_UNORM;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UNORM;
   case 4: return PIPE_FORMAT_R8G8B8A8_UNORM;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   default: unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_format_info {
   enum pipe_format imgfmt;
   unsigned mask;
} PACKED;

static const struct panvk_meta_copy_format_info panvk_meta_copy_buf2img_fmts[] = {
   { PIPE_FORMAT_R8_UNORM, 0x1 },
   { PIPE_FORMAT_R8G8_UNORM, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UNORM, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};

struct panvk_meta_copy_buf2img_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
} PACKED;

#define panvk_meta_copy_buf2img_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_copy_buf2img_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_copy_buf2img_info, field), \
                               .range = ~0)

static mali_ptr
panvk_meta_copy_buf2img_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               struct pan_shader_info *shader_info)
{
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2img(imgfmt=%s,mask=%x)",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   nir_variable *coord_var =
      nir_variable_create(b.shader, nir_var_shader_in,
                          glsl_vector_type(GLSL_TYPE_FLOAT, 3),
                          "coord");
   coord_var->data.location = VARYING_SLOT_VAR0;
   nir_ssa_def *coord = nir_load_var(&b, coord_var);

   coord = nir_f2u32(&b, coord);

   nir_ssa_def *bufptr =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_buf2img_get_info_field(&b, buf.stride.surf);

   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned writemask = key.mask;

   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

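   /* Decompose the buffer texel into the components the render target
    * expects: texels of at most 4 bytes (except RGB565) are treated as
    * 8-bit unorm components so the blendable path applies, larger texels
    * as 16- or 32-bit uint components. */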
   unsigned imgcompsz =
      (imgtexelsz <= 4 && key.imgfmt != PIPE_FORMAT_R5G6B5_UNORM) ?
      1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);

   unsigned nimgcomps = imgtexelsz / imgcompsz;
   unsigned bufcompsz = MIN2(buftexelsz, imgcompsz);
   unsigned nbufcomps = buftexelsz / bufcompsz;

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);

   nir_ssa_def *texel =
      nir_load_global(&b, bufptr, bufcompsz, nbufcomps, bufcompsz * 8);

   enum glsl_base_type basetype;
   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_vec3(&b,
                       nir_iand_imm(&b, texel, BITFIELD_MASK(5)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 5), BITFIELD_MASK(6)),
                       nir_iand_imm(&b, nir_ushr_imm(&b, texel, 11), BITFIELD_MASK(5)));
      texel = nir_fmul(&b,
                       nir_u2f32(&b, texel),
                       nir_vec3(&b,
                                nir_imm_float(&b, 1.0f / 31),
                                nir_imm_float(&b, 1.0f / 63),
                                nir_imm_float(&b, 1.0f / 31)));
      nimgcomps = 3;
      basetype = GLSL_TYPE_FLOAT;
   } else if (imgcompsz == 1) {
      assert(bufcompsz == 1);
      /* Blendable formats are unorm and the fixed-function blend unit
       * takes float values.
       */
      texel = nir_fmul(&b, nir_u2f32(&b, texel),
                       nir_imm_float(&b, 1.0f / 255));
      basetype = GLSL_TYPE_FLOAT;
   } else {
      texel = nir_u2uN(&b, texel, imgcompsz * 8);
      basetype = imgcompsz == 2 ? GLSL_TYPE_UINT16 : GLSL_TYPE_UINT;
   }

   /* We always pass the texel using 32-bit regs for now */
   nir_variable *out =
      nir_variable_create(b.shader, nir_var_shader_out,
                          glsl_vector_type(basetype, nimgcomps),
                          "out");
   out->data.location = FRAG_RESULT_DATA0;

   uint16_t fullmask = (1 << nimgcomps) - 1;

   assert(fullmask >= writemask);

   if (fullmask != writemask) {
      unsigned first_written_comp = ffs(writemask) - 1;
      nir_ssa_def *oldtexel = NULL;
      if (imgcompsz > 1)
         oldtexel = nir_load_var(&b, out);

      nir_ssa_def *texel_comps[4];
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (writemask & BITFIELD_BIT(i))
            texel_comps[i] = nir_channel(&b, texel, i - first_written_comp);
         else if (imgcompsz > 1)
            texel_comps[i] = nir_channel(&b, oldtexel, i);
         else
            texel_comps[i] = nir_imm_intN_t(&b, 0, texel->bit_size);
      }

      texel = nir_vec(&b, texel_comps, nimgcomps);
   }

   nir_store_var(&b, out, texel, 0xff);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   pan_pack(&inputs.bifrost.rt_conv[0], INTERNAL_CONVERSION, cfg) {
      cfg.memory_format = (imgcompsz == 2 ? MALI_RG16UI : MALI_RG32UI) << 12;
      cfg.register_format = imgcompsz == 2 ?
                            MALI_REGISTER_FILE_FORMAT_U16 :
                            MALI_REGISTER_FILE_FORMAT_U32;
   }
   inputs.bifrost.static_rt_conv = true;

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);
   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2img_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static unsigned
panvk_meta_copy_buf2img_format_idx(struct panvk_meta_copy_format_info key)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_buf2img_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid image format\n");
}

static void
panvk_meta_copy_buf2img(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct pan_fb_info *fbinfo = &cmdbuf->state.fb.info;
   unsigned minx = MAX2(region->imageOffset.x, 0);
   unsigned miny = MAX2(region->imageOffset.y, 0);
   unsigned maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0);
   unsigned maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);

   mali_ptr vpd =
      panvk_per_arch(meta_emit_viewport)(&cmdbuf->desc_pool.base,
                                         minx, miny, maxx, maxy);

   float dst_rect[] = {
      minx, miny, 0.0, 1.0,
      maxx + 1, miny, 0.0, 1.0,
      minx, maxy + 1, 0.0, 1.0,
      maxx + 1, maxy + 1, 0.0, 1.0,
   };
   mali_ptr dst_coords =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, dst_rect,
                              sizeof(dst_rect), 64);

   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_buf2img_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };

   unsigned fmtidx = panvk_meta_copy_buf2img_format_idx(key);

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2img[fmtidx].rsd;

   const struct vk_image_buffer_layout buflayout =
      vk_image_buffer_copy_layout(&img->vk, region);
   struct panvk_meta_copy_buf2img_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line = buflayout.row_stride_B,
      .buf.stride.surf = buflayout.image_stride_B,
   };

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = MALI_TEXTURE_DIMENSION_2D,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   /* TODO: don't force preloads of dst resources if unneeded */
   cmdbuf->state.fb.crc_valid[0] = false;
   *fbinfo = (struct pan_fb_info){
      .width = u_minify(img->pimage.layout.width, region->imageSubresource.mipLevel),
      .height = u_minify(img->pimage.layout.height, region->imageSubresource.mipLevel),
      .extent.minx = minx,
      .extent.maxx = maxx,
      .extent.miny = miny,
      .extent.maxy = maxy,
      .nr_samples = 1,
      .rt_count = 1,
      .rts[0].view = &view,
      .rts[0].preload = true,
      .rts[0].crc_valid = &cmdbuf->state.fb.crc_valid[0],
   };

   panvk_per_arch(cmd_close_batch)(cmdbuf);

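   /* One draw per layer/slice: the quad covers the destination region and
    * its varyings carry the matching buffer coordinates. */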
   assert(region->imageSubresource.layerCount == 1 ||
          region->imageExtent.depth == 1);
   assert(region->imageOffset.z >= 0);
   unsigned first_layer = MAX2(region->imageSubresource.baseArrayLayer, region->imageOffset.z);
   unsigned nlayers = MAX2(region->imageSubresource.layerCount, region->imageExtent.depth);
   for (unsigned l = 0; l < nlayers; l++) {
      float src_rect[] = {
         0, 0, l, 1.0,
         region->imageExtent.width, 0, l, 1.0,
         0, region->imageExtent.height, l, 1.0,
         region->imageExtent.width, region->imageExtent.height, l, 1.0,
      };

      mali_ptr src_coords =
         pan_pool_upload_aligned(&cmdbuf->desc_pool.base, src_rect,
                                 sizeof(src_rect), 64);

      struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

      view.first_layer = view.last_layer = l + first_layer;
      batch->blit.src = buf->bo;
      batch->blit.dst = img->pimage.data.bo;
      panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, true);
      panvk_per_arch(cmd_alloc_fb_desc)(cmdbuf);
      panvk_per_arch(cmd_prepare_tiler_context)(cmdbuf);

      mali_ptr tsd, tiler;

      tsd = batch->tls.gpu;
      tiler = batch->tiler.descs.gpu;

      struct panfrost_ptr job;

      job = panvk_meta_copy_emit_tiler_job(&cmdbuf->desc_pool.base,
                                           &batch->scoreboard,
                                           src_coords, dst_coords,
                                           0, 0, pushconsts,
                                           vpd, rsd, tsd, tiler);

      util_dynarray_append(&batch->jobs, void *, job.cpu);
      panvk_per_arch(cmd_close_batch)(cmdbuf);
   }
}

static void
panvk_meta_copy_buf2img_init(struct panvk_physical_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_buf2img_fmts) == PANVK_META_COPY_BUF2IMG_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_buf2img_fmts); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader =
         panvk_meta_copy_buf2img_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                        panvk_meta_copy_buf2img_fmts[i],
                                        &shader_info);
      dev->meta.copy.buf2img[i].rsd =
         panvk_meta_copy_to_img_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                         shader, &shader_info,
                                         panvk_meta_copy_buf2img_fmts[i].imgfmt,
                                         panvk_meta_copy_buf2img_fmts[i].mask,
                                         false);
   }
}

void
panvk_per_arch(CmdCopyBufferToImage2)(VkCommandBuffer commandBuffer,
                                      const VkCopyBufferToImageInfo2 *pCopyBufferToImageInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, pCopyBufferToImageInfo->srcBuffer);
   VK_FROM_HANDLE(panvk_image, img, pCopyBufferToImageInfo->dstImage);

   for (unsigned i = 0; i < pCopyBufferToImageInfo->regionCount; i++) {
      panvk_meta_copy_buf2img(cmdbuf, buf, img, &pCopyBufferToImageInfo->pRegions[i]);
   }
}

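/* Mirrors panvk_meta_copy_buf2img_fmts, but with the UINT variants of the
 * 8-bit formats: the compute-based img2buf path fetches texels and stores
 * them to the buffer raw, so no unorm conversion is wanted (RGB565 keeps
 * its unorm type and is repacked manually in the shader). */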
static const struct panvk_meta_copy_format_info panvk_meta_copy_img2buf_fmts[] = {
   { PIPE_FORMAT_R8_UINT, 0x1 },
   { PIPE_FORMAT_R8G8_UINT, 0x3 },
   { PIPE_FORMAT_R5G6B5_UNORM, 0x7 },
   { PIPE_FORMAT_R8G8B8A8_UINT, 0xf },
   { PIPE_FORMAT_R16G16B16_UINT, 0x7 },
   { PIPE_FORMAT_R32G32_UINT, 0x3 },
   { PIPE_FORMAT_R32G32B32_UINT, 0x7 },
   { PIPE_FORMAT_R32G32B32A32_UINT, 0xf },
   /* S8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x8 },
   /* S8 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x2 },
   /* Z24X8 -> Z24S8 */
   { PIPE_FORMAT_R8G8B8A8_UINT, 0x7 },
   /* Z32 -> Z32_S8X24 */
   { PIPE_FORMAT_R32G32_UINT, 0x1 },
};

static enum pipe_format
panvk_meta_copy_img2buf_format(enum pipe_format imgfmt)
{
   /* Pick blendable formats when we can, and the UINT variant matching
    * the texelsize otherwise.
    */
   switch (util_format_get_blocksize(imgfmt)) {
   case 1: return PIPE_FORMAT_R8_UINT;
   /* AFBC stores things differently for RGB565,
    * we can't simply map to R8G8 in that case */
   case 2: return (imgfmt == PIPE_FORMAT_R5G6B5_UNORM ||
                   imgfmt == PIPE_FORMAT_B5G6R5_UNORM) ?
                  PIPE_FORMAT_R5G6B5_UNORM : PIPE_FORMAT_R8G8_UINT;
   case 4: return PIPE_FORMAT_R8G8B8A8_UINT;
   case 6: return PIPE_FORMAT_R16G16B16_UINT;
   case 8: return PIPE_FORMAT_R32G32_UINT;
   case 12: return PIPE_FORMAT_R32G32B32_UINT;
   case 16: return PIPE_FORMAT_R32G32B32A32_UINT;
   default: unreachable("Invalid format\n");
   }
}

struct panvk_meta_copy_img2buf_info {
   struct {
      mali_ptr ptr;
      struct {
         unsigned line;
         unsigned surf;
      } stride;
   } buf;
   struct {
      struct {
         unsigned x, y, z;
      } offset;
      struct {
         unsigned minx, miny, maxx, maxy;
      } extent;
   } img;
} PACKED;

#define panvk_meta_copy_img2buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_copy_img2buf_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_copy_img2buf_info, field), \
                               .range = ~0)

static mali_ptr
panvk_meta_copy_img2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               struct panvk_meta_copy_format_info key,
                               unsigned texdim, unsigned texisarray,
                               struct pan_shader_info *shader_info)
{
   unsigned imgtexelsz = util_format_get_blocksize(key.imgfmt);
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);

   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_img2buf(dim=%dD%s,imgfmt=%s,mask=%x)",
                                     texdim, texisarray ? "[]" : "",
                                     util_format_name(key.imgfmt),
                                     key.mask);

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);
   nir_ssa_def *bufptr =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.ptr);
   nir_ssa_def *buflinestride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.line);
   nir_ssa_def *bufsurfstride =
      panvk_meta_copy_img2buf_get_info_field(&b, buf.stride.surf);

   nir_ssa_def *imgminx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.minx);
   nir_ssa_def *imgminy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.miny);
   nir_ssa_def *imgmaxx =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxx);
   nir_ssa_def *imgmaxy =
      panvk_meta_copy_img2buf_get_info_field(&b, img.extent.maxy);

   nir_ssa_def *imgcoords, *inbounds;

   switch (texdim + texisarray) {
   case 1:
      imgcoords =
         nir_iadd(&b,
                  nir_channel(&b, coord, 0),
                  panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x));
      inbounds =
         nir_iand(&b,
                  nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                  nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx));
      break;
   case 2:
      imgcoords =
         nir_vec2(&b,
                  nir_iadd(&b,
                           nir_channel(&b, coord, 0),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 1),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)));
      inbounds =
         nir_iand(&b,
                  nir_iand(&b,
                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
                  nir_iand(&b,
                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   case 3:
      imgcoords =
         nir_vec3(&b,
                  nir_iadd(&b,
                           nir_channel(&b, coord, 0),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.x)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 1),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.y)),
                  nir_iadd(&b,
                           nir_channel(&b, coord, 2),
                           panvk_meta_copy_img2buf_get_info_field(&b, img.offset.z)));
      inbounds =
         nir_iand(&b,
                  nir_iand(&b,
                           nir_uge(&b, imgmaxx, nir_channel(&b, imgcoords, 0)),
                           nir_uge(&b, imgmaxy, nir_channel(&b, imgcoords, 1))),
                  nir_iand(&b,
                           nir_uge(&b, nir_channel(&b, imgcoords, 0), imgminx),
                           nir_uge(&b, nir_channel(&b, imgcoords, 1), imgminy)));
      break;
   default:
      unreachable("Invalid texture dimension\n");
   }

   nir_push_if(&b, inbounds);

   /* FIXME: doesn't work for tiled+compressed formats since blocks are 4x4
    * blocks instead of 16x16 texels in that case, and there's nothing we can
    * do to force the tile size to 4x4 in the render path.
    * This being said, compressed textures are not compatible with AFBC, so we
    * could use a compute shader arranging the blocks properly.
    */
   nir_ssa_def *offset =
      nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, buftexelsz));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 1), buflinestride));
   offset = nir_iadd(&b, offset,
                     nir_imul(&b, nir_channel(&b, coord, 2), bufsurfstride));
   bufptr = nir_iadd(&b, bufptr, nir_u2u64(&b, offset));

   unsigned imgcompsz = imgtexelsz <= 4 ?
                        1 : MIN2(1 << (ffs(imgtexelsz) - 1), 4);
   unsigned nimgcomps = imgtexelsz / imgcompsz;
   assert(nimgcomps <= 4);

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 1);
   tex->op = nir_texop_txf;
   tex->texture_index = 0;
   tex->is_array = texisarray;
   tex->dest_type = util_format_is_unorm(key.imgfmt) ?
                    nir_type_float32 : nir_type_uint32;

   switch (texdim) {
   case 1: tex->sampler_dim = GLSL_SAMPLER_DIM_1D; break;
   case 2: tex->sampler_dim = GLSL_SAMPLER_DIM_2D; break;
   case 3: tex->sampler_dim = GLSL_SAMPLER_DIM_3D; break;
   default: unreachable("Invalid texture dimension");
   }

   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(imgcoords);
   tex->coord_components = texdim + texisarray;
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4,
                     nir_alu_type_get_type_size(tex->dest_type), NULL);
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *texel = &tex->dest.ssa;

   unsigned fullmask = (1 << util_format_get_nr_components(key.imgfmt)) - 1;
   unsigned nbufcomps = util_bitcount(fullmask);
   if (key.mask != fullmask) {
      nir_ssa_def *bufcomps[4];
      nbufcomps = 0;
      for (unsigned i = 0; i < nimgcomps; i++) {
         if (key.mask & BITFIELD_BIT(i))
            bufcomps[nbufcomps++] = nir_channel(&b, texel, i);
      }

      texel = nir_vec(&b, bufcomps, nbufcomps);
   }

   unsigned bufcompsz = buftexelsz / nbufcomps;

   if (key.imgfmt == PIPE_FORMAT_R5G6B5_UNORM) {
      texel = nir_fmul(&b, texel,
                       nir_vec3(&b,
                                nir_imm_float(&b, 31),
                                nir_imm_float(&b, 63),
                                nir_imm_float(&b, 31)));
      texel = nir_f2u16(&b, texel);
      texel = nir_ior(&b, nir_channel(&b, texel, 0),
                      nir_ior(&b,
                              nir_ishl(&b, nir_channel(&b, texel, 1), nir_imm_int(&b, 5)),
                              nir_ishl(&b, nir_channel(&b, texel, 2), nir_imm_int(&b, 11))));
      imgcompsz = 2;
      bufcompsz = 2;
      nbufcomps = 1;
      nimgcomps = 1;
   } else if (imgcompsz == 1) {
      nir_ssa_def *packed = nir_channel(&b, texel, 0);
      for (unsigned i = 1; i < nbufcomps; i++) {
         packed = nir_ior(&b, packed,
                          nir_ishl(&b, nir_iand_imm(&b, nir_channel(&b, texel, i), 0xff),
                                   nir_imm_int(&b, i * 8)));
      }
      texel = packed;

      bufcompsz = nbufcomps == 3 ? 4 : nbufcomps;
      nbufcomps = 1;
   }

   assert(bufcompsz == 1 || bufcompsz == 2 || bufcompsz == 4);
   assert(nbufcomps <= 4 && nimgcomps <= 4);
   texel = nir_u2uN(&b, texel, bufcompsz * 8);

   nir_store_global(&b, bufptr, bufcompsz, texel, (1 << nbufcomps) - 1);
   nir_pop_if(&b, NULL);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_img2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static unsigned
panvk_meta_copy_img2buf_format_idx(struct panvk_meta_copy_format_info key)
{
   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
      if (!memcmp(&key, &panvk_meta_copy_img2buf_fmts[i], sizeof(key)))
         return i;
   }

   unreachable("Invalid texel size\n");
}

static void
panvk_meta_copy_img2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *buf,
                        const struct panvk_image *img,
                        const VkBufferImageCopy2 *region)
{
   struct panfrost_device *pdev = &cmdbuf->device->physical_device->pdev;
   struct panvk_meta_copy_format_info key = {
      .imgfmt = panvk_meta_copy_img2buf_format(img->pimage.layout.format),
      .mask = panvk_meta_copy_img_mask(img->pimage.layout.format,
                                       region->imageSubresource.aspectMask),
   };
   unsigned buftexelsz = panvk_meta_copy_buf_texelsize(key.imgfmt, key.mask);
   unsigned texdimidx =
      panvk_meta_copy_tex_type(img->pimage.layout.dim,
                               img->pimage.layout.array_size > 1);
   unsigned fmtidx = panvk_meta_copy_img2buf_format_idx(key);

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.img2buf[texdimidx][fmtidx].rsd;

   struct panvk_meta_copy_img2buf_info info = {
      .buf.ptr = panvk_buffer_gpu_ptr(buf, region->bufferOffset),
      .buf.stride.line = (region->bufferRowLength ? : region->imageExtent.width) * buftexelsz,
      .img.offset.x = MAX2(region->imageOffset.x & ~15, 0),
      .img.extent.minx = MAX2(region->imageOffset.x, 0),
      .img.extent.maxx = MAX2(region->imageOffset.x + region->imageExtent.width - 1, 0),
   };

   if (img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D) {
      info.img.extent.maxy = region->imageSubresource.layerCount - 1;
   } else {
      info.img.offset.y = MAX2(region->imageOffset.y & ~15, 0);
      info.img.offset.z = MAX2(region->imageOffset.z, 0);
      info.img.extent.miny = MAX2(region->imageOffset.y, 0);
      info.img.extent.maxy = MAX2(region->imageOffset.y + region->imageExtent.height - 1, 0);
   }

   info.buf.stride.surf = (region->bufferImageHeight ? : region->imageExtent.height) *
                          info.buf.stride.line;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

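   /* The dispatch is aligned on the 16x16 tile grid (img.offset was rounded
    * down above), and the shader bound-checks each invocation against the
    * image extent before storing. Cube maps are accessed as 2D arrays. */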
   struct pan_image_view view = {
      .format = key.imgfmt,
      .dim = img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_CUBE ?
             MALI_TEXTURE_DIMENSION_2D : img->pimage.layout.dim,
      .image = &img->pimage,
      .nr_samples = img->pimage.layout.nr_samples,
      .first_level = region->imageSubresource.mipLevel,
      .last_level = region->imageSubresource.mipLevel,
      .first_layer = region->imageSubresource.baseArrayLayer,
      .last_layer = region->imageSubresource.baseArrayLayer + region->imageSubresource.layerCount - 1,
      .swizzle = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y, PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W },
   };

   mali_ptr texture =
      panvk_meta_copy_img_emit_texture(pdev, &cmdbuf->desc_pool.base, &view);
   mali_ptr sampler =
      panvk_meta_copy_img_emit_sampler(pdev, &cmdbuf->desc_pool.base);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   struct pan_tls_info tlsinfo = { 0 };

   batch->blit.src = img->pimage.data.bo;
   batch->blit.dst = buf->bo;
   batch->tls =
      pan_pool_alloc_desc(&cmdbuf->desc_pool.base, LOCAL_STORAGE);
   GENX(pan_emit_tls)(&tlsinfo, batch->tls.cpu);

   mali_ptr tsd = batch->tls.gpu;

   struct pan_compute_dim wg_sz = {
      16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ? 1 : 16,
      1,
   };

   struct pan_compute_dim num_wg = {
      (ALIGN_POT(info.img.extent.maxx + 1, 16) - info.img.offset.x) / 16,
      img->pimage.layout.dim == MALI_TEXTURE_DIMENSION_1D ?
         region->imageSubresource.layerCount :
         (ALIGN_POT(info.img.extent.maxy + 1, 16) - info.img.offset.y) / 16,
      img->pimage.layout.dim != MALI_TEXTURE_DIMENSION_1D ?
         MAX2(region->imageSubresource.layerCount, region->imageExtent.depth) : 1,
   };

   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard, &num_wg, &wg_sz,
                                       texture, sampler,
                                       pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

static void
panvk_meta_copy_img2buf_init(struct panvk_physical_device *dev)
{
   STATIC_ASSERT(ARRAY_SIZE(panvk_meta_copy_img2buf_fmts) == PANVK_META_COPY_IMG2BUF_NUM_FORMATS);

   for (unsigned i = 0; i < ARRAY_SIZE(panvk_meta_copy_img2buf_fmts); i++) {
      for (unsigned texdim = 1; texdim <= 3; texdim++) {
         unsigned texdimidx = panvk_meta_copy_tex_type(texdim, false);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));

         struct pan_shader_info shader_info;
         mali_ptr shader =
            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2buf_fmts[i],
                                           texdim, false, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
                                            &dev->meta.desc_pool.base,
                                            shader, &shader_info, true);

         if (texdim == 3)
            continue;

         memset(&shader_info, 0, sizeof(shader_info));
         texdimidx = panvk_meta_copy_tex_type(texdim, true);
         assert(texdimidx < ARRAY_SIZE(dev->meta.copy.img2buf));
         shader =
            panvk_meta_copy_img2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                           panvk_meta_copy_img2buf_fmts[i],
                                           texdim, true, &shader_info);
         dev->meta.copy.img2buf[texdimidx][i].rsd =
            panvk_meta_copy_to_buf_emit_rsd(&dev->pdev,
                                            &dev->meta.desc_pool.base,
                                            shader, &shader_info, true);
      }
   }
}

void
panvk_per_arch(CmdCopyImageToBuffer2)(VkCommandBuffer commandBuffer,
                                      const VkCopyImageToBufferInfo2 *pCopyImageToBufferInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, buf, pCopyImageToBufferInfo->dstBuffer);
   VK_FROM_HANDLE(panvk_image, img, pCopyImageToBufferInfo->srcImage);

   for (unsigned i = 0; i < pCopyImageToBufferInfo->regionCount; i++) {
      panvk_meta_copy_img2buf(cmdbuf, buf, img, &pCopyImageToBufferInfo->pRegions[i]);
   }
}

struct panvk_meta_copy_buf2buf_info {
   mali_ptr src;
   mali_ptr dst;
} PACKED;

#define panvk_meta_copy_buf2buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_copy_buf2buf_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_copy_buf2buf_info, field), \
                               .range = ~0)

static mali_ptr
panvk_meta_copy_buf2buf_shader(struct panfrost_device *pdev,
                               struct pan_pool *bin_pool,
                               unsigned blksz,
                               struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_copy_buf2buf(blksz=%d)",
                                     blksz);

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0), nir_imm_int(&b, blksz)));
   nir_ssa_def *srcptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, src), offset);
   nir_ssa_def *dstptr =
      nir_iadd(&b, panvk_meta_copy_buf2buf_get_info_field(&b, dst), offset);

   unsigned compsz = blksz < 4 ? blksz : 4;
   unsigned ncomps = blksz / compsz;
   nir_store_global(&b, dstptr, blksz,
                    nir_load_global(&b, srcptr, blksz, ncomps, compsz * 8),
                    (1 << ncomps) - 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count = DIV_ROUND_UP(sizeof(struct panvk_meta_copy_buf2buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static void
panvk_meta_copy_buf2buf_init(struct panvk_physical_device *dev)
{
   for (unsigned i = 0; i < ARRAY_SIZE(dev->meta.copy.buf2buf); i++) {
      struct pan_shader_info shader_info;
      mali_ptr shader =
         panvk_meta_copy_buf2buf_shader(&dev->pdev, &dev->meta.bin_pool.base,
                                        1 << i, &shader_info);
      dev->meta.copy.buf2buf[i].rsd =
         panvk_meta_copy_to_buf_emit_rsd(&dev->pdev, &dev->meta.desc_pool.base,
                                         shader, &shader_info, false);
   }
}

static void
panvk_meta_copy_buf2buf(struct panvk_cmd_buffer *cmdbuf,
                        const struct panvk_buffer *src,
                        const struct panvk_buffer *dst,
                        const VkBufferCopy2 *region)
{
   struct panvk_meta_copy_buf2buf_info info = {
      .src = panvk_buffer_gpu_ptr(src, region->srcOffset),
      .dst = panvk_buffer_gpu_ptr(dst, region->dstOffset),
   };

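   /* Copy in the largest power-of-two blocks (up to 16 bytes) dividing the
    * source pointer, the destination pointer and the size, so that each
    * invocation moves one naturally aligned block. */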
   unsigned alignment = ffs((info.src | info.dst | region->size) & 15);
   unsigned log2blksz = alignment ? alignment - 1 : 4;

   assert(log2blksz < ARRAY_SIZE(cmdbuf->device->physical_device->meta.copy.buf2buf));
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = region->size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.src = src->bo;
   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdCopyBuffer2)(VkCommandBuffer commandBuffer,
                               const VkCopyBufferInfo2 *pCopyBufferInfo)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, src, pCopyBufferInfo->srcBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, pCopyBufferInfo->dstBuffer);

   for (unsigned i = 0; i < pCopyBufferInfo->regionCount; i++) {
      panvk_meta_copy_buf2buf(cmdbuf, src, dst, &pCopyBufferInfo->pRegions[i]);
   }
}

struct panvk_meta_fill_buf_info {
   mali_ptr start;
   uint32_t val;
} PACKED;

#define panvk_meta_fill_buf_get_info_field(b, field) \
        nir_load_push_constant((b), 1, \
                               sizeof(((struct panvk_meta_fill_buf_info *)0)->field) * 8, \
                               nir_imm_int(b, 0), \
                               .base = offsetof(struct panvk_meta_fill_buf_info, field), \
                               .range = ~0)
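
/* Fill shader: each invocation stores the 32-bit fill value at
 * start + 4 * global_invocation_id.x, and the dispatch in
 * panvk_meta_fill_buf() launches one invocation per word to fill.
 */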
static mali_ptr
panvk_meta_fill_buf_shader(struct panfrost_device *pdev,
                           struct pan_pool *bin_pool,
                           struct pan_shader_info *shader_info)
{
   /* FIXME: Won't work on compute queues, but we can't do that with
    * a compute shader if the destination is an AFBC surface.
    */
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE,
                                     GENX(pan_shader_get_compiler_options)(),
                                     "panvk_meta_fill_buf()");

   nir_ssa_def *coord = nir_load_global_invocation_id(&b, 32);

   nir_ssa_def *offset =
      nir_u2u64(&b, nir_imul(&b, nir_channel(&b, coord, 0),
                             nir_imm_int(&b, sizeof(uint32_t))));
   nir_ssa_def *ptr =
      nir_iadd(&b, panvk_meta_fill_buf_get_info_field(&b, start), offset);
   nir_ssa_def *val = panvk_meta_fill_buf_get_info_field(&b, val);

   nir_store_global(&b, ptr, sizeof(uint32_t), val, 1);

   struct panfrost_compile_inputs inputs = {
      .gpu_id = pdev->gpu_id,
      .is_blit = true,
      .no_ubo_to_push = true,
   };

   struct util_dynarray binary;

   util_dynarray_init(&binary, NULL);
   GENX(pan_shader_compile)(b.shader, &inputs, &binary, shader_info);

   shader_info->push.count =
      DIV_ROUND_UP(sizeof(struct panvk_meta_fill_buf_info), 4);

   mali_ptr shader =
      pan_pool_upload_aligned(bin_pool, binary.data, binary.size, 128);

   util_dynarray_fini(&binary);
   ralloc_free(b.shader);

   return shader;
}

static mali_ptr
panvk_meta_fill_buf_emit_rsd(struct panfrost_device *pdev,
                             struct pan_pool *bin_pool,
                             struct pan_pool *desc_pool)
{
   struct pan_shader_info shader_info;

   mali_ptr shader =
      panvk_meta_fill_buf_shader(pdev, bin_pool, &shader_info);

   struct panfrost_ptr rsd_ptr =
      pan_pool_alloc_desc_aggregate(desc_pool,
                                    PAN_DESC(RENDERER_STATE));

   pan_pack(rsd_ptr.cpu, RENDERER_STATE, cfg) {
      pan_shader_prepare_rsd(&shader_info, shader, &cfg);
   }

   return rsd_ptr.gpu;
}

static void
panvk_meta_fill_buf_init(struct panvk_physical_device *dev)
{
   dev->meta.copy.fillbuf.rsd =
      panvk_meta_fill_buf_emit_rsd(&dev->pdev, &dev->meta.bin_pool.base,
                                   &dev->meta.desc_pool.base);
}

static void
panvk_meta_fill_buf(struct panvk_cmd_buffer *cmdbuf,
                    const struct panvk_buffer *dst,
                    VkDeviceSize size, VkDeviceSize offset,
                    uint32_t val)
{
   struct panvk_meta_fill_buf_info info = {
      .start = panvk_buffer_gpu_ptr(dst, offset),
      .val = val,
   };
   size = panvk_buffer_range(dst, offset, size);

   /* From the Vulkan spec:
    *
    *    "size is the number of bytes to fill, and must be either a multiple
    *    of 4, or VK_WHOLE_SIZE to fill the range from offset to the end of
    *    the buffer. If VK_WHOLE_SIZE is used and the remaining size of the
    *    buffer is not a multiple of 4, then the nearest smaller multiple is
    *    used."
    */
   size &= ~3ull;

   assert(!(offset & 3) && !(size & 3));

   unsigned nwords = size / sizeof(uint32_t);
   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.fillbuf.rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   struct pan_compute_dim num_wg = { nwords, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdFillBuffer)(VkCommandBuffer commandBuffer,
                              VkBuffer dstBuffer,
                              VkDeviceSize dstOffset,
                              VkDeviceSize fillSize,
                              uint32_t data)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_fill_buf(cmdbuf, dst, fillSize, dstOffset, data);
}
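
/* CmdUpdateBuffer is implemented by uploading pData to a pool owned by the
 * command buffer and then doing a GPU-side copy with the 4-byte buf2buf
 * copy shader (the spec guarantees dataSize is a multiple of 4).
 */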
static void
panvk_meta_update_buf(struct panvk_cmd_buffer *cmdbuf,
                      const struct panvk_buffer *dst, VkDeviceSize offset,
                      VkDeviceSize size, const void *data)
{
   struct panvk_meta_copy_buf2buf_info info = {
      .src = pan_pool_upload_aligned(&cmdbuf->desc_pool.base, data, size, 4),
      .dst = panvk_buffer_gpu_ptr(dst, offset),
   };

   unsigned log2blksz = ffs(sizeof(uint32_t)) - 1;

   mali_ptr rsd =
      cmdbuf->device->physical_device->meta.copy.buf2buf[log2blksz].rsd;

   mali_ptr pushconsts =
      pan_pool_upload_aligned(&cmdbuf->desc_pool.base, &info, sizeof(info), 16);

   panvk_per_arch(cmd_close_batch)(cmdbuf);

   struct panvk_batch *batch = panvk_cmd_open_batch(cmdbuf);

   panvk_per_arch(cmd_alloc_tls_desc)(cmdbuf, false);

   mali_ptr tsd = batch->tls.gpu;

   unsigned nblocks = size >> log2blksz;
   struct pan_compute_dim num_wg = { nblocks, 1, 1 };
   struct pan_compute_dim wg_sz = { 1, 1, 1 };
   struct panfrost_ptr job =
      panvk_meta_copy_emit_compute_job(&cmdbuf->desc_pool.base,
                                       &batch->scoreboard,
                                       &num_wg, &wg_sz,
                                       0, 0, pushconsts, rsd, tsd);

   util_dynarray_append(&batch->jobs, void *, job.cpu);

   batch->blit.dst = dst->bo;
   panvk_per_arch(cmd_close_batch)(cmdbuf);
}

void
panvk_per_arch(CmdUpdateBuffer)(VkCommandBuffer commandBuffer,
                                VkBuffer dstBuffer,
                                VkDeviceSize dstOffset,
                                VkDeviceSize dataSize,
                                const void *pData)
{
   VK_FROM_HANDLE(panvk_cmd_buffer, cmdbuf, commandBuffer);
   VK_FROM_HANDLE(panvk_buffer, dst, dstBuffer);

   panvk_meta_update_buf(cmdbuf, dst, dstOffset, dataSize, pData);
}

void
panvk_per_arch(meta_copy_init)(struct panvk_physical_device *dev)
{
   panvk_meta_copy_img2img_init(dev, false);
   panvk_meta_copy_img2img_init(dev, true);
   panvk_meta_copy_buf2img_init(dev);
   panvk_meta_copy_img2buf_init(dev);
   panvk_meta_copy_buf2buf_init(dev);
   panvk_meta_fill_buf_init(dev);
}