/*
 * Copyright © 2015-2017 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/format/u_format.h"
#include "util/u_surface.h"
#include "util/u_blitter.h"
#include "compiler/nir/nir_builder.h"
#include "v3d_context.h"
#include "broadcom/common/v3d_tiling.h"
#include "broadcom/common/v3d_tfu.h"

/**
 * The @op_blit parameter tells whether we are saving state for blitter_blit
 * (true) or blitter_clear (false). If other blitter functions that require
 * different state are used, we may need something more elaborate than this.
 */
void
v3d_blitter_save(struct v3d_context *v3d, bool op_blit)
{
        util_blitter_save_fragment_constant_buffer_slot(v3d->blitter,
                                                        v3d->constbuf[PIPE_SHADER_FRAGMENT].cb);
        util_blitter_save_vertex_buffer_slot(v3d->blitter, v3d->vertexbuf.vb);
        util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx);
        util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs);
        util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs);
        util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets,
                                     v3d->streamout.targets);
        util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer);
        util_blitter_save_viewport(v3d->blitter, &v3d->viewport);
        util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs);
        util_blitter_save_blend(v3d->blitter, v3d->blend);
        util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa);
        util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref);
        util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask, 0);

        if (op_blit) {
                util_blitter_save_scissor(v3d->blitter, &v3d->scissor);
                util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer);
                util_blitter_save_fragment_sampler_states(v3d->blitter,
                                v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers,
                                (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers);
                util_blitter_save_fragment_sampler_views(v3d->blitter,
                                v3d->tex[PIPE_SHADER_FRAGMENT].num_textures,
                                v3d->tex[PIPE_SHADER_FRAGMENT].textures);
        }
}

static void
v3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(ctx);
        struct v3d_resource *src = v3d_resource(info->src.resource);
        struct pipe_resource *tiled = NULL;

        if (!info->mask)
                return;

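        /* The render path samples from the source as a texture; the texture
         * hardware doesn't handle raster-order sources other than 1D, so
         * untiled sources are first copied into a tiled temporary below.
         */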
        if (!src->tiled &&
            info->src.resource->target != PIPE_TEXTURE_1D &&
            info->src.resource->target != PIPE_TEXTURE_1D_ARRAY) {
                struct pipe_box box = {
                        .x = 0,
                        .y = 0,
                        .width = u_minify(info->src.resource->width0,
                                          info->src.level),
                        .height = u_minify(info->src.resource->height0,
                                           info->src.level),
                        .depth = 1,
                };
                struct pipe_resource tmpl = {
                        .target = info->src.resource->target,
                        .format = info->src.resource->format,
                        .width0 = box.width,
                        .height0 = box.height,
                        .depth0 = 1,
                        .array_size = 1,
                };
                tiled = ctx->screen->resource_create(ctx->screen, &tmpl);
                if (!tiled) {
                        fprintf(stderr, "Failed to create tiled blit temp\n");
                        return;
                }
                ctx->resource_copy_region(ctx,
                                          tiled, 0,
                                          0, 0, 0,
                                          info->src.resource, info->src.level,
                                          &box);
                info->src.level = 0;
                info->src.resource = tiled;
        }

        if (!util_blitter_is_blit_supported(v3d->blitter, info)) {
                fprintf(stderr, "blit unsupported %s -> %s\n",
                        util_format_short_name(info->src.format),
                        util_format_short_name(info->dst.format));
                return;
        }

        v3d_blitter_save(v3d, true);
        util_blitter_blit(v3d->blitter, info);

        pipe_resource_reference(&tiled, NULL);
        info->mask = 0;
}

/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888
 * or R8 texture.
 */
static void
v3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(ctx);
        struct v3d_resource *src = v3d_resource(info->src.resource);
        struct v3d_resource *dst = v3d_resource(info->dst.resource);
        enum pipe_format src_format, dst_format;

        if ((info->mask & PIPE_MASK_S) == 0)
                return;

        if (src->separate_stencil) {
                src = src->separate_stencil;
                src_format = PIPE_FORMAT_R8_UINT;
        } else {
                src_format = PIPE_FORMAT_RGBA8888_UINT;
        }

        if (dst->separate_stencil) {
                dst = dst->separate_stencil;
                dst_format = PIPE_FORMAT_R8_UINT;
        } else {
                dst_format = PIPE_FORMAT_RGBA8888_UINT;
        }

        /* Initialize the surface. */
        struct pipe_surface dst_tmpl = {
                .u.tex = {
                        .level = info->dst.level,
                        .first_layer = info->dst.box.z,
                        .last_layer = info->dst.box.z,
                },
                .format = dst_format,
        };
        struct pipe_surface *dst_surf =
                ctx->create_surface(ctx, &dst->base, &dst_tmpl);

        /* Initialize the sampler view. */
        struct pipe_sampler_view src_tmpl = {
                .target = src->base.target,
                .format = src_format,
                .u.tex = {
                        .first_level = info->src.level,
                        .last_level = info->src.level,
                        .first_layer = 0,
                        .last_layer = (src->base.target == PIPE_TEXTURE_3D ?
                                       u_minify(src->base.depth0,
                                                info->src.level) - 1 :
                                       src->base.array_size - 1),
                },
                .swizzle_r = PIPE_SWIZZLE_X,
                .swizzle_g = PIPE_SWIZZLE_Y,
                .swizzle_b = PIPE_SWIZZLE_Z,
                .swizzle_a = PIPE_SWIZZLE_W,
        };
        struct pipe_sampler_view *src_view =
                ctx->create_sampler_view(ctx, &src->base, &src_tmpl);

        v3d_blitter_save(v3d, true);
        util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box,
                                  src_view, &info->src.box,
                                  src->base.width0, src->base.height0,
                                  PIPE_MASK_R,
                                  PIPE_TEX_FILTER_NEAREST,
                                  info->scissor_enable ? &info->scissor : NULL,
                                  info->alpha_blend, false, 0);

        pipe_surface_reference(&dst_surf, NULL);
        pipe_sampler_view_reference(&src_view, NULL);

        info->mask &= ~PIPE_MASK_S;
}

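/**
 * Attempts to perform a copy using the TFU (texture formatting unit): the
 * given layer/level of @psrc is read and written into levels
 * @base_level..@last_level of @pdst, either to fill in mipmap levels
 * (@for_mipmap) or as a plain same-format copy for blits.
 *
 * Returns false if the TFU can't handle the operation, so the caller can
 * fall back to another blit path.
 */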
static bool
v3d_tfu(struct pipe_context *pctx,
        struct pipe_resource *pdst,
        struct pipe_resource *psrc,
        unsigned int src_level,
        unsigned int base_level,
        unsigned int last_level,
        unsigned int src_layer,
        unsigned int dst_layer,
        bool for_mipmap)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_screen *screen = v3d->screen;
        struct v3d_resource *src = v3d_resource(psrc);
        struct v3d_resource *dst = v3d_resource(pdst);
        struct v3d_resource_slice *src_base_slice = &src->slices[src_level];
        struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level];
        int msaa_scale = pdst->nr_samples > 1 ? 2 : 1;
        int width = u_minify(pdst->width0, base_level) * msaa_scale;
        int height = u_minify(pdst->height0, base_level) * msaa_scale;
        enum pipe_format pformat;

        if (psrc->format != pdst->format)
                return false;
        if (psrc->nr_samples != pdst->nr_samples)
                return false;

        /* Can't write to raster. */
        if (dst_base_slice->tiling == V3D_TILING_RASTER)
                return false;

        /* When using the TFU for a blit, we are doing an exact copy (input
         * and output formats must match, no scaling, etc.), so there are no
         * pixel format conversions. Thus we can rewrite the format to a
         * TFU-compatible one based on its texel size.
         */
        if (for_mipmap) {
                pformat = pdst->format;
        } else {
                switch (dst->cpp) {
                case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
                case 8:  pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break;
                case 4:  pformat = PIPE_FORMAT_R32_FLOAT;          break;
                case 2:  pformat = PIPE_FORMAT_R16_FLOAT;          break;
                case 1:  pformat = PIPE_FORMAT_R8_UNORM;           break;
                default: unreachable("unsupported format bit-size"); break;
                }
        }

        uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat);

        if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format, for_mipmap)) {
                assert(for_mipmap);
                return false;
        }

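        /* Make sure any pending rendering that touches the source or
         * destination has been submitted before queueing the TFU job
         * behind it.
         */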
        v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false);
        v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false);

        struct drm_v3d_submit_tfu tfu = {
                .ios = (height << 16) | width,
                .bo_handles = {
                        dst->bo->handle,
                        src != dst ? src->bo->handle : 0
                },
                .in_sync = v3d->out_sync,
                .out_sync = v3d->out_sync,
        };
        uint32_t src_offset = (src->bo->offset +
                               v3d_layer_offset(psrc, src_level, src_layer));
        tfu.iia |= src_offset;
        if (src_base_slice->tiling == V3D_TILING_RASTER) {
                tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER <<
                             V3D33_TFU_ICFG_FORMAT_SHIFT);
        } else {
                tfu.icfg |= ((V3D33_TFU_ICFG_FORMAT_LINEARTILE +
                              (src_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
                             V3D33_TFU_ICFG_FORMAT_SHIFT);
        }

        uint32_t dst_offset = (dst->bo->offset +
                               v3d_layer_offset(pdst, base_level, dst_layer));
        tfu.ioa |= dst_offset;
        if (last_level != base_level)
                tfu.ioa |= V3D33_TFU_IOA_DIMTW;
        tfu.ioa |= ((V3D33_TFU_IOA_FORMAT_LINEARTILE +
                     (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) <<
                    V3D33_TFU_IOA_FORMAT_SHIFT);

        tfu.icfg |= tex_format << V3D33_TFU_ICFG_TTYPE_SHIFT;
        tfu.icfg |= (last_level - base_level) << V3D33_TFU_ICFG_NUMMM_SHIFT;

        switch (src_base_slice->tiling) {
        case V3D_TILING_UIF_NO_XOR:
        case V3D_TILING_UIF_XOR:
                tfu.iis |= (src_base_slice->padded_height /
                            (2 * v3d_utile_height(src->cpp)));
                break;
        case V3D_TILING_RASTER:
                tfu.iis |= src_base_slice->stride / src->cpp;
                break;
        case V3D_TILING_LINEARTILE:
        case V3D_TILING_UBLINEAR_1_COLUMN:
        case V3D_TILING_UBLINEAR_2_COLUMN:
                break;
        }

        /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
         * OPAD field for the destination (how many extra UIF blocks beyond
         * those necessary to cover the height). When filling mipmaps, the
         * miplevel 1+ tiling state is inferred.
         */
        if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR ||
            dst_base_slice->tiling == V3D_TILING_UIF_XOR) {
                int uif_block_h = 2 * v3d_utile_height(dst->cpp);
                int implicit_padded_height = align(height, uif_block_h);

                tfu.icfg |= (((dst_base_slice->padded_height -
                               implicit_padded_height) / uif_block_h) <<
                             V3D33_TFU_ICFG_OPAD_SHIFT);
        }

        int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu);
        if (ret != 0) {
                fprintf(stderr, "Failed to submit TFU job: %d\n", ret);
                return false;
        }

        dst->writes++;

        return true;
}

bool
v3d_generate_mipmap(struct pipe_context *pctx,
                    struct pipe_resource *prsc,
                    enum pipe_format format,
                    unsigned int base_level,
                    unsigned int last_level,
                    unsigned int first_layer,
                    unsigned int last_layer)
{
        if (format != prsc->format)
                return false;

        /* We could maybe support looping over layers for array textures, but
         * we definitely don't support 3D.
         */
        if (first_layer != last_layer)
                return false;

        return v3d_tfu(pctx,
                       prsc, prsc,
                       base_level,
                       base_level, last_level,
                       first_layer, first_layer,
                       true);
}

static void
v3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
{
        int dst_width = u_minify(info->dst.resource->width0, info->dst.level);
        int dst_height = u_minify(info->dst.resource->height0, info->dst.level);

        if ((info->mask & PIPE_MASK_RGBA) == 0)
                return;

        if (info->scissor_enable ||
            info->dst.box.x != 0 ||
            info->dst.box.y != 0 ||
            info->dst.box.width != dst_width ||
            info->dst.box.height != dst_height ||
            info->dst.box.depth != 1 ||
            info->src.box.x != 0 ||
            info->src.box.y != 0 ||
            info->src.box.width != info->dst.box.width ||
            info->src.box.height != info->dst.box.height ||
            info->src.box.depth != 1) {
                return;
        }

        if (info->dst.format != info->src.format)
                return;

        if (v3d_tfu(pctx, info->dst.resource, info->src.resource,
                    info->src.level,
                    info->dst.level, info->dst.level,
                    info->src.box.z, info->dst.box.z,
                    false)) {
                info->mask &= ~PIPE_MASK_RGBA;
        }
}

static struct pipe_surface *
v3d_get_blit_surface(struct pipe_context *pctx,
                     struct pipe_resource *prsc,
                     enum pipe_format format,
                     unsigned level,
                     int16_t layer)
{
        struct pipe_surface tmpl;

        tmpl.format = format;
        tmpl.u.tex.level = level;
        tmpl.u.tex.first_layer = layer;
        tmpl.u.tex.last_layer = layer;

        return pctx->create_surface(pctx, prsc, &tmpl);
}

static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        return size & (tile_size - 1);
}

static void
v3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_screen *screen = v3d->screen;

        if (screen->devinfo.ver < 40 || !info->mask)
                return;

        bool is_color_blit = info->mask & PIPE_MASK_RGBA;
        bool is_depth_blit = info->mask & PIPE_MASK_Z;
        bool is_stencil_blit = info->mask & PIPE_MASK_S;

        /* We should receive either a depth/stencil blit or a color blit,
         * but not both.
         */
        assert((is_color_blit && !is_depth_blit && !is_stencil_blit) ||
               (!is_color_blit && (is_depth_blit || is_stencil_blit)));

        if (info->scissor_enable)
                return;

        if (info->src.box.x != info->dst.box.x ||
            info->src.box.y != info->dst.box.y ||
            info->src.box.width != info->dst.box.width ||
            info->src.box.height != info->dst.box.height)
                return;

        if (is_color_blit &&
            util_format_is_depth_or_stencil(info->dst.format))
                return;

        if (!v3d_rt_format_supported(&screen->devinfo, info->src.format))
                return;

        if (v3d_get_rt_format(&screen->devinfo, info->src.format) !=
            v3d_get_rt_format(&screen->devinfo, info->dst.format))
                return;

        bool msaa = (info->src.resource->nr_samples > 1 ||
                     info->dst.resource->nr_samples > 1);
        bool is_msaa_resolve = (info->src.resource->nr_samples > 1 &&
                                info->dst.resource->nr_samples < 2);

        if (is_msaa_resolve &&
            !v3d_format_supports_tlb_msaa_resolve(&screen->devinfo, info->src.format))
                return;

        v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false);

        struct pipe_surface *dst_surf =
                v3d_get_blit_surface(pctx, info->dst.resource, info->dst.format,
                                     info->dst.level, info->dst.box.z);
        struct pipe_surface *src_surf =
                v3d_get_blit_surface(pctx, info->src.resource, info->src.format,
                                     info->src.level, info->src.box.z);

        struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 };
        if (is_color_blit)
                surfaces[0] = dst_surf;

        bool double_buffer =
                unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa;

        uint32_t tile_width, tile_height, max_bpp;
        v3d_get_tile_buffer_size(msaa, double_buffer,
                                 is_color_blit ? 1 : 0, surfaces, src_surf,
                                 &tile_width, &tile_height, &max_bpp);

        int dst_surface_width = u_minify(info->dst.resource->width0,
                                         info->dst.level);
        int dst_surface_height = u_minify(info->dst.resource->height0,
                                          info->dst.level);
        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
            is_tile_unaligned(info->dst.box.y, tile_height) ||
            (is_tile_unaligned(info->dst.box.width, tile_width) &&
             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
            (is_tile_unaligned(info->dst.box.height, tile_height) &&
             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
                pipe_surface_reference(&dst_surf, NULL);
                pipe_surface_reference(&src_surf, NULL);
                return;
        }

        struct v3d_job *job = v3d_get_job(v3d,
                                          is_color_blit ? 1u : 0u,
                                          surfaces,
                                          is_color_blit ? NULL : dst_surf,
                                          src_surf);
        job->msaa = msaa;
        job->double_buffer = double_buffer;
        job->tile_width = tile_width;
        job->tile_height = tile_height;
        job->internal_bpp = max_bpp;
        job->draw_min_x = info->dst.box.x;
        job->draw_min_y = info->dst.box.y;
        job->draw_max_x = info->dst.box.x + info->dst.box.width;
        job->draw_max_y = info->dst.box.y + info->dst.box.height;
        job->scissor.disabled = false;

        /* The simulator complains if we do a TLB load from a source with a
         * stride that is smaller than the destination's, so we program the
         * 'frame region' to match the smallest dimensions of the two
         * surfaces. This should be fine because we only get here if the src
         * and dst boxes match, so we know the blit involves the same tiles
         * on both surfaces.
         */
        job->draw_width = MIN2(dst_surf->width, src_surf->width);
        job->draw_height = MIN2(dst_surf->height, src_surf->height);
        job->draw_tiles_x = DIV_ROUND_UP(job->draw_width,
                                         job->tile_width);
        job->draw_tiles_y = DIV_ROUND_UP(job->draw_height,
                                         job->tile_height);

        job->needs_flush = true;
        job->num_layers = info->dst.box.depth;

        job->store = 0;
        if (is_color_blit) {
                job->store |= PIPE_CLEAR_COLOR0;
                info->mask &= ~PIPE_MASK_RGBA;
        }
        if (is_depth_blit) {
                job->store |= PIPE_CLEAR_DEPTH;
                info->mask &= ~PIPE_MASK_Z;
        }
        if (is_stencil_blit) {
                job->store |= PIPE_CLEAR_STENCIL;
                info->mask &= ~PIPE_MASK_S;
        }

        v3d41_start_binning(v3d, job);

        v3d_job_submit(v3d, job);

        pipe_surface_reference(&dst_surf, NULL);
        pipe_surface_reference(&src_surf, NULL);
}

/**
 * Creates the VS of the custom blit shader to convert a YUV plane from
 * the NV12 format with the BROADCOM_SAND_COL128 modifier to UIF tiled
 * format. This vertex shader is mostly a pass-through VS.
 */
static void *
v3d_get_sand8_vs(struct pipe_context *pctx)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct pipe_screen *pscreen = pctx->screen;

        if (v3d->sand8_blit_vs)
                return v3d->sand8_blit_vs;

        const struct nir_shader_compiler_options *options =
                pscreen->get_compiler_options(pscreen,
                                              PIPE_SHADER_IR_NIR,
                                              PIPE_SHADER_VERTEX);

        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX,
                                                       options,
                                                       "sand8_blit_vs");

        const struct glsl_type *vec4 = glsl_vec4_type();
        nir_variable *pos_in = nir_variable_create(b.shader,
                                                   nir_var_shader_in,
                                                   vec4, "pos");

        nir_variable *pos_out = nir_variable_create(b.shader,
                                                    nir_var_shader_out,
                                                    vec4, "gl_Position");
        pos_out->data.location = VARYING_SLOT_POS;
        nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);

        struct pipe_shader_state shader_tmpl = {
                .type = PIPE_SHADER_IR_NIR,
                .ir.nir = b.shader,
        };

        v3d->sand8_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);

        return v3d->sand8_blit_vs;
}

/**
 * Creates the FS of the custom blit shader to convert a YUV plane from
 * the NV12 format with the BROADCOM_SAND_COL128 modifier to UIF tiled
 * format. The resulting texture is equivalent to a chroma (cpp=2) or
 * luma (cpp=1) plane of an NV12 format without the SAND modifier.
 */
static void *
v3d_get_sand8_fs(struct pipe_context *pctx, int cpp)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct pipe_screen *pscreen = pctx->screen;
        struct pipe_shader_state **cached_shader;
        const char *name;

        if (cpp == 1) {
                cached_shader = &v3d->sand8_blit_fs_luma;
                name = "sand8_blit_fs_luma";
        } else {
                cached_shader = &v3d->sand8_blit_fs_chroma;
                name = "sand8_blit_fs_chroma";
        }

        if (*cached_shader)
                return *cached_shader;

        const struct nir_shader_compiler_options *options =
                pscreen->get_compiler_options(pscreen,
                                              PIPE_SHADER_IR_NIR,
                                              PIPE_SHADER_FRAGMENT);

        nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
                                                       options, "%s", name);
        const struct glsl_type *vec4 = glsl_vec4_type();

        const struct glsl_type *glsl_int = glsl_int_type();

        nir_variable *color_out =
                nir_variable_create(b.shader, nir_var_shader_out,
                                    vec4, "f_color");
        color_out->data.location = FRAG_RESULT_COLOR;

        nir_variable *pos_in =
                nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos");
        pos_in->data.location = VARYING_SLOT_POS;
        nir_ssa_def *pos = nir_load_var(&b, pos_in);

        nir_ssa_def *zero = nir_imm_int(&b, 0);
        nir_ssa_def *one = nir_imm_int(&b, 1);
        nir_ssa_def *two = nir_imm_int(&b, 2);
        nir_ssa_def *six = nir_imm_int(&b, 6);
        nir_ssa_def *seven = nir_imm_int(&b, 7);
        nir_ssa_def *eight = nir_imm_int(&b, 8);

        nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
        nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));

        nir_variable *stride_in =
                nir_variable_create(b.shader, nir_var_uniform, glsl_int,
                                    "sand8_stride");
        nir_ssa_def *stride =
                nir_load_uniform(&b, 1, 32, zero,
                                 .base = stride_in->data.driver_location,
                                 .range = 1,
                                 .dest_type = nir_type_int32);

        nir_ssa_def *x_offset;
        nir_ssa_def *y_offset;

        /* The UIF tiled format is composed of UIF blocks. Each block has
         * four 64-byte microtiles. Inside each microtile, pixels are stored
         * in raster order, but microtiles have different dimensions
         * depending on the bits per pixel of the image:
         *
         *   8bpp  microtile dimensions are 8x8
         *   16bpp microtile dimensions are 8x4
         *   32bpp microtile dimensions are 4x4
         *
         * Since we read and write at 32bpp to minimize the number of texture
         * operations during the blit, we need to adjust the offsets at which
         * we read and write, as the data will later be read as 8bpp (luma)
         * and 16bpp (chroma).
         *
         * For chroma, the 8x4 16bpp raster order is compatible with 4x4
         * 32bpp: in both layouts each line has 8*2 == 4*4 == 16 bytes. But
         * the luma 8x8 8bpp raster order is not compatible with 4x4 32bpp:
         * 8bpp has 8 bytes per line while 32bpp has 16 bytes per line, so
         * reading as 8bpp a texture that was written as 32bpp would leave
         * the bytes misplaced.
         *
         * The inter/intra_utile_x_offsets below take care of remapping the
         * offsets between microtiles to deal with this issue for luma
         * planes.
         */
        if (cpp == 1) {
                nir_ssa_def *intra_utile_x_offset =
                        nir_ishl(&b, nir_iand_imm(&b, x, 1), two);
                nir_ssa_def *inter_utile_x_offset =
                        nir_ishl(&b, nir_iand_imm(&b, x, 60), one);
                nir_ssa_def *stripe_offset =
                        nir_ishl(&b, nir_imul(&b, nir_ishr_imm(&b, x, 6),
                                              stride),
                                 seven);

                x_offset = nir_iadd(&b, stripe_offset,
                                    nir_iadd(&b, intra_utile_x_offset,
                                             inter_utile_x_offset));
                y_offset = nir_iadd(&b,
                                    nir_ishl(&b, nir_iand_imm(&b, x, 2), six),
                                    nir_ishl(&b, y, eight));
        } else {
                nir_ssa_def *stripe_offset =
                        nir_ishl(&b, nir_imul(&b, nir_ishr_imm(&b, x, 5),
                                              stride),
                                 seven);
                x_offset = nir_iadd(&b, stripe_offset,
                                    nir_ishl(&b, nir_iand_imm(&b, x, 31), two));
                y_offset = nir_ishl(&b, y, seven);
        }
        nir_ssa_def *ubo_offset = nir_iadd(&b, x_offset, y_offset);
        nir_ssa_def *load =
                nir_load_ubo(&b, 1, 32, one, ubo_offset,
                             .align_mul = 4,
                             .align_offset = 0,
                             .range_base = 0,
                             .range = ~0);

        nir_ssa_def *output = nir_unpack_unorm_4x8(&b, load);

        nir_store_var(&b, color_out,
                      output,
                      0xF);

        struct pipe_shader_state shader_tmpl = {
                .type = PIPE_SHADER_IR_NIR,
                .ir.nir = b.shader,
        };

        *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);

        return *cached_shader;
}

/**
 * Turns NV12 with the SAND8 format modifier from raster order with
 * interleaved luma and chroma 128-byte-wide columns into the tiled format
 * for luma and chroma.
 *
 * This implementation is based on vc4_yuv_blit.
 */
static void
v3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct v3d_resource *src = v3d_resource(info->src.resource);
        ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource);

        if (!src->sand_col128_stride)
                return;
        if (src->tiled)
                return;
        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
            src->base.format != PIPE_FORMAT_R8G8_UNORM)
                return;
        if (!(info->mask & PIPE_MASK_RGBA))
                return;

        assert(dst->base.format == src->base.format);
        assert(dst->tiled);

        assert(info->src.box.x == 0 && info->dst.box.x == 0);
        assert(info->src.box.y == 0 && info->dst.box.y == 0);
        assert(info->src.box.width == info->dst.box.width);
        assert(info->src.box.height == info->dst.box.height);

        v3d_blitter_save(v3d, true);

        struct pipe_surface dst_tmpl;
        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
                                         info->dst.level, info->dst.box.z);
        /* Although the src textures are cpp=1 or cpp=2, the dst texture
         * uses a cpp=4 format, so all read/write texture ops will be done
         * using 32-bit reads and writes.
         */
        dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM;
        struct pipe_surface *dst_surf =
                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
        if (!dst_surf) {
                fprintf(stderr, "Failed to create YUV dst surface\n");
                util_blitter_unset_running_flag(v3d->blitter);
                return;
        }

        uint32_t sand8_stride = src->sand_col128_stride;

        /* Adjust the dimensions of the dst luma/chroma to match the src
         * size now that we are using a cpp=4 format. The new dimensions
         * also take the UIF microtile layouts into account.
         */
        dst_surf->width = align(dst_surf->width, 8) / 2;
        if (src->cpp == 1)
                dst_surf->height /= 2;

        /* Set the constant buffer. */
        struct pipe_constant_buffer cb_uniforms = {
                .user_buffer = &sand8_stride,
                .buffer_size = sizeof(sand8_stride),
        };

        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false,
                                  &cb_uniforms);
        struct pipe_constant_buffer cb_src = {
                .buffer = info->src.resource,
                .buffer_offset = src->slices[info->src.level].offset,
                .buffer_size = (src->bo->size -
                                src->slices[info->src.level].offset),
        };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 2, false,
                                  &cb_src);
        /* Unbind the textures, to make sure we don't try to recurse into
         * the shadow blit.
         */
        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false,
                                NULL);
        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);

        util_blitter_custom_shader(v3d->blitter, dst_surf,
                                   v3d_get_sand8_vs(pctx),
                                   v3d_get_sand8_fs(pctx, src->cpp));

        util_blitter_restore_textures(v3d->blitter);
        util_blitter_restore_constant_buffer_state(v3d->blitter);

        /* Restore cb1 (util_blitter doesn't handle this one). */
        struct pipe_constant_buffer cb_disabled = { 0 };
        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false,
                                  &cb_disabled);

        pipe_surface_reference(&dst_surf, NULL);

        info->mask &= ~PIPE_MASK_RGBA;
}

/* Optimal hardware path for blitting pixels.
 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
 *
 * Each helper below handles the cases it supports and clears the
 * corresponding bits in info.mask, so the later helpers (ending with the
 * generic v3d_render_blit fallback) only see what is still pending.
 */
void
v3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
{
        struct v3d_context *v3d = v3d_context(pctx);
        struct pipe_blit_info info = *blit_info;

        v3d_sand8_blit(pctx, &info);

        v3d_tfu_blit(pctx, &info);

        v3d_tlb_blit(pctx, &info);

        v3d_stencil_blit(pctx, &info);

        v3d_render_blit(pctx, &info);

        /* Flush our blit jobs immediately. They're unlikely to get reused by
         * normal drawing or other blits, and without flushing we can easily
         * run into unexpected OOMs when blits are used for a large series of
         * texture uploads before using the textures.
         */
        v3d_flush_jobs_writing_resource(v3d, info.dst.resource,
                                        V3D_FLUSH_DEFAULT, false);
}