/*
 * Copyright © 2015 Broadcom
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
22 */ 23 24#include "util/format/u_format.h" 25#include "util/u_surface.h" 26#include "util/u_blitter.h" 27#include "compiler/nir/nir_builder.h" 28#include "vc4_context.h" 29 30static struct pipe_surface * 31vc4_get_blit_surface(struct pipe_context *pctx, 32 struct pipe_resource *prsc, unsigned level) 33{ 34 struct pipe_surface tmpl; 35 36 memset(&tmpl, 0, sizeof(tmpl)); 37 tmpl.format = prsc->format; 38 tmpl.u.tex.level = level; 39 tmpl.u.tex.first_layer = 0; 40 tmpl.u.tex.last_layer = 0; 41 42 return pctx->create_surface(pctx, prsc, &tmpl); 43} 44 45static bool 46is_tile_unaligned(unsigned size, unsigned tile_size) 47{ 48 return size & (tile_size - 1); 49} 50 51static bool 52vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) 53{ 54 struct vc4_context *vc4 = vc4_context(pctx); 55 bool msaa = (info->src.resource->nr_samples > 1 || 56 info->dst.resource->nr_samples > 1); 57 int tile_width = msaa ? 32 : 64; 58 int tile_height = msaa ? 32 : 64; 59 60 if (util_format_is_depth_or_stencil(info->dst.resource->format)) 61 return false; 62 63 if (info->scissor_enable) 64 return false; 65 66 if ((info->mask & PIPE_MASK_RGBA) == 0) 67 return false; 68 69 if (info->dst.box.x != info->src.box.x || 70 info->dst.box.y != info->src.box.y || 71 info->dst.box.width != info->src.box.width || 72 info->dst.box.height != info->src.box.height) { 73 return false; 74 } 75 76 int dst_surface_width = u_minify(info->dst.resource->width0, 77 info->dst.level); 78 int dst_surface_height = u_minify(info->dst.resource->height0, 79 info->dst.level); 80 if (is_tile_unaligned(info->dst.box.x, tile_width) || 81 is_tile_unaligned(info->dst.box.y, tile_height) || 82 (is_tile_unaligned(info->dst.box.width, tile_width) && 83 info->dst.box.x + info->dst.box.width != dst_surface_width) || 84 (is_tile_unaligned(info->dst.box.height, tile_height) && 85 info->dst.box.y + info->dst.box.height != dst_surface_height)) { 86 return false; 87 } 88 89 /* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL 
uses the 90 * VC4_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our 91 * destination surface) to determine the stride. This may be wrong 92 * when reading from texture miplevels > 0, which are stored in 93 * POT-sized areas. For MSAA, the tile addresses are computed 94 * explicitly by the RCL, but still use the destination width to 95 * determine the stride (which could be fixed by explicitly supplying 96 * it in the ABI). 97 */ 98 struct vc4_resource *rsc = vc4_resource(info->src.resource); 99 100 uint32_t stride; 101 102 if (info->src.resource->nr_samples > 1) 103 stride = align(dst_surface_width, 32) * 4 * rsc->cpp; 104 else if (rsc->slices[info->src.level].tiling == VC4_TILING_FORMAT_T) 105 stride = align(dst_surface_width * rsc->cpp, 128); 106 else 107 stride = align(dst_surface_width * rsc->cpp, 16); 108 109 if (stride != rsc->slices[info->src.level].stride) 110 return false; 111 112 if (info->dst.resource->format != info->src.resource->format) 113 return false; 114 115 if (false) { 116 fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n", 117 info->src.box.x, 118 info->src.box.y, 119 info->dst.box.x, 120 info->dst.box.y, 121 info->dst.box.width, 122 info->dst.box.height); 123 } 124 125 struct pipe_surface *dst_surf = 126 vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level); 127 struct pipe_surface *src_surf = 128 vc4_get_blit_surface(pctx, info->src.resource, info->src.level); 129 130 vc4_flush_jobs_reading_resource(vc4, info->src.resource); 131 132 struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL); 133 pipe_surface_reference(&job->color_read, src_surf); 134 135 job->draw_min_x = info->dst.box.x; 136 job->draw_min_y = info->dst.box.y; 137 job->draw_max_x = info->dst.box.x + info->dst.box.width; 138 job->draw_max_y = info->dst.box.y + info->dst.box.height; 139 job->draw_width = dst_surf->width; 140 job->draw_height = dst_surf->height; 141 142 job->tile_width = tile_width; 143 job->tile_height = tile_height; 144 job->msaa = msaa; 
145 job->needs_flush = true; 146 job->resolve |= PIPE_CLEAR_COLOR; 147 148 vc4_job_submit(vc4, job); 149 150 pipe_surface_reference(&dst_surf, NULL); 151 pipe_surface_reference(&src_surf, NULL); 152 153 return true; 154} 155 156void 157vc4_blitter_save(struct vc4_context *vc4) 158{ 159 util_blitter_save_fragment_constant_buffer_slot(vc4->blitter, 160 vc4->constbuf[PIPE_SHADER_FRAGMENT].cb); 161 util_blitter_save_vertex_buffer_slot(vc4->blitter, vc4->vertexbuf.vb); 162 util_blitter_save_vertex_elements(vc4->blitter, vc4->vtx); 163 util_blitter_save_vertex_shader(vc4->blitter, vc4->prog.bind_vs); 164 util_blitter_save_rasterizer(vc4->blitter, vc4->rasterizer); 165 util_blitter_save_viewport(vc4->blitter, &vc4->viewport); 166 util_blitter_save_scissor(vc4->blitter, &vc4->scissor); 167 util_blitter_save_fragment_shader(vc4->blitter, vc4->prog.bind_fs); 168 util_blitter_save_blend(vc4->blitter, vc4->blend); 169 util_blitter_save_depth_stencil_alpha(vc4->blitter, vc4->zsa); 170 util_blitter_save_stencil_ref(vc4->blitter, &vc4->stencil_ref); 171 util_blitter_save_sample_mask(vc4->blitter, vc4->sample_mask, 0); 172 util_blitter_save_framebuffer(vc4->blitter, &vc4->framebuffer); 173 util_blitter_save_fragment_sampler_states(vc4->blitter, 174 vc4->fragtex.num_samplers, 175 (void **)vc4->fragtex.samplers); 176 util_blitter_save_fragment_sampler_views(vc4->blitter, 177 vc4->fragtex.num_textures, vc4->fragtex.textures); 178} 179 180static void *vc4_get_yuv_vs(struct pipe_context *pctx) 181{ 182 struct vc4_context *vc4 = vc4_context(pctx); 183 struct pipe_screen *pscreen = pctx->screen; 184 185 if (vc4->yuv_linear_blit_vs) 186 return vc4->yuv_linear_blit_vs; 187 188 const struct nir_shader_compiler_options *options = 189 pscreen->get_compiler_options(pscreen, 190 PIPE_SHADER_IR_NIR, 191 PIPE_SHADER_VERTEX); 192 193 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options, 194 "linear_blit_vs"); 195 196 const struct glsl_type *vec4 = glsl_vec4_type(); 197 
nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, 198 vec4, "pos"); 199 200 nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out, 201 vec4, "gl_Position"); 202 pos_out->data.location = VARYING_SLOT_POS; 203 204 nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf); 205 206 struct pipe_shader_state shader_tmpl = { 207 .type = PIPE_SHADER_IR_NIR, 208 .ir.nir = b.shader, 209 }; 210 211 vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl); 212 213 return vc4->yuv_linear_blit_vs; 214} 215 216static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp) 217{ 218 struct vc4_context *vc4 = vc4_context(pctx); 219 struct pipe_screen *pscreen = pctx->screen; 220 struct pipe_shader_state **cached_shader; 221 const char *name; 222 223 if (cpp == 1) { 224 cached_shader = &vc4->yuv_linear_blit_fs_8bit; 225 name = "linear_blit_8bit_fs"; 226 } else { 227 cached_shader = &vc4->yuv_linear_blit_fs_16bit; 228 name = "linear_blit_16bit_fs"; 229 } 230 231 if (*cached_shader) 232 return *cached_shader; 233 234 const struct nir_shader_compiler_options *options = 235 pscreen->get_compiler_options(pscreen, 236 PIPE_SHADER_IR_NIR, 237 PIPE_SHADER_FRAGMENT); 238 239 nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 240 options, "%s", name); 241 242 const struct glsl_type *vec4 = glsl_vec4_type(); 243 const struct glsl_type *glsl_int = glsl_int_type(); 244 245 nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out, 246 vec4, "f_color"); 247 color_out->data.location = FRAG_RESULT_COLOR; 248 249 nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in, 250 vec4, "pos"); 251 pos_in->data.location = VARYING_SLOT_POS; 252 nir_ssa_def *pos = nir_load_var(&b, pos_in); 253 254 nir_ssa_def *one = nir_imm_int(&b, 1); 255 nir_ssa_def *two = nir_imm_int(&b, 2); 256 257 nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0)); 258 nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1)); 259 260 
nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform, 261 glsl_int, "stride"); 262 nir_ssa_def *stride = nir_load_var(&b, stride_in); 263 264 nir_ssa_def *x_offset; 265 nir_ssa_def *y_offset; 266 if (cpp == 1) { 267 nir_ssa_def *intra_utile_x_offset = 268 nir_ishl(&b, nir_iand(&b, x, one), two); 269 nir_ssa_def *inter_utile_x_offset = 270 nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one); 271 272 x_offset = nir_iadd(&b, 273 intra_utile_x_offset, 274 inter_utile_x_offset); 275 y_offset = nir_imul(&b, 276 nir_iadd(&b, 277 nir_ishl(&b, y, one), 278 nir_ushr(&b, nir_iand(&b, x, two), one)), 279 stride); 280 } else { 281 x_offset = nir_ishl(&b, x, two); 282 y_offset = nir_imul(&b, y, stride); 283 } 284 285 nir_ssa_def *load = 286 nir_load_ubo(&b, 1, 32, one, nir_iadd(&b, x_offset, y_offset), 287 .align_mul = 4, 288 .align_offset = 0, 289 .range_base = 0, 290 .range = ~0); 291 292 nir_store_var(&b, color_out, 293 nir_unpack_unorm_4x8(&b, load), 294 0xf); 295 296 struct pipe_shader_state shader_tmpl = { 297 .type = PIPE_SHADER_IR_NIR, 298 .ir.nir = b.shader, 299 }; 300 301 *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl); 302 303 return *cached_shader; 304} 305 306static bool 307vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info) 308{ 309 struct vc4_context *vc4 = vc4_context(pctx); 310 struct vc4_resource *src = vc4_resource(info->src.resource); 311 struct vc4_resource *dst = vc4_resource(info->dst.resource); 312 bool ok; 313 314 if (src->tiled) 315 return false; 316 if (src->base.format != PIPE_FORMAT_R8_UNORM && 317 src->base.format != PIPE_FORMAT_R8G8_UNORM) 318 return false; 319 320 /* YUV blits always turn raster-order to tiled */ 321 assert(dst->base.format == src->base.format); 322 assert(dst->tiled); 323 324 /* Always 1:1 and at the origin */ 325 assert(info->src.box.x == 0 && info->dst.box.x == 0); 326 assert(info->src.box.y == 0 && info->dst.box.y == 0); 327 assert(info->src.box.width == 
info->dst.box.width); 328 assert(info->src.box.height == info->dst.box.height); 329 330 if ((src->slices[info->src.level].offset & 3) || 331 (src->slices[info->src.level].stride & 3)) { 332 perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n", 333 src->slices[info->src.level].offset, 334 src->slices[info->src.level].stride); 335 goto fallback; 336 } 337 338 vc4_blitter_save(vc4); 339 340 /* Create a renderable surface mapping the T-tiled shadow buffer. 341 */ 342 struct pipe_surface dst_tmpl; 343 util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource, 344 info->dst.level, info->dst.box.z); 345 dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM; 346 struct pipe_surface *dst_surf = 347 pctx->create_surface(pctx, info->dst.resource, &dst_tmpl); 348 if (!dst_surf) { 349 fprintf(stderr, "Failed to create YUV dst surface\n"); 350 util_blitter_unset_running_flag(vc4->blitter); 351 return false; 352 } 353 dst_surf->width = align(dst_surf->width, 8) / 2; 354 if (dst->cpp == 1) 355 dst_surf->height /= 2; 356 357 /* Set the constant buffer. */ 358 uint32_t stride = src->slices[info->src.level].stride; 359 struct pipe_constant_buffer cb_uniforms = { 360 .user_buffer = &stride, 361 .buffer_size = sizeof(stride), 362 }; 363 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, &cb_uniforms); 364 struct pipe_constant_buffer cb_src = { 365 .buffer = info->src.resource, 366 .buffer_offset = src->slices[info->src.level].offset, 367 .buffer_size = (src->bo->size - 368 src->slices[info->src.level].offset), 369 }; 370 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, &cb_src); 371 372 /* Unbind the textures, to make sure we don't try to recurse into the 373 * shadow blit. 
374 */ 375 pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL); 376 pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); 377 378 util_blitter_custom_shader(vc4->blitter, dst_surf, 379 vc4_get_yuv_vs(pctx), 380 vc4_get_yuv_fs(pctx, src->cpp)); 381 382 util_blitter_restore_textures(vc4->blitter); 383 util_blitter_restore_constant_buffer_state(vc4->blitter); 384 /* Restore cb1 (util_blitter doesn't handle this one). */ 385 struct pipe_constant_buffer cb_disabled = { 0 }; 386 pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, &cb_disabled); 387 388 pipe_surface_reference(&dst_surf, NULL); 389 390 return true; 391 392fallback: 393 /* Do an immediate SW fallback, since the render blit path 394 * would just recurse. 395 */ 396 ok = util_try_blit_via_copy_region(pctx, info, false); 397 assert(ok); (void)ok; 398 399 return true; 400} 401 402static bool 403vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) 404{ 405 struct vc4_context *vc4 = vc4_context(ctx); 406 407 if (!util_blitter_is_blit_supported(vc4->blitter, info)) { 408 fprintf(stderr, "blit unsupported %s -> %s\n", 409 util_format_short_name(info->src.resource->format), 410 util_format_short_name(info->dst.resource->format)); 411 return false; 412 } 413 414 /* Enable the scissor, so we get a minimal set of tiles rendered. */ 415 if (!info->scissor_enable) { 416 info->scissor_enable = true; 417 info->scissor.minx = info->dst.box.x; 418 info->scissor.miny = info->dst.box.y; 419 info->scissor.maxx = info->dst.box.x + info->dst.box.width; 420 info->scissor.maxy = info->dst.box.y + info->dst.box.height; 421 } 422 423 vc4_blitter_save(vc4); 424 util_blitter_blit(vc4->blitter, info); 425 426 return true; 427} 428 429/* Optimal hardware path for blitting pixels. 430 * Scaling, format conversion, up- and downsampling (resolve) are allowed. 
431 */ 432void 433vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) 434{ 435 struct pipe_blit_info info = *blit_info; 436 437 if (vc4_yuv_blit(pctx, blit_info)) 438 return; 439 440 if (vc4_tile_blit(pctx, blit_info)) 441 return; 442 443 if (info.mask & PIPE_MASK_S) { 444 if (util_try_blit_via_copy_region(pctx, &info, false)) 445 return; 446 447 info.mask &= ~PIPE_MASK_S; 448 fprintf(stderr, "cannot blit stencil, skipping\n"); 449 } 450 451 if (vc4_render_blit(pctx, &info)) 452 return; 453 454 fprintf(stderr, "Unsupported blit\n"); 455} 456