1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2015-2017 Broadcom 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci#include "util/format/u_format.h" 25bf215546Sopenharmony_ci#include "util/u_surface.h" 26bf215546Sopenharmony_ci#include "util/u_blitter.h" 27bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 28bf215546Sopenharmony_ci#include "v3d_context.h" 29bf215546Sopenharmony_ci#include "broadcom/common/v3d_tiling.h" 30bf215546Sopenharmony_ci#include "broadcom/common/v3d_tfu.h" 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_ci/** 33bf215546Sopenharmony_ci * The param @op_blit is used to tell if we are saving state for blitter_blit 34bf215546Sopenharmony_ci * (if true) or blitter_clear (if false). If other blitter functions are used 35bf215546Sopenharmony_ci * that require different state we may need something more elaborated than 36bf215546Sopenharmony_ci * this. 37bf215546Sopenharmony_ci */ 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_civoid 40bf215546Sopenharmony_civ3d_blitter_save(struct v3d_context *v3d, bool op_blit) 41bf215546Sopenharmony_ci{ 42bf215546Sopenharmony_ci util_blitter_save_fragment_constant_buffer_slot(v3d->blitter, 43bf215546Sopenharmony_ci v3d->constbuf[PIPE_SHADER_FRAGMENT].cb); 44bf215546Sopenharmony_ci util_blitter_save_vertex_buffer_slot(v3d->blitter, v3d->vertexbuf.vb); 45bf215546Sopenharmony_ci util_blitter_save_vertex_elements(v3d->blitter, v3d->vtx); 46bf215546Sopenharmony_ci util_blitter_save_vertex_shader(v3d->blitter, v3d->prog.bind_vs); 47bf215546Sopenharmony_ci util_blitter_save_geometry_shader(v3d->blitter, v3d->prog.bind_gs); 48bf215546Sopenharmony_ci util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, 49bf215546Sopenharmony_ci v3d->streamout.targets); 50bf215546Sopenharmony_ci util_blitter_save_rasterizer(v3d->blitter, v3d->rasterizer); 51bf215546Sopenharmony_ci util_blitter_save_viewport(v3d->blitter, &v3d->viewport); 52bf215546Sopenharmony_ci util_blitter_save_fragment_shader(v3d->blitter, v3d->prog.bind_fs); 53bf215546Sopenharmony_ci util_blitter_save_blend(v3d->blitter, v3d->blend); 54bf215546Sopenharmony_ci util_blitter_save_depth_stencil_alpha(v3d->blitter, v3d->zsa); 55bf215546Sopenharmony_ci util_blitter_save_stencil_ref(v3d->blitter, &v3d->stencil_ref); 56bf215546Sopenharmony_ci util_blitter_save_sample_mask(v3d->blitter, v3d->sample_mask, 0); 57bf215546Sopenharmony_ci util_blitter_save_so_targets(v3d->blitter, v3d->streamout.num_targets, 58bf215546Sopenharmony_ci v3d->streamout.targets); 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci if (op_blit) { 61bf215546Sopenharmony_ci util_blitter_save_scissor(v3d->blitter, &v3d->scissor); 62bf215546Sopenharmony_ci util_blitter_save_framebuffer(v3d->blitter, &v3d->framebuffer); 63bf215546Sopenharmony_ci util_blitter_save_fragment_sampler_states(v3d->blitter, 64bf215546Sopenharmony_ci v3d->tex[PIPE_SHADER_FRAGMENT].num_samplers, 65bf215546Sopenharmony_ci (void **)v3d->tex[PIPE_SHADER_FRAGMENT].samplers); 66bf215546Sopenharmony_ci util_blitter_save_fragment_sampler_views(v3d->blitter, 67bf215546Sopenharmony_ci v3d->tex[PIPE_SHADER_FRAGMENT].num_textures, 68bf215546Sopenharmony_ci v3d->tex[PIPE_SHADER_FRAGMENT].textures); 69bf215546Sopenharmony_ci } 70bf215546Sopenharmony_ci} 71bf215546Sopenharmony_ci 72bf215546Sopenharmony_cistatic void 73bf215546Sopenharmony_civ3d_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info) 74bf215546Sopenharmony_ci{ 75bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(ctx); 76bf215546Sopenharmony_ci struct v3d_resource *src = v3d_resource(info->src.resource); 77bf215546Sopenharmony_ci struct pipe_resource *tiled = NULL; 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci if (!info->mask) 80bf215546Sopenharmony_ci return; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_ci if (!src->tiled && 83bf215546Sopenharmony_ci info->src.resource->target != PIPE_TEXTURE_1D && 84bf215546Sopenharmony_ci info->src.resource->target != PIPE_TEXTURE_1D_ARRAY) { 85bf215546Sopenharmony_ci struct pipe_box box = { 86bf215546Sopenharmony_ci .x = 0, 87bf215546Sopenharmony_ci .y = 0, 88bf215546Sopenharmony_ci .width = u_minify(info->src.resource->width0, 89bf215546Sopenharmony_ci info->src.level), 90bf215546Sopenharmony_ci .height = u_minify(info->src.resource->height0, 91bf215546Sopenharmony_ci info->src.level), 92bf215546Sopenharmony_ci .depth = 1, 93bf215546Sopenharmony_ci }; 94bf215546Sopenharmony_ci struct pipe_resource tmpl = { 95bf215546Sopenharmony_ci .target = info->src.resource->target, 96bf215546Sopenharmony_ci .format = info->src.resource->format, 97bf215546Sopenharmony_ci .width0 = box.width, 98bf215546Sopenharmony_ci .height0 = box.height, 99bf215546Sopenharmony_ci .depth0 = 1, 100bf215546Sopenharmony_ci .array_size = 1, 101bf215546Sopenharmony_ci }; 102bf215546Sopenharmony_ci tiled = ctx->screen->resource_create(ctx->screen, &tmpl); 103bf215546Sopenharmony_ci if (!tiled) { 104bf215546Sopenharmony_ci fprintf(stderr, "Failed to create tiled blit temp\n"); 105bf215546Sopenharmony_ci return; 106bf215546Sopenharmony_ci } 107bf215546Sopenharmony_ci ctx->resource_copy_region(ctx, 108bf215546Sopenharmony_ci tiled, 0, 109bf215546Sopenharmony_ci 0, 0, 0, 110bf215546Sopenharmony_ci info->src.resource, info->src.level, 111bf215546Sopenharmony_ci &box); 112bf215546Sopenharmony_ci info->src.level = 0; 113bf215546Sopenharmony_ci info->src.resource = tiled; 114bf215546Sopenharmony_ci } 115bf215546Sopenharmony_ci 116bf215546Sopenharmony_ci if (!util_blitter_is_blit_supported(v3d->blitter, info)) { 117bf215546Sopenharmony_ci fprintf(stderr, "blit unsupported %s -> %s\n", 118bf215546Sopenharmony_ci util_format_short_name(info->src.format), 119bf215546Sopenharmony_ci util_format_short_name(info->dst.format)); 120bf215546Sopenharmony_ci return; 121bf215546Sopenharmony_ci } 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci v3d_blitter_save(v3d, true); 124bf215546Sopenharmony_ci util_blitter_blit(v3d->blitter, info); 125bf215546Sopenharmony_ci 126bf215546Sopenharmony_ci pipe_resource_reference(&tiled, NULL); 127bf215546Sopenharmony_ci info->mask = 0; 128bf215546Sopenharmony_ci} 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci/* Implement stencil blits by reinterpreting the stencil data as an RGBA8888 131bf215546Sopenharmony_ci * or R8 texture. 132bf215546Sopenharmony_ci */ 133bf215546Sopenharmony_cistatic void 134bf215546Sopenharmony_civ3d_stencil_blit(struct pipe_context *ctx, struct pipe_blit_info *info) 135bf215546Sopenharmony_ci{ 136bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(ctx); 137bf215546Sopenharmony_ci struct v3d_resource *src = v3d_resource(info->src.resource); 138bf215546Sopenharmony_ci struct v3d_resource *dst = v3d_resource(info->dst.resource); 139bf215546Sopenharmony_ci enum pipe_format src_format, dst_format; 140bf215546Sopenharmony_ci 141bf215546Sopenharmony_ci if ((info->mask & PIPE_MASK_S) == 0) 142bf215546Sopenharmony_ci return; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci if (src->separate_stencil) { 145bf215546Sopenharmony_ci src = src->separate_stencil; 146bf215546Sopenharmony_ci src_format = PIPE_FORMAT_R8_UINT; 147bf215546Sopenharmony_ci } else { 148bf215546Sopenharmony_ci src_format = PIPE_FORMAT_RGBA8888_UINT; 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci if (dst->separate_stencil) { 152bf215546Sopenharmony_ci dst = dst->separate_stencil; 153bf215546Sopenharmony_ci dst_format = PIPE_FORMAT_R8_UINT; 154bf215546Sopenharmony_ci } else { 155bf215546Sopenharmony_ci dst_format = PIPE_FORMAT_RGBA8888_UINT; 156bf215546Sopenharmony_ci } 157bf215546Sopenharmony_ci 158bf215546Sopenharmony_ci /* Initialize the surface. */ 159bf215546Sopenharmony_ci struct pipe_surface dst_tmpl = { 160bf215546Sopenharmony_ci .u.tex = { 161bf215546Sopenharmony_ci .level = info->dst.level, 162bf215546Sopenharmony_ci .first_layer = info->dst.box.z, 163bf215546Sopenharmony_ci .last_layer = info->dst.box.z, 164bf215546Sopenharmony_ci }, 165bf215546Sopenharmony_ci .format = dst_format, 166bf215546Sopenharmony_ci }; 167bf215546Sopenharmony_ci struct pipe_surface *dst_surf = 168bf215546Sopenharmony_ci ctx->create_surface(ctx, &dst->base, &dst_tmpl); 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci /* Initialize the sampler view. */ 171bf215546Sopenharmony_ci struct pipe_sampler_view src_tmpl = { 172bf215546Sopenharmony_ci .target = src->base.target, 173bf215546Sopenharmony_ci .format = src_format, 174bf215546Sopenharmony_ci .u.tex = { 175bf215546Sopenharmony_ci .first_level = info->src.level, 176bf215546Sopenharmony_ci .last_level = info->src.level, 177bf215546Sopenharmony_ci .first_layer = 0, 178bf215546Sopenharmony_ci .last_layer = (PIPE_TEXTURE_3D ? 179bf215546Sopenharmony_ci u_minify(src->base.depth0, 180bf215546Sopenharmony_ci info->src.level) - 1 : 181bf215546Sopenharmony_ci src->base.array_size - 1), 182bf215546Sopenharmony_ci }, 183bf215546Sopenharmony_ci .swizzle_r = PIPE_SWIZZLE_X, 184bf215546Sopenharmony_ci .swizzle_g = PIPE_SWIZZLE_Y, 185bf215546Sopenharmony_ci .swizzle_b = PIPE_SWIZZLE_Z, 186bf215546Sopenharmony_ci .swizzle_a = PIPE_SWIZZLE_W, 187bf215546Sopenharmony_ci }; 188bf215546Sopenharmony_ci struct pipe_sampler_view *src_view = 189bf215546Sopenharmony_ci ctx->create_sampler_view(ctx, &src->base, &src_tmpl); 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci v3d_blitter_save(v3d, true); 192bf215546Sopenharmony_ci util_blitter_blit_generic(v3d->blitter, dst_surf, &info->dst.box, 193bf215546Sopenharmony_ci src_view, &info->src.box, 194bf215546Sopenharmony_ci src->base.width0, src->base.height0, 195bf215546Sopenharmony_ci PIPE_MASK_R, 196bf215546Sopenharmony_ci PIPE_TEX_FILTER_NEAREST, 197bf215546Sopenharmony_ci info->scissor_enable ? &info->scissor : NULL, 198bf215546Sopenharmony_ci info->alpha_blend, false, 0); 199bf215546Sopenharmony_ci 200bf215546Sopenharmony_ci pipe_surface_reference(&dst_surf, NULL); 201bf215546Sopenharmony_ci pipe_sampler_view_reference(&src_view, NULL); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci info->mask &= ~PIPE_MASK_S; 204bf215546Sopenharmony_ci} 205bf215546Sopenharmony_ci 206bf215546Sopenharmony_cistatic bool 207bf215546Sopenharmony_civ3d_tfu(struct pipe_context *pctx, 208bf215546Sopenharmony_ci struct pipe_resource *pdst, 209bf215546Sopenharmony_ci struct pipe_resource *psrc, 210bf215546Sopenharmony_ci unsigned int src_level, 211bf215546Sopenharmony_ci unsigned int base_level, 212bf215546Sopenharmony_ci unsigned int last_level, 213bf215546Sopenharmony_ci unsigned int src_layer, 214bf215546Sopenharmony_ci unsigned int dst_layer, 215bf215546Sopenharmony_ci bool for_mipmap) 216bf215546Sopenharmony_ci{ 217bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(pctx); 218bf215546Sopenharmony_ci struct v3d_screen *screen = v3d->screen; 219bf215546Sopenharmony_ci struct v3d_resource *src = v3d_resource(psrc); 220bf215546Sopenharmony_ci struct v3d_resource *dst = v3d_resource(pdst); 221bf215546Sopenharmony_ci struct v3d_resource_slice *src_base_slice = &src->slices[src_level]; 222bf215546Sopenharmony_ci struct v3d_resource_slice *dst_base_slice = &dst->slices[base_level]; 223bf215546Sopenharmony_ci int msaa_scale = pdst->nr_samples > 1 ? 2 : 1; 224bf215546Sopenharmony_ci int width = u_minify(pdst->width0, base_level) * msaa_scale; 225bf215546Sopenharmony_ci int height = u_minify(pdst->height0, base_level) * msaa_scale; 226bf215546Sopenharmony_ci enum pipe_format pformat; 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci if (psrc->format != pdst->format) 229bf215546Sopenharmony_ci return false; 230bf215546Sopenharmony_ci if (psrc->nr_samples != pdst->nr_samples) 231bf215546Sopenharmony_ci return false; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci /* Can't write to raster. */ 234bf215546Sopenharmony_ci if (dst_base_slice->tiling == V3D_TILING_RASTER) 235bf215546Sopenharmony_ci return false; 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci /* When using TFU for blit, we are doing exact copies (both input and 238bf215546Sopenharmony_ci * output format must be the same, no scaling, etc), so there is no 239bf215546Sopenharmony_ci * pixel format conversions. Thus we can rewrite the format to use one 240bf215546Sopenharmony_ci * that is TFU compatible based on its texel size. 241bf215546Sopenharmony_ci */ 242bf215546Sopenharmony_ci if (for_mipmap) { 243bf215546Sopenharmony_ci pformat = pdst->format; 244bf215546Sopenharmony_ci } else { 245bf215546Sopenharmony_ci switch (dst->cpp) { 246bf215546Sopenharmony_ci case 16: pformat = PIPE_FORMAT_R32G32B32A32_FLOAT; break; 247bf215546Sopenharmony_ci case 8: pformat = PIPE_FORMAT_R16G16B16A16_FLOAT; break; 248bf215546Sopenharmony_ci case 4: pformat = PIPE_FORMAT_R32_FLOAT; break; 249bf215546Sopenharmony_ci case 2: pformat = PIPE_FORMAT_R16_FLOAT; break; 250bf215546Sopenharmony_ci case 1: pformat = PIPE_FORMAT_R8_UNORM; break; 251bf215546Sopenharmony_ci default: unreachable("unsupported format bit-size"); break; 252bf215546Sopenharmony_ci }; 253bf215546Sopenharmony_ci } 254bf215546Sopenharmony_ci 255bf215546Sopenharmony_ci uint32_t tex_format = v3d_get_tex_format(&screen->devinfo, pformat); 256bf215546Sopenharmony_ci 257bf215546Sopenharmony_ci if (!v3d_tfu_supports_tex_format(&screen->devinfo, tex_format, for_mipmap)) { 258bf215546Sopenharmony_ci assert(for_mipmap); 259bf215546Sopenharmony_ci return false; 260bf215546Sopenharmony_ci } 261bf215546Sopenharmony_ci 262bf215546Sopenharmony_ci v3d_flush_jobs_writing_resource(v3d, psrc, V3D_FLUSH_DEFAULT, false); 263bf215546Sopenharmony_ci v3d_flush_jobs_reading_resource(v3d, pdst, V3D_FLUSH_DEFAULT, false); 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci struct drm_v3d_submit_tfu tfu = { 266bf215546Sopenharmony_ci .ios = (height << 16) | width, 267bf215546Sopenharmony_ci .bo_handles = { 268bf215546Sopenharmony_ci dst->bo->handle, 269bf215546Sopenharmony_ci src != dst ? src->bo->handle : 0 270bf215546Sopenharmony_ci }, 271bf215546Sopenharmony_ci .in_sync = v3d->out_sync, 272bf215546Sopenharmony_ci .out_sync = v3d->out_sync, 273bf215546Sopenharmony_ci }; 274bf215546Sopenharmony_ci uint32_t src_offset = (src->bo->offset + 275bf215546Sopenharmony_ci v3d_layer_offset(psrc, src_level, src_layer)); 276bf215546Sopenharmony_ci tfu.iia |= src_offset; 277bf215546Sopenharmony_ci if (src_base_slice->tiling == V3D_TILING_RASTER) { 278bf215546Sopenharmony_ci tfu.icfg |= (V3D33_TFU_ICFG_FORMAT_RASTER << 279bf215546Sopenharmony_ci V3D33_TFU_ICFG_FORMAT_SHIFT); 280bf215546Sopenharmony_ci } else { 281bf215546Sopenharmony_ci tfu.icfg |= ((V3D33_TFU_ICFG_FORMAT_LINEARTILE + 282bf215546Sopenharmony_ci (src_base_slice->tiling - V3D_TILING_LINEARTILE)) << 283bf215546Sopenharmony_ci V3D33_TFU_ICFG_FORMAT_SHIFT); 284bf215546Sopenharmony_ci } 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci uint32_t dst_offset = (dst->bo->offset + 287bf215546Sopenharmony_ci v3d_layer_offset(pdst, base_level, dst_layer)); 288bf215546Sopenharmony_ci tfu.ioa |= dst_offset; 289bf215546Sopenharmony_ci if (last_level != base_level) 290bf215546Sopenharmony_ci tfu.ioa |= V3D33_TFU_IOA_DIMTW; 291bf215546Sopenharmony_ci tfu.ioa |= ((V3D33_TFU_IOA_FORMAT_LINEARTILE + 292bf215546Sopenharmony_ci (dst_base_slice->tiling - V3D_TILING_LINEARTILE)) << 293bf215546Sopenharmony_ci V3D33_TFU_IOA_FORMAT_SHIFT); 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci tfu.icfg |= tex_format << V3D33_TFU_ICFG_TTYPE_SHIFT; 296bf215546Sopenharmony_ci tfu.icfg |= (last_level - base_level) << V3D33_TFU_ICFG_NUMMM_SHIFT; 297bf215546Sopenharmony_ci 298bf215546Sopenharmony_ci switch (src_base_slice->tiling) { 299bf215546Sopenharmony_ci case V3D_TILING_UIF_NO_XOR: 300bf215546Sopenharmony_ci case V3D_TILING_UIF_XOR: 301bf215546Sopenharmony_ci tfu.iis |= (src_base_slice->padded_height / 302bf215546Sopenharmony_ci (2 * v3d_utile_height(src->cpp))); 303bf215546Sopenharmony_ci break; 304bf215546Sopenharmony_ci case V3D_TILING_RASTER: 305bf215546Sopenharmony_ci tfu.iis |= src_base_slice->stride / src->cpp; 306bf215546Sopenharmony_ci break; 307bf215546Sopenharmony_ci case V3D_TILING_LINEARTILE: 308bf215546Sopenharmony_ci case V3D_TILING_UBLINEAR_1_COLUMN: 309bf215546Sopenharmony_ci case V3D_TILING_UBLINEAR_2_COLUMN: 310bf215546Sopenharmony_ci break; 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci 313bf215546Sopenharmony_ci /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the 314bf215546Sopenharmony_ci * OPAD field for the destination (how many extra UIF blocks beyond 315bf215546Sopenharmony_ci * those necessary to cover the height). When filling mipmaps, the 316bf215546Sopenharmony_ci * miplevel 1+ tiling state is inferred. 317bf215546Sopenharmony_ci */ 318bf215546Sopenharmony_ci if (dst_base_slice->tiling == V3D_TILING_UIF_NO_XOR || 319bf215546Sopenharmony_ci dst_base_slice->tiling == V3D_TILING_UIF_XOR) { 320bf215546Sopenharmony_ci int uif_block_h = 2 * v3d_utile_height(dst->cpp); 321bf215546Sopenharmony_ci int implicit_padded_height = align(height, uif_block_h); 322bf215546Sopenharmony_ci 323bf215546Sopenharmony_ci tfu.icfg |= (((dst_base_slice->padded_height - 324bf215546Sopenharmony_ci implicit_padded_height) / uif_block_h) << 325bf215546Sopenharmony_ci V3D33_TFU_ICFG_OPAD_SHIFT); 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci int ret = v3d_ioctl(screen->fd, DRM_IOCTL_V3D_SUBMIT_TFU, &tfu); 329bf215546Sopenharmony_ci if (ret != 0) { 330bf215546Sopenharmony_ci fprintf(stderr, "Failed to submit TFU job: %d\n", ret); 331bf215546Sopenharmony_ci return false; 332bf215546Sopenharmony_ci } 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_ci dst->writes++; 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci return true; 337bf215546Sopenharmony_ci} 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_cibool 340bf215546Sopenharmony_civ3d_generate_mipmap(struct pipe_context *pctx, 341bf215546Sopenharmony_ci struct pipe_resource *prsc, 342bf215546Sopenharmony_ci enum pipe_format format, 343bf215546Sopenharmony_ci unsigned int base_level, 344bf215546Sopenharmony_ci unsigned int last_level, 345bf215546Sopenharmony_ci unsigned int first_layer, 346bf215546Sopenharmony_ci unsigned int last_layer) 347bf215546Sopenharmony_ci{ 348bf215546Sopenharmony_ci if (format != prsc->format) 349bf215546Sopenharmony_ci return false; 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci /* We could maybe support looping over layers for array textures, but 352bf215546Sopenharmony_ci * we definitely don't support 3D. 353bf215546Sopenharmony_ci */ 354bf215546Sopenharmony_ci if (first_layer != last_layer) 355bf215546Sopenharmony_ci return false; 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci return v3d_tfu(pctx, 358bf215546Sopenharmony_ci prsc, prsc, 359bf215546Sopenharmony_ci base_level, 360bf215546Sopenharmony_ci base_level, last_level, 361bf215546Sopenharmony_ci first_layer, first_layer, 362bf215546Sopenharmony_ci true); 363bf215546Sopenharmony_ci} 364bf215546Sopenharmony_ci 365bf215546Sopenharmony_cistatic void 366bf215546Sopenharmony_civ3d_tfu_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 367bf215546Sopenharmony_ci{ 368bf215546Sopenharmony_ci int dst_width = u_minify(info->dst.resource->width0, info->dst.level); 369bf215546Sopenharmony_ci int dst_height = u_minify(info->dst.resource->height0, info->dst.level); 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if ((info->mask & PIPE_MASK_RGBA) == 0) 372bf215546Sopenharmony_ci return; 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci if (info->scissor_enable || 375bf215546Sopenharmony_ci info->dst.box.x != 0 || 376bf215546Sopenharmony_ci info->dst.box.y != 0 || 377bf215546Sopenharmony_ci info->dst.box.width != dst_width || 378bf215546Sopenharmony_ci info->dst.box.height != dst_height || 379bf215546Sopenharmony_ci info->dst.box.depth != 1 || 380bf215546Sopenharmony_ci info->src.box.x != 0 || 381bf215546Sopenharmony_ci info->src.box.y != 0 || 382bf215546Sopenharmony_ci info->src.box.width != info->dst.box.width || 383bf215546Sopenharmony_ci info->src.box.height != info->dst.box.height || 384bf215546Sopenharmony_ci info->src.box.depth != 1) { 385bf215546Sopenharmony_ci return; 386bf215546Sopenharmony_ci } 387bf215546Sopenharmony_ci 388bf215546Sopenharmony_ci if (info->dst.format != info->src.format) 389bf215546Sopenharmony_ci return; 390bf215546Sopenharmony_ci 391bf215546Sopenharmony_ci if (v3d_tfu(pctx, info->dst.resource, info->src.resource, 392bf215546Sopenharmony_ci info->src.level, 393bf215546Sopenharmony_ci info->dst.level, info->dst.level, 394bf215546Sopenharmony_ci info->src.box.z, info->dst.box.z, 395bf215546Sopenharmony_ci false)) { 396bf215546Sopenharmony_ci info->mask &= ~PIPE_MASK_RGBA; 397bf215546Sopenharmony_ci } 398bf215546Sopenharmony_ci} 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_cistatic struct pipe_surface * 401bf215546Sopenharmony_civ3d_get_blit_surface(struct pipe_context *pctx, 402bf215546Sopenharmony_ci struct pipe_resource *prsc, 403bf215546Sopenharmony_ci enum pipe_format format, 404bf215546Sopenharmony_ci unsigned level, 405bf215546Sopenharmony_ci int16_t layer) 406bf215546Sopenharmony_ci{ 407bf215546Sopenharmony_ci struct pipe_surface tmpl; 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci tmpl.format = format; 410bf215546Sopenharmony_ci tmpl.u.tex.level = level; 411bf215546Sopenharmony_ci tmpl.u.tex.first_layer = layer; 412bf215546Sopenharmony_ci tmpl.u.tex.last_layer = layer; 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci return pctx->create_surface(pctx, prsc, &tmpl); 415bf215546Sopenharmony_ci} 416bf215546Sopenharmony_ci 417bf215546Sopenharmony_cistatic bool 418bf215546Sopenharmony_ciis_tile_unaligned(unsigned size, unsigned tile_size) 419bf215546Sopenharmony_ci{ 420bf215546Sopenharmony_ci return size & (tile_size - 1); 421bf215546Sopenharmony_ci} 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_cistatic void 424bf215546Sopenharmony_civ3d_tlb_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 425bf215546Sopenharmony_ci{ 426bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(pctx); 427bf215546Sopenharmony_ci struct v3d_screen *screen = v3d->screen; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci if (screen->devinfo.ver < 40 || !info->mask) 430bf215546Sopenharmony_ci return; 431bf215546Sopenharmony_ci 432bf215546Sopenharmony_ci bool is_color_blit = info->mask & PIPE_MASK_RGBA; 433bf215546Sopenharmony_ci bool is_depth_blit = info->mask & PIPE_MASK_Z; 434bf215546Sopenharmony_ci bool is_stencil_blit = info->mask & PIPE_MASK_S; 435bf215546Sopenharmony_ci 436bf215546Sopenharmony_ci /* We should receive either a depth/stencil blit, or color blit, but 437bf215546Sopenharmony_ci * not both. 438bf215546Sopenharmony_ci */ 439bf215546Sopenharmony_ci assert ((is_color_blit && !is_depth_blit && !is_stencil_blit) || 440bf215546Sopenharmony_ci (!is_color_blit && (is_depth_blit || is_stencil_blit))); 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci if (info->scissor_enable) 443bf215546Sopenharmony_ci return; 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ci if (info->src.box.x != info->dst.box.x || 446bf215546Sopenharmony_ci info->src.box.y != info->dst.box.y || 447bf215546Sopenharmony_ci info->src.box.width != info->dst.box.width || 448bf215546Sopenharmony_ci info->src.box.height != info->dst.box.height) 449bf215546Sopenharmony_ci return; 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci if (is_color_blit && 452bf215546Sopenharmony_ci util_format_is_depth_or_stencil(info->dst.format)) 453bf215546Sopenharmony_ci return; 454bf215546Sopenharmony_ci 455bf215546Sopenharmony_ci if (!v3d_rt_format_supported(&screen->devinfo, info->src.format)) 456bf215546Sopenharmony_ci return; 457bf215546Sopenharmony_ci 458bf215546Sopenharmony_ci if (v3d_get_rt_format(&screen->devinfo, info->src.format) != 459bf215546Sopenharmony_ci v3d_get_rt_format(&screen->devinfo, info->dst.format)) 460bf215546Sopenharmony_ci return; 461bf215546Sopenharmony_ci 462bf215546Sopenharmony_ci bool msaa = (info->src.resource->nr_samples > 1 || 463bf215546Sopenharmony_ci info->dst.resource->nr_samples > 1); 464bf215546Sopenharmony_ci bool is_msaa_resolve = (info->src.resource->nr_samples > 1 && 465bf215546Sopenharmony_ci info->dst.resource->nr_samples < 2); 466bf215546Sopenharmony_ci 467bf215546Sopenharmony_ci if (is_msaa_resolve && 468bf215546Sopenharmony_ci !v3d_format_supports_tlb_msaa_resolve(&screen->devinfo, info->src.format)) 469bf215546Sopenharmony_ci return; 470bf215546Sopenharmony_ci 471bf215546Sopenharmony_ci v3d_flush_jobs_writing_resource(v3d, info->src.resource, V3D_FLUSH_DEFAULT, false); 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_ci struct pipe_surface *dst_surf = 474bf215546Sopenharmony_ci v3d_get_blit_surface(pctx, info->dst.resource, info->dst.format, info->dst.level, info->dst.box.z); 475bf215546Sopenharmony_ci struct pipe_surface *src_surf = 476bf215546Sopenharmony_ci v3d_get_blit_surface(pctx, info->src.resource, info->src.format, info->src.level, info->src.box.z); 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci struct pipe_surface *surfaces[V3D_MAX_DRAW_BUFFERS] = { 0 }; 479bf215546Sopenharmony_ci if (is_color_blit) 480bf215546Sopenharmony_ci surfaces[0] = dst_surf; 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_ci bool double_buffer = 483bf215546Sopenharmony_ci unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !msaa; 484bf215546Sopenharmony_ci 485bf215546Sopenharmony_ci uint32_t tile_width, tile_height, max_bpp; 486bf215546Sopenharmony_ci v3d_get_tile_buffer_size(msaa, double_buffer, 487bf215546Sopenharmony_ci is_color_blit ? 1 : 0, surfaces, src_surf, 488bf215546Sopenharmony_ci &tile_width, &tile_height, &max_bpp); 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci int dst_surface_width = u_minify(info->dst.resource->width0, 491bf215546Sopenharmony_ci info->dst.level); 492bf215546Sopenharmony_ci int dst_surface_height = u_minify(info->dst.resource->height0, 493bf215546Sopenharmony_ci info->dst.level); 494bf215546Sopenharmony_ci if (is_tile_unaligned(info->dst.box.x, tile_width) || 495bf215546Sopenharmony_ci is_tile_unaligned(info->dst.box.y, tile_height) || 496bf215546Sopenharmony_ci (is_tile_unaligned(info->dst.box.width, tile_width) && 497bf215546Sopenharmony_ci info->dst.box.x + info->dst.box.width != dst_surface_width) || 498bf215546Sopenharmony_ci (is_tile_unaligned(info->dst.box.height, tile_height) && 499bf215546Sopenharmony_ci info->dst.box.y + info->dst.box.height != dst_surface_height)) { 500bf215546Sopenharmony_ci pipe_surface_reference(&dst_surf, NULL); 501bf215546Sopenharmony_ci pipe_surface_reference(&src_surf, NULL); 502bf215546Sopenharmony_ci return; 503bf215546Sopenharmony_ci } 504bf215546Sopenharmony_ci 505bf215546Sopenharmony_ci struct v3d_job *job = v3d_get_job(v3d, 506bf215546Sopenharmony_ci is_color_blit ? 1u : 0u, 507bf215546Sopenharmony_ci surfaces, 508bf215546Sopenharmony_ci is_color_blit ? NULL : dst_surf, 509bf215546Sopenharmony_ci src_surf); 510bf215546Sopenharmony_ci job->msaa = msaa; 511bf215546Sopenharmony_ci job->double_buffer = double_buffer; 512bf215546Sopenharmony_ci job->tile_width = tile_width; 513bf215546Sopenharmony_ci job->tile_height = tile_height; 514bf215546Sopenharmony_ci job->internal_bpp = max_bpp; 515bf215546Sopenharmony_ci job->draw_min_x = info->dst.box.x; 516bf215546Sopenharmony_ci job->draw_min_y = info->dst.box.y; 517bf215546Sopenharmony_ci job->draw_max_x = info->dst.box.x + info->dst.box.width; 518bf215546Sopenharmony_ci job->draw_max_y = info->dst.box.y + info->dst.box.height; 519bf215546Sopenharmony_ci job->scissor.disabled = false; 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci /* The simulator complains if we do a TLB load from a source with a 522bf215546Sopenharmony_ci * stride that is smaller than the destination's, so we program the 523bf215546Sopenharmony_ci * 'frame region' to match the smallest dimensions of the two surfaces. 524bf215546Sopenharmony_ci * This should be fine because we only get here if the src and dst boxes 525bf215546Sopenharmony_ci * match, so we know the blit involves the same tiles on both surfaces. 526bf215546Sopenharmony_ci */ 527bf215546Sopenharmony_ci job->draw_width = MIN2(dst_surf->width, src_surf->width); 528bf215546Sopenharmony_ci job->draw_height = MIN2(dst_surf->height, src_surf->height); 529bf215546Sopenharmony_ci job->draw_tiles_x = DIV_ROUND_UP(job->draw_width, 530bf215546Sopenharmony_ci job->tile_width); 531bf215546Sopenharmony_ci job->draw_tiles_y = DIV_ROUND_UP(job->draw_height, 532bf215546Sopenharmony_ci job->tile_height); 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci job->needs_flush = true; 535bf215546Sopenharmony_ci job->num_layers = info->dst.box.depth; 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci job->store = 0; 538bf215546Sopenharmony_ci if (is_color_blit) { 539bf215546Sopenharmony_ci job->store |= PIPE_CLEAR_COLOR0; 540bf215546Sopenharmony_ci info->mask &= ~PIPE_MASK_RGBA; 541bf215546Sopenharmony_ci } 542bf215546Sopenharmony_ci if (is_depth_blit) { 543bf215546Sopenharmony_ci job->store |= PIPE_CLEAR_DEPTH; 544bf215546Sopenharmony_ci info->mask &= ~PIPE_MASK_Z; 545bf215546Sopenharmony_ci } 546bf215546Sopenharmony_ci if (is_stencil_blit){ 547bf215546Sopenharmony_ci job->store |= PIPE_CLEAR_STENCIL; 548bf215546Sopenharmony_ci info->mask &= ~PIPE_MASK_S; 549bf215546Sopenharmony_ci } 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci v3d41_start_binning(v3d, job); 552bf215546Sopenharmony_ci 553bf215546Sopenharmony_ci v3d_job_submit(v3d, job); 554bf215546Sopenharmony_ci 555bf215546Sopenharmony_ci pipe_surface_reference(&dst_surf, NULL); 556bf215546Sopenharmony_ci pipe_surface_reference(&src_surf, NULL); 557bf215546Sopenharmony_ci} 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_ci/** 560bf215546Sopenharmony_ci * Creates the VS of the custom blit shader to convert YUV plane from 561bf215546Sopenharmony_ci * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format. 562bf215546Sopenharmony_ci * This vertex shader is mostly a pass-through VS. 563bf215546Sopenharmony_ci */ 564bf215546Sopenharmony_cistatic void * 565bf215546Sopenharmony_civ3d_get_sand8_vs(struct pipe_context *pctx) 566bf215546Sopenharmony_ci{ 567bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(pctx); 568bf215546Sopenharmony_ci struct pipe_screen *pscreen = pctx->screen; 569bf215546Sopenharmony_ci 570bf215546Sopenharmony_ci if (v3d->sand8_blit_vs) 571bf215546Sopenharmony_ci return v3d->sand8_blit_vs; 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci const struct nir_shader_compiler_options *options = 574bf215546Sopenharmony_ci pscreen->get_compiler_options(pscreen, 575bf215546Sopenharmony_ci PIPE_SHADER_IR_NIR, 576bf215546Sopenharmony_ci PIPE_SHADER_VERTEX); 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_ci nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, 579bf215546Sopenharmony_ci options, 580bf215546Sopenharmony_ci "sand8_blit_vs"); 581bf215546Sopenharmony_ci 582bf215546Sopenharmony_ci const struct glsl_type *vec4 = glsl_vec4_type(); 583bf215546Sopenharmony_ci nir_variable *pos_in = nir_variable_create(b.shader, 584bf215546Sopenharmony_ci nir_var_shader_in, 585bf215546Sopenharmony_ci vec4, "pos"); 586bf215546Sopenharmony_ci 587bf215546Sopenharmony_ci nir_variable *pos_out = nir_variable_create(b.shader, 588bf215546Sopenharmony_ci nir_var_shader_out, 589bf215546Sopenharmony_ci vec4, "gl_Position"); 590bf215546Sopenharmony_ci pos_out->data.location = VARYING_SLOT_POS; 591bf215546Sopenharmony_ci nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf); 592bf215546Sopenharmony_ci 593bf215546Sopenharmony_ci struct pipe_shader_state shader_tmpl = { 594bf215546Sopenharmony_ci .type = PIPE_SHADER_IR_NIR, 595bf215546Sopenharmony_ci .ir.nir = b.shader, 596bf215546Sopenharmony_ci }; 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_ci v3d->sand8_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl); 599bf215546Sopenharmony_ci 600bf215546Sopenharmony_ci return v3d->sand8_blit_vs; 601bf215546Sopenharmony_ci} 602bf215546Sopenharmony_ci/** 603bf215546Sopenharmony_ci * Creates the FS of the custom blit shader to convert YUV plane from 604bf215546Sopenharmony_ci * the NV12 format with BROADCOM_SAND_COL128 modifier to UIF tiled format. 605bf215546Sopenharmony_ci * The result texture is equivalent to a chroma (cpp=2) or luma (cpp=1) 606bf215546Sopenharmony_ci * plane for a NV12 format without the SAND modifier. 607bf215546Sopenharmony_ci */ 608bf215546Sopenharmony_cistatic void * 609bf215546Sopenharmony_civ3d_get_sand8_fs(struct pipe_context *pctx, int cpp) 610bf215546Sopenharmony_ci{ 611bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(pctx); 612bf215546Sopenharmony_ci struct pipe_screen *pscreen = pctx->screen; 613bf215546Sopenharmony_ci struct pipe_shader_state **cached_shader; 614bf215546Sopenharmony_ci const char *name; 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci if (cpp == 1) { 617bf215546Sopenharmony_ci cached_shader = &v3d->sand8_blit_fs_luma; 618bf215546Sopenharmony_ci name = "sand8_blit_fs_luma"; 619bf215546Sopenharmony_ci } else { 620bf215546Sopenharmony_ci cached_shader = &v3d->sand8_blit_fs_chroma; 621bf215546Sopenharmony_ci name = "sand8_blit_fs_chroma"; 622bf215546Sopenharmony_ci } 623bf215546Sopenharmony_ci 624bf215546Sopenharmony_ci if (*cached_shader) 625bf215546Sopenharmony_ci return *cached_shader; 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci const struct nir_shader_compiler_options *options = 628bf215546Sopenharmony_ci pscreen->get_compiler_options(pscreen, 629bf215546Sopenharmony_ci PIPE_SHADER_IR_NIR, 630bf215546Sopenharmony_ci PIPE_SHADER_FRAGMENT); 631bf215546Sopenharmony_ci 632bf215546Sopenharmony_ci nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, 633bf215546Sopenharmony_ci options, "%s", name); 634bf215546Sopenharmony_ci const struct glsl_type *vec4 = glsl_vec4_type(); 635bf215546Sopenharmony_ci 636bf215546Sopenharmony_ci const struct glsl_type *glsl_int = glsl_int_type(); 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci nir_variable *color_out = 639bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_out, 640bf215546Sopenharmony_ci vec4, "f_color"); 641bf215546Sopenharmony_ci color_out->data.location = FRAG_RESULT_COLOR; 642bf215546Sopenharmony_ci 643bf215546Sopenharmony_ci nir_variable *pos_in = 644bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_shader_in, vec4, "pos"); 645bf215546Sopenharmony_ci pos_in->data.location = VARYING_SLOT_POS; 646bf215546Sopenharmony_ci nir_ssa_def *pos = nir_load_var(&b, pos_in); 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci nir_ssa_def *zero = nir_imm_int(&b, 0); 649bf215546Sopenharmony_ci nir_ssa_def *one = nir_imm_int(&b, 1); 650bf215546Sopenharmony_ci nir_ssa_def *two = nir_imm_int(&b, 2); 651bf215546Sopenharmony_ci nir_ssa_def *six = nir_imm_int(&b, 6); 652bf215546Sopenharmony_ci nir_ssa_def *seven = nir_imm_int(&b, 7); 653bf215546Sopenharmony_ci nir_ssa_def *eight = nir_imm_int(&b, 8); 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0)); 656bf215546Sopenharmony_ci nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1)); 657bf215546Sopenharmony_ci 658bf215546Sopenharmony_ci nir_variable *stride_in = 659bf215546Sopenharmony_ci nir_variable_create(b.shader, nir_var_uniform, glsl_int, 660bf215546Sopenharmony_ci "sand8_stride"); 661bf215546Sopenharmony_ci nir_ssa_def *stride = 662bf215546Sopenharmony_ci nir_load_uniform(&b, 1, 32, zero, 663bf215546Sopenharmony_ci .base = stride_in->data.driver_location, 664bf215546Sopenharmony_ci .range = 1, 665bf215546Sopenharmony_ci .dest_type = nir_type_int32); 666bf215546Sopenharmony_ci 667bf215546Sopenharmony_ci nir_ssa_def *x_offset; 668bf215546Sopenharmony_ci nir_ssa_def *y_offset; 669bf215546Sopenharmony_ci 670bf215546Sopenharmony_ci /* UIF tiled format is composed by UIF blocks, Each block has 671bf215546Sopenharmony_ci * four 64 byte microtiles. Inside each microtile pixels are stored 672bf215546Sopenharmony_ci * in raster format. But microtiles have different dimensions 673bf215546Sopenharmony_ci * based in the bits per pixel of the image. 674bf215546Sopenharmony_ci * 675bf215546Sopenharmony_ci * 8bpp microtile dimensions are 8x8 676bf215546Sopenharmony_ci * 16bpp microtile dimensions are 8x4 677bf215546Sopenharmony_ci * 32bpp microtile dimensions are 4x4 678bf215546Sopenharmony_ci * 679bf215546Sopenharmony_ci * As we are reading and writing with 32bpp to optimize 680bf215546Sopenharmony_ci * the number of texture operations during the blit. We need 681bf215546Sopenharmony_ci * to adjust the offsets were we read and write as data will 682bf215546Sopenharmony_ci * be later read using 8bpp (luma) and 16bpp (chroma). 683bf215546Sopenharmony_ci * 684bf215546Sopenharmony_ci * For chroma 8x4 16bpp raster order is compatible with 4x4 685bf215546Sopenharmony_ci * 32bpp. In both layouts each line has 8*2 == 4*4 == 16 bytes. 686bf215546Sopenharmony_ci * But luma 8x8 8bpp raster order is not compatible 687bf215546Sopenharmony_ci * with 4x4 32bpp. 8bpp has 8 bytes per line, and 32bpp has 688bf215546Sopenharmony_ci * 16 bytes per line. So if we read a 8bpp texture that was 689bf215546Sopenharmony_ci * written as 32bpp texture. Bytes would be misplaced. 690bf215546Sopenharmony_ci * 691bf215546Sopenharmony_ci * inter/intra_utile_x_offests takes care of mapping the offsets 692bf215546Sopenharmony_ci * between microtiles to deal with this issue for luma planes. 693bf215546Sopenharmony_ci */ 694bf215546Sopenharmony_ci if (cpp == 1) { 695bf215546Sopenharmony_ci nir_ssa_def *intra_utile_x_offset = 696bf215546Sopenharmony_ci nir_ishl(&b, nir_iand_imm(&b, x, 1), two); 697bf215546Sopenharmony_ci nir_ssa_def *inter_utile_x_offset = 698bf215546Sopenharmony_ci nir_ishl(&b, nir_iand_imm(&b, x, 60), one); 699bf215546Sopenharmony_ci nir_ssa_def *stripe_offset= 700bf215546Sopenharmony_ci nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 6), 701bf215546Sopenharmony_ci stride), 702bf215546Sopenharmony_ci seven); 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci x_offset = nir_iadd(&b, stripe_offset, 705bf215546Sopenharmony_ci nir_iadd(&b, intra_utile_x_offset, 706bf215546Sopenharmony_ci inter_utile_x_offset)); 707bf215546Sopenharmony_ci y_offset = nir_iadd(&b, 708bf215546Sopenharmony_ci nir_ishl(&b, nir_iand_imm(&b, x, 2), six), 709bf215546Sopenharmony_ci nir_ishl(&b, y, eight)); 710bf215546Sopenharmony_ci } else { 711bf215546Sopenharmony_ci nir_ssa_def *stripe_offset= 712bf215546Sopenharmony_ci nir_ishl(&b,nir_imul(&b,nir_ishr_imm(&b, x, 5), 713bf215546Sopenharmony_ci stride), 714bf215546Sopenharmony_ci seven); 715bf215546Sopenharmony_ci x_offset = nir_iadd(&b, stripe_offset, 716bf215546Sopenharmony_ci nir_ishl(&b, nir_iand_imm(&b, x, 31), two)); 717bf215546Sopenharmony_ci y_offset = nir_ishl(&b, y, seven); 718bf215546Sopenharmony_ci } 719bf215546Sopenharmony_ci nir_ssa_def *ubo_offset = nir_iadd(&b, x_offset, y_offset); 720bf215546Sopenharmony_ci nir_ssa_def *load = 721bf215546Sopenharmony_ci nir_load_ubo(&b, 1, 32, one, ubo_offset, 722bf215546Sopenharmony_ci .align_mul = 4, 723bf215546Sopenharmony_ci .align_offset = 0, 724bf215546Sopenharmony_ci .range_base = 0, 725bf215546Sopenharmony_ci .range = ~0); 726bf215546Sopenharmony_ci 727bf215546Sopenharmony_ci nir_ssa_def *output = nir_unpack_unorm_4x8(&b, load); 728bf215546Sopenharmony_ci 729bf215546Sopenharmony_ci nir_store_var(&b, color_out, 730bf215546Sopenharmony_ci output, 731bf215546Sopenharmony_ci 0xF); 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci struct pipe_shader_state shader_tmpl = { 734bf215546Sopenharmony_ci .type = PIPE_SHADER_IR_NIR, 735bf215546Sopenharmony_ci .ir.nir = b.shader, 736bf215546Sopenharmony_ci }; 737bf215546Sopenharmony_ci 738bf215546Sopenharmony_ci *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl); 739bf215546Sopenharmony_ci 740bf215546Sopenharmony_ci return *cached_shader; 741bf215546Sopenharmony_ci} 742bf215546Sopenharmony_ci 743bf215546Sopenharmony_ci/** 744bf215546Sopenharmony_ci * Turns NV12 with SAND8 format modifier from raster-order with interleaved 745bf215546Sopenharmony_ci * luma and chroma 128-byte-wide-columns to tiled format for luma and chroma. 746bf215546Sopenharmony_ci * 747bf215546Sopenharmony_ci * This implementation is based on vc4_yuv_blit. 748bf215546Sopenharmony_ci */ 749bf215546Sopenharmony_cistatic void 750bf215546Sopenharmony_civ3d_sand8_blit(struct pipe_context *pctx, struct pipe_blit_info *info) 751bf215546Sopenharmony_ci{ 752bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(pctx); 753bf215546Sopenharmony_ci struct v3d_resource *src = v3d_resource(info->src.resource); 754bf215546Sopenharmony_ci ASSERTED struct v3d_resource *dst = v3d_resource(info->dst.resource); 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci if (!src->sand_col128_stride) 757bf215546Sopenharmony_ci return; 758bf215546Sopenharmony_ci if (src->tiled) 759bf215546Sopenharmony_ci return; 760bf215546Sopenharmony_ci if (src->base.format != PIPE_FORMAT_R8_UNORM && 761bf215546Sopenharmony_ci src->base.format != PIPE_FORMAT_R8G8_UNORM) 762bf215546Sopenharmony_ci return; 763bf215546Sopenharmony_ci if (!(info->mask & PIPE_MASK_RGBA)) 764bf215546Sopenharmony_ci return; 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci assert(dst->base.format == src->base.format); 767bf215546Sopenharmony_ci assert(dst->tiled); 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci assert(info->src.box.x == 0 && info->dst.box.x == 0); 770bf215546Sopenharmony_ci assert(info->src.box.y == 0 && info->dst.box.y == 0); 771bf215546Sopenharmony_ci assert(info->src.box.width == info->dst.box.width); 772bf215546Sopenharmony_ci assert(info->src.box.height == info->dst.box.height); 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_ci v3d_blitter_save(v3d, true); 775bf215546Sopenharmony_ci 776bf215546Sopenharmony_ci struct pipe_surface dst_tmpl; 777bf215546Sopenharmony_ci util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource, 778bf215546Sopenharmony_ci info->dst.level, info->dst.box.z); 779bf215546Sopenharmony_ci /* Although the src textures are cpp=1 or cpp=2, the dst texture 780bf215546Sopenharmony_ci * uses a cpp=4 dst texture. So, all read/write texture ops will 781bf215546Sopenharmony_ci * be done using 32-bit read and writes. 782bf215546Sopenharmony_ci */ 783bf215546Sopenharmony_ci dst_tmpl.format = PIPE_FORMAT_R8G8B8A8_UNORM; 784bf215546Sopenharmony_ci struct pipe_surface *dst_surf = 785bf215546Sopenharmony_ci pctx->create_surface(pctx, info->dst.resource, &dst_tmpl); 786bf215546Sopenharmony_ci if (!dst_surf) { 787bf215546Sopenharmony_ci fprintf(stderr, "Failed to create YUV dst surface\n"); 788bf215546Sopenharmony_ci util_blitter_unset_running_flag(v3d->blitter); 789bf215546Sopenharmony_ci return; 790bf215546Sopenharmony_ci } 791bf215546Sopenharmony_ci 792bf215546Sopenharmony_ci uint32_t sand8_stride = src->sand_col128_stride; 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci /* Adjust the dimensions of dst luma/chroma to match src 795bf215546Sopenharmony_ci * size now we are using a cpp=4 format. Next dimension take into 796bf215546Sopenharmony_ci * account the UIF microtile layouts. 797bf215546Sopenharmony_ci */ 798bf215546Sopenharmony_ci dst_surf->width = align(dst_surf->width, 8) / 2; 799bf215546Sopenharmony_ci if (src->cpp == 1) 800bf215546Sopenharmony_ci dst_surf->height /= 2; 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci /* Set the constant buffer. */ 803bf215546Sopenharmony_ci struct pipe_constant_buffer cb_uniforms = { 804bf215546Sopenharmony_ci .user_buffer = &sand8_stride, 805bf215546Sopenharmony_ci .buffer_size = sizeof(sand8_stride), 806bf215546Sopenharmony_ci }; 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, 809bf215546Sopenharmony_ci &cb_uniforms); 810bf215546Sopenharmony_ci struct pipe_constant_buffer cb_src = { 811bf215546Sopenharmony_ci .buffer = info->src.resource, 812bf215546Sopenharmony_ci .buffer_offset = src->slices[info->src.level].offset, 813bf215546Sopenharmony_ci .buffer_size = (src->bo->size - 814bf215546Sopenharmony_ci src->slices[info->src.level].offset), 815bf215546Sopenharmony_ci }; 816bf215546Sopenharmony_ci pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 2, false, 817bf215546Sopenharmony_ci &cb_src); 818bf215546Sopenharmony_ci /* Unbind the textures, to make sure we don't try to recurse into the 819bf215546Sopenharmony_ci * shadow blit. 820bf215546Sopenharmony_ci */ 821bf215546Sopenharmony_ci pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL); 822bf215546Sopenharmony_ci pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL); 823bf215546Sopenharmony_ci 824bf215546Sopenharmony_ci util_blitter_custom_shader(v3d->blitter, dst_surf, 825bf215546Sopenharmony_ci v3d_get_sand8_vs(pctx), 826bf215546Sopenharmony_ci v3d_get_sand8_fs(pctx, src->cpp)); 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci util_blitter_restore_textures(v3d->blitter); 829bf215546Sopenharmony_ci util_blitter_restore_constant_buffer_state(v3d->blitter); 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci /* Restore cb1 (util_blitter doesn't handle this one). */ 832bf215546Sopenharmony_ci struct pipe_constant_buffer cb_disabled = { 0 }; 833bf215546Sopenharmony_ci pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, 834bf215546Sopenharmony_ci &cb_disabled); 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_ci pipe_surface_reference(&dst_surf, NULL); 837bf215546Sopenharmony_ci 838bf215546Sopenharmony_ci info->mask &= ~PIPE_MASK_RGBA; 839bf215546Sopenharmony_ci return; 840bf215546Sopenharmony_ci} 841bf215546Sopenharmony_ci 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci/* Optimal hardware path for blitting pixels. 844bf215546Sopenharmony_ci * Scaling, format conversion, up- and downsampling (resolve) are allowed. 845bf215546Sopenharmony_ci */ 846bf215546Sopenharmony_civoid 847bf215546Sopenharmony_civ3d_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info) 848bf215546Sopenharmony_ci{ 849bf215546Sopenharmony_ci struct v3d_context *v3d = v3d_context(pctx); 850bf215546Sopenharmony_ci struct pipe_blit_info info = *blit_info; 851bf215546Sopenharmony_ci 852bf215546Sopenharmony_ci v3d_sand8_blit(pctx, &info); 853bf215546Sopenharmony_ci 854bf215546Sopenharmony_ci v3d_tfu_blit(pctx, &info); 855bf215546Sopenharmony_ci 856bf215546Sopenharmony_ci v3d_tlb_blit(pctx, &info); 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci v3d_stencil_blit(pctx, &info); 859bf215546Sopenharmony_ci 860bf215546Sopenharmony_ci v3d_render_blit(pctx, &info); 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci /* Flush our blit jobs immediately. They're unlikely to get reused by 863bf215546Sopenharmony_ci * normal drawing or other blits, and without flushing we can easily 864bf215546Sopenharmony_ci * run into unexpected OOMs when blits are used for a large series of 865bf215546Sopenharmony_ci * texture uploads before using the textures. 866bf215546Sopenharmony_ci */ 867bf215546Sopenharmony_ci v3d_flush_jobs_writing_resource(v3d, info.dst.resource, 868bf215546Sopenharmony_ci V3D_FLUSH_DEFAULT, false); 869bf215546Sopenharmony_ci} 870