1/*
2 * Copyright © 2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24#include "util/format/u_format.h"
25#include "util/u_surface.h"
26#include "util/u_blitter.h"
27#include "compiler/nir/nir_builder.h"
28#include "vc4_context.h"
29
30static struct pipe_surface *
31vc4_get_blit_surface(struct pipe_context *pctx,
32                     struct pipe_resource *prsc, unsigned level)
33{
34        struct pipe_surface tmpl;
35
36        memset(&tmpl, 0, sizeof(tmpl));
37        tmpl.format = prsc->format;
38        tmpl.u.tex.level = level;
39        tmpl.u.tex.first_layer = 0;
40        tmpl.u.tex.last_layer = 0;
41
42        return pctx->create_surface(pctx, prsc, &tmpl);
43}
44
/**
 * Returns true if @size has any of the bits in (@tile_size - 1) set.
 *
 * For a power-of-two @tile_size (this file passes 32 or 64) that means
 * "@size is not a multiple of @tile_size".
 */
static bool
is_tile_unaligned(unsigned size, unsigned tile_size)
{
        const unsigned low_bits_mask = tile_size - 1;

        return (size & low_bits_mask) != 0;
}
50
51static bool
52vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
53{
54        struct vc4_context *vc4 = vc4_context(pctx);
55        bool msaa = (info->src.resource->nr_samples > 1 ||
56                     info->dst.resource->nr_samples > 1);
57        int tile_width = msaa ? 32 : 64;
58        int tile_height = msaa ? 32 : 64;
59
60        if (util_format_is_depth_or_stencil(info->dst.resource->format))
61                return false;
62
63        if (info->scissor_enable)
64                return false;
65
66        if ((info->mask & PIPE_MASK_RGBA) == 0)
67                return false;
68
69        if (info->dst.box.x != info->src.box.x ||
70            info->dst.box.y != info->src.box.y ||
71            info->dst.box.width != info->src.box.width ||
72            info->dst.box.height != info->src.box.height) {
73                return false;
74        }
75
76        int dst_surface_width = u_minify(info->dst.resource->width0,
77                                         info->dst.level);
78        int dst_surface_height = u_minify(info->dst.resource->height0,
79                                         info->dst.level);
80        if (is_tile_unaligned(info->dst.box.x, tile_width) ||
81            is_tile_unaligned(info->dst.box.y, tile_height) ||
82            (is_tile_unaligned(info->dst.box.width, tile_width) &&
83             info->dst.box.x + info->dst.box.width != dst_surface_width) ||
84            (is_tile_unaligned(info->dst.box.height, tile_height) &&
85             info->dst.box.y + info->dst.box.height != dst_surface_height)) {
86                return false;
87        }
88
89        /* VC4_PACKET_LOAD_TILE_BUFFER_GENERAL uses the
90         * VC4_PACKET_TILE_RENDERING_MODE_CONFIG's width (determined by our
91         * destination surface) to determine the stride.  This may be wrong
92         * when reading from texture miplevels > 0, which are stored in
93         * POT-sized areas.  For MSAA, the tile addresses are computed
94         * explicitly by the RCL, but still use the destination width to
95         * determine the stride (which could be fixed by explicitly supplying
96         * it in the ABI).
97         */
98        struct vc4_resource *rsc = vc4_resource(info->src.resource);
99
100        uint32_t stride;
101
102        if (info->src.resource->nr_samples > 1)
103                stride = align(dst_surface_width, 32) * 4 * rsc->cpp;
104        else if (rsc->slices[info->src.level].tiling == VC4_TILING_FORMAT_T)
105                stride = align(dst_surface_width * rsc->cpp, 128);
106        else
107                stride = align(dst_surface_width * rsc->cpp, 16);
108
109        if (stride != rsc->slices[info->src.level].stride)
110                return false;
111
112        if (info->dst.resource->format != info->src.resource->format)
113                return false;
114
115        if (false) {
116                fprintf(stderr, "RCL blit from %d,%d to %d,%d (%d,%d)\n",
117                        info->src.box.x,
118                        info->src.box.y,
119                        info->dst.box.x,
120                        info->dst.box.y,
121                        info->dst.box.width,
122                        info->dst.box.height);
123        }
124
125        struct pipe_surface *dst_surf =
126                vc4_get_blit_surface(pctx, info->dst.resource, info->dst.level);
127        struct pipe_surface *src_surf =
128                vc4_get_blit_surface(pctx, info->src.resource, info->src.level);
129
130        vc4_flush_jobs_reading_resource(vc4, info->src.resource);
131
132        struct vc4_job *job = vc4_get_job(vc4, dst_surf, NULL);
133        pipe_surface_reference(&job->color_read, src_surf);
134
135        job->draw_min_x = info->dst.box.x;
136        job->draw_min_y = info->dst.box.y;
137        job->draw_max_x = info->dst.box.x + info->dst.box.width;
138        job->draw_max_y = info->dst.box.y + info->dst.box.height;
139        job->draw_width = dst_surf->width;
140        job->draw_height = dst_surf->height;
141
142        job->tile_width = tile_width;
143        job->tile_height = tile_height;
144        job->msaa = msaa;
145        job->needs_flush = true;
146        job->resolve |= PIPE_CLEAR_COLOR;
147
148        vc4_job_submit(vc4, job);
149
150        pipe_surface_reference(&dst_surf, NULL);
151        pipe_surface_reference(&src_surf, NULL);
152
153        return true;
154}
155
156void
157vc4_blitter_save(struct vc4_context *vc4)
158{
159        util_blitter_save_fragment_constant_buffer_slot(vc4->blitter,
160                                                        vc4->constbuf[PIPE_SHADER_FRAGMENT].cb);
161        util_blitter_save_vertex_buffer_slot(vc4->blitter, vc4->vertexbuf.vb);
162        util_blitter_save_vertex_elements(vc4->blitter, vc4->vtx);
163        util_blitter_save_vertex_shader(vc4->blitter, vc4->prog.bind_vs);
164        util_blitter_save_rasterizer(vc4->blitter, vc4->rasterizer);
165        util_blitter_save_viewport(vc4->blitter, &vc4->viewport);
166        util_blitter_save_scissor(vc4->blitter, &vc4->scissor);
167        util_blitter_save_fragment_shader(vc4->blitter, vc4->prog.bind_fs);
168        util_blitter_save_blend(vc4->blitter, vc4->blend);
169        util_blitter_save_depth_stencil_alpha(vc4->blitter, vc4->zsa);
170        util_blitter_save_stencil_ref(vc4->blitter, &vc4->stencil_ref);
171        util_blitter_save_sample_mask(vc4->blitter, vc4->sample_mask, 0);
172        util_blitter_save_framebuffer(vc4->blitter, &vc4->framebuffer);
173        util_blitter_save_fragment_sampler_states(vc4->blitter,
174                        vc4->fragtex.num_samplers,
175                        (void **)vc4->fragtex.samplers);
176        util_blitter_save_fragment_sampler_views(vc4->blitter,
177                        vc4->fragtex.num_textures, vc4->fragtex.textures);
178}
179
180static void *vc4_get_yuv_vs(struct pipe_context *pctx)
181{
182   struct vc4_context *vc4 = vc4_context(pctx);
183   struct pipe_screen *pscreen = pctx->screen;
184
185   if (vc4->yuv_linear_blit_vs)
186           return vc4->yuv_linear_blit_vs;
187
188   const struct nir_shader_compiler_options *options =
189           pscreen->get_compiler_options(pscreen,
190                                         PIPE_SHADER_IR_NIR,
191                                         PIPE_SHADER_VERTEX);
192
193   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
194                                                  "linear_blit_vs");
195
196   const struct glsl_type *vec4 = glsl_vec4_type();
197   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
198                                              vec4, "pos");
199
200   nir_variable *pos_out = nir_variable_create(b.shader, nir_var_shader_out,
201                                               vec4, "gl_Position");
202   pos_out->data.location = VARYING_SLOT_POS;
203
204   nir_store_var(&b, pos_out, nir_load_var(&b, pos_in), 0xf);
205
206   struct pipe_shader_state shader_tmpl = {
207           .type = PIPE_SHADER_IR_NIR,
208           .ir.nir = b.shader,
209   };
210
211   vc4->yuv_linear_blit_vs = pctx->create_vs_state(pctx, &shader_tmpl);
212
213   return vc4->yuv_linear_blit_vs;
214}
215
216static void *vc4_get_yuv_fs(struct pipe_context *pctx, int cpp)
217{
218   struct vc4_context *vc4 = vc4_context(pctx);
219   struct pipe_screen *pscreen = pctx->screen;
220   struct pipe_shader_state **cached_shader;
221   const char *name;
222
223   if (cpp == 1) {
224           cached_shader = &vc4->yuv_linear_blit_fs_8bit;
225           name = "linear_blit_8bit_fs";
226   } else {
227           cached_shader = &vc4->yuv_linear_blit_fs_16bit;
228           name = "linear_blit_16bit_fs";
229   }
230
231   if (*cached_shader)
232           return *cached_shader;
233
234   const struct nir_shader_compiler_options *options =
235           pscreen->get_compiler_options(pscreen,
236                                         PIPE_SHADER_IR_NIR,
237                                         PIPE_SHADER_FRAGMENT);
238
239   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,
240                                                  options, "%s", name);
241
242   const struct glsl_type *vec4 = glsl_vec4_type();
243   const struct glsl_type *glsl_int = glsl_int_type();
244
245   nir_variable *color_out = nir_variable_create(b.shader, nir_var_shader_out,
246                                                 vec4, "f_color");
247   color_out->data.location = FRAG_RESULT_COLOR;
248
249   nir_variable *pos_in = nir_variable_create(b.shader, nir_var_shader_in,
250                                              vec4, "pos");
251   pos_in->data.location = VARYING_SLOT_POS;
252   nir_ssa_def *pos = nir_load_var(&b, pos_in);
253
254   nir_ssa_def *one = nir_imm_int(&b, 1);
255   nir_ssa_def *two = nir_imm_int(&b, 2);
256
257   nir_ssa_def *x = nir_f2i32(&b, nir_channel(&b, pos, 0));
258   nir_ssa_def *y = nir_f2i32(&b, nir_channel(&b, pos, 1));
259
260   nir_variable *stride_in = nir_variable_create(b.shader, nir_var_uniform,
261                                                 glsl_int, "stride");
262   nir_ssa_def *stride = nir_load_var(&b, stride_in);
263
264   nir_ssa_def *x_offset;
265   nir_ssa_def *y_offset;
266   if (cpp == 1) {
267           nir_ssa_def *intra_utile_x_offset =
268                   nir_ishl(&b, nir_iand(&b, x, one), two);
269           nir_ssa_def *inter_utile_x_offset =
270                   nir_ishl(&b, nir_iand(&b, x, nir_imm_int(&b, ~3)), one);
271
272           x_offset = nir_iadd(&b,
273                               intra_utile_x_offset,
274                               inter_utile_x_offset);
275           y_offset = nir_imul(&b,
276                               nir_iadd(&b,
277                                        nir_ishl(&b, y, one),
278                                        nir_ushr(&b, nir_iand(&b, x, two), one)),
279                               stride);
280   } else {
281           x_offset = nir_ishl(&b, x, two);
282           y_offset = nir_imul(&b, y, stride);
283   }
284
285   nir_ssa_def *load =
286      nir_load_ubo(&b, 1, 32, one, nir_iadd(&b, x_offset, y_offset),
287                   .align_mul = 4,
288                   .align_offset = 0,
289                   .range_base = 0,
290                   .range = ~0);
291
292   nir_store_var(&b, color_out,
293                 nir_unpack_unorm_4x8(&b, load),
294                 0xf);
295
296   struct pipe_shader_state shader_tmpl = {
297           .type = PIPE_SHADER_IR_NIR,
298           .ir.nir = b.shader,
299   };
300
301   *cached_shader = pctx->create_fs_state(pctx, &shader_tmpl);
302
303   return *cached_shader;
304}
305
306static bool
307vc4_yuv_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
308{
309        struct vc4_context *vc4 = vc4_context(pctx);
310        struct vc4_resource *src = vc4_resource(info->src.resource);
311        struct vc4_resource *dst = vc4_resource(info->dst.resource);
312        bool ok;
313
314        if (src->tiled)
315                return false;
316        if (src->base.format != PIPE_FORMAT_R8_UNORM &&
317            src->base.format != PIPE_FORMAT_R8G8_UNORM)
318                return false;
319
320        /* YUV blits always turn raster-order to tiled */
321        assert(dst->base.format == src->base.format);
322        assert(dst->tiled);
323
324        /* Always 1:1 and at the origin */
325        assert(info->src.box.x == 0 && info->dst.box.x == 0);
326        assert(info->src.box.y == 0 && info->dst.box.y == 0);
327        assert(info->src.box.width == info->dst.box.width);
328        assert(info->src.box.height == info->dst.box.height);
329
330        if ((src->slices[info->src.level].offset & 3) ||
331            (src->slices[info->src.level].stride & 3)) {
332                perf_debug("YUV-blit src texture offset/stride misaligned: 0x%08x/%d\n",
333                           src->slices[info->src.level].offset,
334                           src->slices[info->src.level].stride);
335                goto fallback;
336        }
337
338        vc4_blitter_save(vc4);
339
340        /* Create a renderable surface mapping the T-tiled shadow buffer.
341         */
342        struct pipe_surface dst_tmpl;
343        util_blitter_default_dst_texture(&dst_tmpl, info->dst.resource,
344                                         info->dst.level, info->dst.box.z);
345        dst_tmpl.format = PIPE_FORMAT_RGBA8888_UNORM;
346        struct pipe_surface *dst_surf =
347                pctx->create_surface(pctx, info->dst.resource, &dst_tmpl);
348        if (!dst_surf) {
349                fprintf(stderr, "Failed to create YUV dst surface\n");
350                util_blitter_unset_running_flag(vc4->blitter);
351                return false;
352        }
353        dst_surf->width = align(dst_surf->width, 8) / 2;
354        if (dst->cpp == 1)
355                dst_surf->height /= 2;
356
357        /* Set the constant buffer. */
358        uint32_t stride = src->slices[info->src.level].stride;
359        struct pipe_constant_buffer cb_uniforms = {
360                .user_buffer = &stride,
361                .buffer_size = sizeof(stride),
362        };
363        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 0, false, &cb_uniforms);
364        struct pipe_constant_buffer cb_src = {
365                .buffer = info->src.resource,
366                .buffer_offset = src->slices[info->src.level].offset,
367                .buffer_size = (src->bo->size -
368                                src->slices[info->src.level].offset),
369        };
370        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, &cb_src);
371
372        /* Unbind the textures, to make sure we don't try to recurse into the
373         * shadow blit.
374         */
375        pctx->set_sampler_views(pctx, PIPE_SHADER_FRAGMENT, 0, 0, 0, false, NULL);
376        pctx->bind_sampler_states(pctx, PIPE_SHADER_FRAGMENT, 0, 0, NULL);
377
378        util_blitter_custom_shader(vc4->blitter, dst_surf,
379                                   vc4_get_yuv_vs(pctx),
380                                   vc4_get_yuv_fs(pctx, src->cpp));
381
382        util_blitter_restore_textures(vc4->blitter);
383        util_blitter_restore_constant_buffer_state(vc4->blitter);
384        /* Restore cb1 (util_blitter doesn't handle this one). */
385        struct pipe_constant_buffer cb_disabled = { 0 };
386        pctx->set_constant_buffer(pctx, PIPE_SHADER_FRAGMENT, 1, false, &cb_disabled);
387
388        pipe_surface_reference(&dst_surf, NULL);
389
390        return true;
391
392fallback:
393        /* Do an immediate SW fallback, since the render blit path
394         * would just recurse.
395         */
396        ok = util_try_blit_via_copy_region(pctx, info, false);
397        assert(ok); (void)ok;
398
399        return true;
400}
401
402static bool
403vc4_render_blit(struct pipe_context *ctx, struct pipe_blit_info *info)
404{
405        struct vc4_context *vc4 = vc4_context(ctx);
406
407        if (!util_blitter_is_blit_supported(vc4->blitter, info)) {
408                fprintf(stderr, "blit unsupported %s -> %s\n",
409                    util_format_short_name(info->src.resource->format),
410                    util_format_short_name(info->dst.resource->format));
411                return false;
412        }
413
414        /* Enable the scissor, so we get a minimal set of tiles rendered. */
415        if (!info->scissor_enable) {
416                info->scissor_enable = true;
417                info->scissor.minx = info->dst.box.x;
418                info->scissor.miny = info->dst.box.y;
419                info->scissor.maxx = info->dst.box.x + info->dst.box.width;
420                info->scissor.maxy = info->dst.box.y + info->dst.box.height;
421        }
422
423        vc4_blitter_save(vc4);
424        util_blitter_blit(vc4->blitter, info);
425
426        return true;
427}
428
429/* Optimal hardware path for blitting pixels.
430 * Scaling, format conversion, up- and downsampling (resolve) are allowed.
431 */
432void
433vc4_blit(struct pipe_context *pctx, const struct pipe_blit_info *blit_info)
434{
435        struct pipe_blit_info info = *blit_info;
436
437        if (vc4_yuv_blit(pctx, blit_info))
438                return;
439
440        if (vc4_tile_blit(pctx, blit_info))
441                return;
442
443        if (info.mask & PIPE_MASK_S) {
444                if (util_try_blit_via_copy_region(pctx, &info, false))
445                        return;
446
447                info.mask &= ~PIPE_MASK_S;
448                fprintf(stderr, "cannot blit stencil, skipping\n");
449        }
450
451        if (vc4_render_blit(pctx, &info))
452                return;
453
454        fprintf(stderr, "Unsupported blit\n");
455}
456