1/*
2 * Copyright © 2014-2015 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file vc4_job.c
25 *
26 * Functions for submitting VC4 render jobs to the kernel.
27 */
28
29#include <xf86drm.h>
30#include "vc4_cl_dump.h"
31#include "vc4_context.h"
32#include "util/hash_table.h"
33
34static void
35vc4_job_free(struct vc4_context *vc4, struct vc4_job *job)
36{
37        struct vc4_bo **referenced_bos = job->bo_pointers.base;
38        for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
39                vc4_bo_unreference(&referenced_bos[i]);
40        }
41
42        _mesa_hash_table_remove_key(vc4->jobs, &job->key);
43
44        if (job->color_write) {
45                _mesa_hash_table_remove_key(vc4->write_jobs,
46                                            job->color_write->texture);
47                pipe_surface_reference(&job->color_write, NULL);
48        }
49        if (job->msaa_color_write) {
50                _mesa_hash_table_remove_key(vc4->write_jobs,
51                                            job->msaa_color_write->texture);
52                pipe_surface_reference(&job->msaa_color_write, NULL);
53        }
54        if (job->zs_write) {
55                _mesa_hash_table_remove_key(vc4->write_jobs,
56                                            job->zs_write->texture);
57                pipe_surface_reference(&job->zs_write, NULL);
58        }
59        if (job->msaa_zs_write) {
60                _mesa_hash_table_remove_key(vc4->write_jobs,
61                                            job->msaa_zs_write->texture);
62                pipe_surface_reference(&job->msaa_zs_write, NULL);
63        }
64
65        pipe_surface_reference(&job->color_read, NULL);
66        pipe_surface_reference(&job->zs_read, NULL);
67
68        if (vc4->job == job)
69                vc4->job = NULL;
70
71        ralloc_free(job);
72}
73
74static struct vc4_job *
75vc4_job_create(struct vc4_context *vc4)
76{
77        struct vc4_job *job = rzalloc(vc4, struct vc4_job);
78
79        vc4_init_cl(job, &job->bcl);
80        vc4_init_cl(job, &job->shader_rec);
81        vc4_init_cl(job, &job->uniforms);
82        vc4_init_cl(job, &job->bo_handles);
83        vc4_init_cl(job, &job->bo_pointers);
84
85        job->draw_min_x = ~0;
86        job->draw_min_y = ~0;
87        job->draw_max_x = 0;
88        job->draw_max_y = 0;
89
90        job->last_gem_handle_hindex = ~0;
91
92        if (vc4->perfmon)
93                job->perfmon = vc4->perfmon;
94
95        return job;
96}
97
98void
99vc4_flush_jobs_writing_resource(struct vc4_context *vc4,
100                                struct pipe_resource *prsc)
101{
102        struct hash_entry *entry = _mesa_hash_table_search(vc4->write_jobs,
103                                                           prsc);
104        if (entry) {
105                struct vc4_job *job = entry->data;
106                vc4_job_submit(vc4, job);
107        }
108}
109
110void
111vc4_flush_jobs_reading_resource(struct vc4_context *vc4,
112                                struct pipe_resource *prsc)
113{
114        struct vc4_resource *rsc = vc4_resource(prsc);
115
116        vc4_flush_jobs_writing_resource(vc4, prsc);
117
118        hash_table_foreach(vc4->jobs, entry) {
119                struct vc4_job *job = entry->data;
120
121                struct vc4_bo **referenced_bos = job->bo_pointers.base;
122                bool found = false;
123                for (int i = 0; i < cl_offset(&job->bo_handles) / 4; i++) {
124                        if (referenced_bos[i] == rsc->bo) {
125                                found = true;
126                                break;
127                        }
128                }
129                if (found) {
130                        vc4_job_submit(vc4, job);
131                        continue;
132                }
133
134                /* Also check for the Z/color buffers, since the references to
135                 * those are only added immediately before submit.
136                 */
137                if (job->color_read && !(job->cleared & PIPE_CLEAR_COLOR)) {
138                        struct vc4_resource *ctex =
139                                vc4_resource(job->color_read->texture);
140                        if (ctex->bo == rsc->bo) {
141                                vc4_job_submit(vc4, job);
142                                continue;
143                        }
144                }
145
146                if (job->zs_read && !(job->cleared &
147                                      (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
148                        struct vc4_resource *ztex =
149                                vc4_resource(job->zs_read->texture);
150                        if (ztex->bo == rsc->bo) {
151                                vc4_job_submit(vc4, job);
152                                continue;
153                        }
154                }
155        }
156}
157
158/**
159 * Returns a vc4_job struture for tracking V3D rendering to a particular FBO.
160 *
161 * If we've already started rendering to this FBO, then return old same job,
162 * otherwise make a new one.  If we're beginning rendering to an FBO, make
163 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
164 * have been flushed.
165 */
166struct vc4_job *
167vc4_get_job(struct vc4_context *vc4,
168            struct pipe_surface *cbuf, struct pipe_surface *zsbuf)
169{
170        /* Return the existing job for this FBO if we have one */
171        struct vc4_job_key local_key = {.cbuf = cbuf, .zsbuf = zsbuf};
172        struct hash_entry *entry = _mesa_hash_table_search(vc4->jobs,
173                                                           &local_key);
174        if (entry)
175                return entry->data;
176
177        /* Creating a new job.  Make sure that any previous jobs reading or
178         * writing these buffers are flushed.
179         */
180        if (cbuf)
181                vc4_flush_jobs_reading_resource(vc4, cbuf->texture);
182        if (zsbuf)
183                vc4_flush_jobs_reading_resource(vc4, zsbuf->texture);
184
185        struct vc4_job *job = vc4_job_create(vc4);
186
187        if (cbuf) {
188                if (cbuf->texture->nr_samples > 1) {
189                        job->msaa = true;
190                        pipe_surface_reference(&job->msaa_color_write, cbuf);
191                } else {
192                        pipe_surface_reference(&job->color_write, cbuf);
193                }
194        }
195
196        if (zsbuf) {
197                if (zsbuf->texture->nr_samples > 1) {
198                        job->msaa = true;
199                        pipe_surface_reference(&job->msaa_zs_write, zsbuf);
200                } else {
201                        pipe_surface_reference(&job->zs_write, zsbuf);
202                }
203        }
204
205        if (job->msaa) {
206                job->tile_width = 32;
207                job->tile_height = 32;
208        } else {
209                job->tile_width = 64;
210                job->tile_height = 64;
211        }
212
213        if (cbuf)
214                _mesa_hash_table_insert(vc4->write_jobs, cbuf->texture, job);
215        if (zsbuf)
216                _mesa_hash_table_insert(vc4->write_jobs, zsbuf->texture, job);
217
218        job->key.cbuf = cbuf;
219        job->key.zsbuf = zsbuf;
220        _mesa_hash_table_insert(vc4->jobs, &job->key, job);
221
222        return job;
223}
224
225struct vc4_job *
226vc4_get_job_for_fbo(struct vc4_context *vc4)
227{
228        if (vc4->job)
229                return vc4->job;
230
231        struct pipe_surface *cbuf = vc4->framebuffer.cbufs[0];
232        struct pipe_surface *zsbuf = vc4->framebuffer.zsbuf;
233        struct vc4_job *job = vc4_get_job(vc4, cbuf, zsbuf);
234
235        /* The dirty flags are tracking what's been updated while vc4->job has
236         * been bound, so set them all to ~0 when switching between jobs.  We
237         * also need to reset all state at the start of rendering.
238         */
239        vc4->dirty = ~0;
240
241        /* Set up the read surfaces in the job.  If they aren't actually
242         * getting read (due to a clear starting the frame), job->cleared will
243         * mask out the read.
244         */
245        pipe_surface_reference(&job->color_read, cbuf);
246        pipe_surface_reference(&job->zs_read, zsbuf);
247
248        /* If we're binding to uninitialized buffers, no need to load their
249         * contents before drawing.
250         */
251        if (cbuf) {
252                struct vc4_resource *rsc = vc4_resource(cbuf->texture);
253                if (!rsc->writes)
254                        job->cleared |= PIPE_CLEAR_COLOR0;
255        }
256
257        if (zsbuf) {
258                struct vc4_resource *rsc = vc4_resource(zsbuf->texture);
259                if (!rsc->writes)
260                        job->cleared |= PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL;
261        }
262
263        job->draw_tiles_x = DIV_ROUND_UP(vc4->framebuffer.width,
264                                         job->tile_width);
265        job->draw_tiles_y = DIV_ROUND_UP(vc4->framebuffer.height,
266                                         job->tile_height);
267
268        /* Initialize the job with the raster order flags -- each draw will
269         * check that we haven't changed the flags, since that requires a
270         * flush.
271         */
272        if (vc4->rasterizer)
273                job->flags = vc4->rasterizer->tile_raster_order_flags;
274
275        vc4->job = job;
276
277        return job;
278}
279
280static void
281vc4_submit_setup_rcl_surface(struct vc4_job *job,
282                             struct drm_vc4_submit_rcl_surface *submit_surf,
283                             struct pipe_surface *psurf,
284                             bool is_depth, bool is_write)
285{
286        struct vc4_surface *surf = vc4_surface(psurf);
287
288        if (!surf)
289                return;
290
291        struct vc4_resource *rsc = vc4_resource(psurf->texture);
292        submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
293        submit_surf->offset = surf->offset;
294
295        if (psurf->texture->nr_samples <= 1) {
296                if (is_depth) {
297                        submit_surf->bits =
298                                VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
299                                              VC4_LOADSTORE_TILE_BUFFER_BUFFER);
300
301                } else {
302                        submit_surf->bits =
303                                VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_COLOR,
304                                              VC4_LOADSTORE_TILE_BUFFER_BUFFER) |
305                                VC4_SET_FIELD(vc4_rt_format_is_565(psurf->format) ?
306                                              VC4_LOADSTORE_TILE_BUFFER_BGR565 :
307                                              VC4_LOADSTORE_TILE_BUFFER_RGBA8888,
308                                              VC4_LOADSTORE_TILE_BUFFER_FORMAT);
309                }
310                submit_surf->bits |=
311                        VC4_SET_FIELD(surf->tiling,
312                                      VC4_LOADSTORE_TILE_BUFFER_TILING);
313        } else {
314                assert(!is_write);
315                submit_surf->flags |= VC4_SUBMIT_RCL_SURFACE_READ_IS_FULL_RES;
316        }
317
318        if (is_write)
319                rsc->writes++;
320}
321
322static void
323vc4_submit_setup_rcl_render_config_surface(struct vc4_job *job,
324                                           struct drm_vc4_submit_rcl_surface *submit_surf,
325                                           struct pipe_surface *psurf)
326{
327        struct vc4_surface *surf = vc4_surface(psurf);
328
329        if (!surf)
330                return;
331
332        struct vc4_resource *rsc = vc4_resource(psurf->texture);
333        submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
334        submit_surf->offset = surf->offset;
335
336        if (psurf->texture->nr_samples <= 1) {
337                submit_surf->bits =
338                        VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
339                                      VC4_RENDER_CONFIG_FORMAT_BGR565 :
340                                      VC4_RENDER_CONFIG_FORMAT_RGBA8888,
341                                      VC4_RENDER_CONFIG_FORMAT) |
342                        VC4_SET_FIELD(surf->tiling,
343                                      VC4_RENDER_CONFIG_MEMORY_FORMAT);
344        }
345
346        rsc->writes++;
347}
348
349static void
350vc4_submit_setup_rcl_msaa_surface(struct vc4_job *job,
351                                  struct drm_vc4_submit_rcl_surface *submit_surf,
352                                  struct pipe_surface *psurf)
353{
354        struct vc4_surface *surf = vc4_surface(psurf);
355
356        if (!surf)
357                return;
358
359        struct vc4_resource *rsc = vc4_resource(psurf->texture);
360        submit_surf->hindex = vc4_gem_hindex(job, rsc->bo);
361        submit_surf->offset = surf->offset;
362        submit_surf->bits = 0;
363        rsc->writes++;
364}
365
/**
 * Submits the job to the kernel and then frees it.
 *
 * The submit itself is skipped when the job has nothing to flush or its draw
 * bounds are empty, but the job is freed through vc4_job_free() on every
 * path, so callers must not touch @job after this returns.
 *
 * @param vc4  context the job belongs to
 * @param job  job to submit and free
 */
void
vc4_job_submit(struct vc4_context *vc4, struct vc4_job *job)
{
        if (!job->needs_flush)
                goto done;

        /* The RCL setup would choke if the draw bounds cause no drawing, so
         * just drop the drawing if that's the case.
         */
        if (job->draw_max_x <= job->draw_min_x ||
            job->draw_max_y <= job->draw_min_y) {
                goto done;
        }

        if (vc4_debug & VC4_DEBUG_CL) {
                fprintf(stderr, "BCL:\n");
                vc4_dump_cl(job->bcl.base, cl_offset(&job->bcl), false);
        }

        if (cl_offset(&job->bcl) > 0) {
                /* Increment the semaphore indicating that binning is done and
                 * unblocking the render thread.  Note that this doesn't act
                 * until the FLUSH completes.
                 */
                cl_ensure_space(&job->bcl, 8);
                cl_emit(&job->bcl, INCREMENT_SEMAPHORE, incr);
                /* The FLUSH caps all of our bin lists with a
                 * VC4_PACKET_RETURN.
                 */
                cl_emit(&job->bcl, FLUSH, flush);
        }
        /* Every surface slot starts with a handle index of ~0; only the
         * slots set up below get a real index.  NOTE(review): ~0 presumably
         * means "surface not present" to the kernel -- confirm against the
         * vc4 UAPI header.
         */
        struct drm_vc4_submit_cl submit = {
                .color_read.hindex = ~0,
                .zs_read.hindex = ~0,
                .color_write.hindex = ~0,
                .msaa_color_write.hindex = ~0,
                .zs_write.hindex = ~0,
                .msaa_zs_write.hindex = ~0,
        };

        /* Reserve room for six more entries in each list, matching the six
         * surface slots above that may each look up a handle index.
         */
        cl_ensure_space(&job->bo_handles, 6 * sizeof(uint32_t));
        cl_ensure_space(&job->bo_pointers, 6 * sizeof(struct vc4_bo *));

        /* Color: the read surface is only set up when the buffer wasn't
         * cleared; both write surfaces are passed unconditionally (the
         * helpers ignore NULL surfaces).
         */
        if (job->resolve & PIPE_CLEAR_COLOR) {
                if (!(job->cleared & PIPE_CLEAR_COLOR)) {
                        vc4_submit_setup_rcl_surface(job, &submit.color_read,
                                                     job->color_read,
                                                     false, false);
                }
                vc4_submit_setup_rcl_render_config_surface(job,
                                                           &submit.color_write,
                                                           job->color_write);
                vc4_submit_setup_rcl_msaa_surface(job,
                                                  &submit.msaa_color_write,
                                                  job->msaa_color_write);
        }
        /* Depth/stencil: same pattern as color. */
        if (job->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL)) {
                if (!(job->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
                        vc4_submit_setup_rcl_surface(job, &submit.zs_read,
                                                     job->zs_read, true, false);
                }
                vc4_submit_setup_rcl_surface(job, &submit.zs_write,
                                             job->zs_write, true, true);
                vc4_submit_setup_rcl_msaa_surface(job, &submit.msaa_zs_write,
                                                  job->msaa_zs_write);
        }

        if (job->msaa) {
                /* This bit controls how many pixels the general
                 * (i.e. subsampled) loads/stores are iterating over
                 * (multisample loads replicate out to the other samples).
                 */
                submit.color_write.bits |= VC4_RENDER_CONFIG_MS_MODE_4X;
                /* Controls whether color_write's
                 * VC4_PACKET_STORE_MS_TILE_BUFFER does 4x decimation
                 */
                submit.color_write.bits |= VC4_RENDER_CONFIG_DECIMATE_MODE_4X;
        }

        /* Hand the kernel the user-space addresses and sizes of the job's
         * lists.  The handle count is the bo_handles size divided by 4.
         */
        submit.bo_handles = (uintptr_t)job->bo_handles.base;
        submit.bo_handle_count = cl_offset(&job->bo_handles) / 4;
        submit.bin_cl = (uintptr_t)job->bcl.base;
        submit.bin_cl_size = cl_offset(&job->bcl);
        submit.shader_rec = (uintptr_t)job->shader_rec.base;
        submit.shader_rec_size = cl_offset(&job->shader_rec);
        submit.shader_rec_count = job->shader_rec_count;
        submit.uniforms = (uintptr_t)job->uniforms.base;
        submit.uniforms_size = cl_offset(&job->uniforms);
	if (job->perfmon)
		submit.perfmonid = job->perfmon->id;

        /* Convert the pixel bounding box to tile coordinates.  The max
         * bounds are exclusive, hence the "- 1" before dividing.
         */
        assert(job->draw_min_x != ~0 && job->draw_min_y != ~0);
        submit.min_x_tile = job->draw_min_x / job->tile_width;
        submit.min_y_tile = job->draw_min_y / job->tile_height;
        submit.max_x_tile = (job->draw_max_x - 1) / job->tile_width;
        submit.max_y_tile = (job->draw_max_y - 1) / job->tile_height;
        submit.width = job->draw_width;
        submit.height = job->draw_height;
        if (job->cleared) {
                submit.flags |= VC4_SUBMIT_CL_USE_CLEAR_COLOR;
                submit.clear_color[0] = job->clear_color[0];
                submit.clear_color[1] = job->clear_color[1];
                submit.clear_z = job->clear_depth;
                submit.clear_s = job->clear_stencil;
        }
        submit.flags |= job->flags;

        if (vc4->screen->has_syncobj) {
                submit.out_sync = vc4->job_syncobj;

                /* A pending in-fence fd is consumed here: it is imported
                 * into in_syncobj, closed, and reset to -1.  The import's
                 * return value is not checked.
                 */
                if (vc4->in_fence_fd >= 0) {
                        /* This replaces the fence in the syncobj. */
                        drmSyncobjImportSyncFile(vc4->fd, vc4->in_syncobj,
                                                 vc4->in_fence_fd);
                        submit.in_sync = vc4->in_syncobj;
                        close(vc4->in_fence_fd);
                        vc4->in_fence_fd = -1;
                }
        }

        /* With the NORAST debug flag set, the ioctl is skipped entirely. */
        if (!(vc4_debug & VC4_DEBUG_NORAST)) {
                int ret;

                ret = vc4_ioctl(vc4->fd, DRM_IOCTL_VC4_SUBMIT_CL, &submit);
                /* Function-static, so the failure message is printed at most
                 * once; later failures are silent.
                 */
                static bool warned = false;
                if (ret && !warned) {
                        fprintf(stderr, "Draw call returned %s.  "
                                        "Expect corruption.\n", strerror(errno));
                        warned = true;
                } else if (!ret) {
                        vc4->last_emit_seqno = submit.seqno;
                        if (job->perfmon)
                                job->perfmon->last_seqno = submit.seqno;
                }
        }

        /* Throttle: once the last emitted seqno is more than 5 ahead of the
         * screen's finished seqno, block until seqno (last - 5) completes.
         */
        if (vc4->last_emit_seqno - vc4->screen->finished_seqno > 5) {
                if (!vc4_wait_seqno(vc4->screen,
                                    vc4->last_emit_seqno - 5,
                                    PIPE_TIMEOUT_INFINITE,
                                    "job throttling")) {
                        fprintf(stderr, "Job throttling failed\n");
                }
        }

        /* Debug option: wait for the job just emitted, aborting on failure. */
        if (vc4_debug & VC4_DEBUG_ALWAYS_SYNC) {
                if (!vc4_wait_seqno(vc4->screen, vc4->last_emit_seqno,
                                    PIPE_TIMEOUT_INFINITE, "sync")) {
                        fprintf(stderr, "Wait failed.\n");
                        abort();
                }
        }

done:
        /* Reached on every path, including the early outs above. */
        vc4_job_free(vc4, job);
}
525
526static bool
527vc4_job_compare(const void *a, const void *b)
528{
529        return memcmp(a, b, sizeof(struct vc4_job_key)) == 0;
530}
531
532static uint32_t
533vc4_job_hash(const void *key)
534{
535        return _mesa_hash_data(key, sizeof(struct vc4_job_key));
536}
537
538int
539vc4_job_init(struct vc4_context *vc4)
540{
541        vc4->jobs = _mesa_hash_table_create(vc4,
542                                            vc4_job_hash,
543                                            vc4_job_compare);
544        vc4->write_jobs = _mesa_hash_table_create(vc4,
545                                                  _mesa_hash_pointer,
546                                                  _mesa_key_pointer_equal);
547
548        if (vc4->screen->has_syncobj) {
549                /* Create the syncobj as signaled since with no job executed
550                 * there is nothing to wait on.
551                 */
552                int ret = drmSyncobjCreate(vc4->fd,
553                                           DRM_SYNCOBJ_CREATE_SIGNALED,
554                                           &vc4->job_syncobj);
555                if (ret) {
556                        /* If the screen indicated syncobj support, we should
557                         * be able to create a signaled syncobj.
558                         * At this point it is too late to pretend the screen
559                         * has no syncobj support.
560                         */
561                        return ret;
562                }
563        }
564
565        return 0;
566}
567
568