1/*
2 * Copyright © 2014-2017 Broadcom
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24/** @file v3d_job.c
25 *
26 * Functions for submitting V3D render jobs to the kernel.
27 */
28
29#include <xf86drm.h>
30#include "v3d_context.h"
31/* The OQ/semaphore packets are the same across V3D versions. */
32#define V3D_VERSION 33
33#include "broadcom/cle/v3dx_pack.h"
34#include "broadcom/common/v3d_macros.h"
35#include "util/hash_table.h"
36#include "util/ralloc.h"
37#include "util/set.h"
38#include "broadcom/clif/clif_dump.h"
39
void
v3d_job_free(struct v3d_context *v3d, struct v3d_job *job)
{
        /* Drop the references this job held on all of its BOs. */
        set_foreach(job->bos, entry) {
                struct v3d_bo *bo = (struct v3d_bo *)entry->key;
                v3d_bo_unreference(&bo);
        }

        /* Remove the job from the FBO-keyed job table. */
        _mesa_hash_table_remove_key(v3d->jobs, &job->key);

        /* Drop the resource -> writing-job mappings that point at us. */
        if (job->write_prscs) {
                set_foreach(job->write_prscs, entry) {
                        const struct pipe_resource *prsc = entry->key;

                        _mesa_hash_table_remove_key(v3d->write_jobs, prsc);
                }
        }

        /* Release the color buffer surfaces and their write-job entries. */
        for (int i = 0; i < job->nr_cbufs; i++) {
                if (job->cbufs[i]) {
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    job->cbufs[i]->texture);
                        pipe_surface_reference(&job->cbufs[i], NULL);
                }
        }
        /* Same for depth/stencil, including any separate stencil resource. */
        if (job->zsbuf) {
                struct v3d_resource *rsc = v3d_resource(job->zsbuf->texture);
                if (rsc->separate_stencil)
                        _mesa_hash_table_remove_key(v3d->write_jobs,
                                                    &rsc->separate_stencil->base);

                _mesa_hash_table_remove_key(v3d->write_jobs,
                                            job->zsbuf->texture);
                pipe_surface_reference(&job->zsbuf, NULL);
        }
        if (job->bbuf)
                pipe_surface_reference(&job->bbuf, NULL);

        /* Unbind the job from the context if it was the current one. */
        if (v3d->job == job)
                v3d->job = NULL;

        v3d_destroy_cl(&job->bcl);
        v3d_destroy_cl(&job->rcl);
        v3d_destroy_cl(&job->indirect);
        v3d_bo_unreference(&job->tile_alloc);
        v3d_bo_unreference(&job->tile_state);

        /* The job is the ralloc parent of its CLs, sets, and handle array,
         * so this frees everything that wasn't explicitly released above.
         */
        ralloc_free(job);
}
89
90struct v3d_job *
91v3d_job_create(struct v3d_context *v3d)
92{
93        struct v3d_job *job = rzalloc(v3d, struct v3d_job);
94
95        job->v3d = v3d;
96
97        v3d_init_cl(job, &job->bcl);
98        v3d_init_cl(job, &job->rcl);
99        v3d_init_cl(job, &job->indirect);
100
101        job->draw_min_x = ~0;
102        job->draw_min_y = ~0;
103        job->draw_max_x = 0;
104        job->draw_max_y = 0;
105
106        job->bos = _mesa_set_create(job,
107                                    _mesa_hash_pointer,
108                                    _mesa_key_pointer_equal);
109        return job;
110}
111
112void
113v3d_job_add_bo(struct v3d_job *job, struct v3d_bo *bo)
114{
115        if (!bo)
116                return;
117
118        if (_mesa_set_search(job->bos, bo))
119                return;
120
121        v3d_bo_reference(bo);
122        _mesa_set_add(job->bos, bo);
123        job->referenced_size += bo->size;
124
125        uint32_t *bo_handles = (void *)(uintptr_t)job->submit.bo_handles;
126
127        if (job->submit.bo_handle_count >= job->bo_handles_size) {
128                job->bo_handles_size = MAX2(4, job->bo_handles_size * 2);
129                bo_handles = reralloc(job, bo_handles,
130                                      uint32_t, job->bo_handles_size);
131                job->submit.bo_handles = (uintptr_t)(void *)bo_handles;
132        }
133        bo_handles[job->submit.bo_handle_count++] = bo->handle;
134}
135
136void
137v3d_job_add_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
138{
139        struct v3d_context *v3d = job->v3d;
140
141        if (!job->write_prscs) {
142                job->write_prscs = _mesa_set_create(job,
143                                                    _mesa_hash_pointer,
144                                                    _mesa_key_pointer_equal);
145        }
146
147        _mesa_set_add(job->write_prscs, prsc);
148        _mesa_hash_table_insert(v3d->write_jobs, prsc, job);
149}
150
void
v3d_flush_jobs_using_bo(struct v3d_context *v3d, struct v3d_bo *bo)
{
        /* Submit every pending job that references @bo.  Submitting frees
         * the job and removes its entry from v3d->jobs; the table is safe
         * to keep iterating after entry deletion.
         */
        hash_table_foreach(v3d->jobs, entry) {
                struct v3d_job *job = entry->data;

                if (_mesa_set_search(job->bos, bo))
                        v3d_job_submit(v3d, job);
        }
}
161
162void
163v3d_job_add_tf_write_resource(struct v3d_job *job, struct pipe_resource *prsc)
164{
165        v3d_job_add_write_resource(job, prsc);
166
167        if (!job->tf_write_prscs)
168                job->tf_write_prscs = _mesa_pointer_set_create(job);
169
170        _mesa_set_add(job->tf_write_prscs, prsc);
171}
172
173static bool
174v3d_job_writes_resource_from_tf(struct v3d_job *job,
175                                struct pipe_resource *prsc)
176{
177        if (!job->tf_enabled)
178                return false;
179
180        if (!job->tf_write_prscs)
181                return false;
182
183        return _mesa_set_search(job->tf_write_prscs, prsc) != NULL;
184}
185
/**
 * Flushes the job (if any) writing to @prsc, subject to @flush_cond.
 *
 * Also flags a sync against the last compute job when the graphics
 * pipeline is about to read a resource the compute pipeline wrote.
 */
void
v3d_flush_jobs_writing_resource(struct v3d_context *v3d,
                                struct pipe_resource *prsc,
                                enum v3d_flush_cond flush_cond,
                                bool is_compute_pipeline)
{
        struct hash_entry *entry = _mesa_hash_table_search(v3d->write_jobs,
                                                           prsc);
        struct v3d_resource *rsc = v3d_resource(prsc);

        /* We need to sync if graphics pipeline reads a resource written
         * by the compute pipeline. The same would be needed for the case of
         * graphics-compute dependency but nowadays all compute jobs
         * are serialized with the previous submitted job.
         */
        if (!is_compute_pipeline && rsc->bo != NULL && rsc->compute_written) {
           v3d->sync_on_last_compute_job = true;
           rsc->compute_written = false;
        }

        /* No pending graphics job writes this resource. */
        if (!entry)
                return;

        struct v3d_job *job = entry->data;

        bool needs_flush;
        switch (flush_cond) {
        case V3D_FLUSH_ALWAYS:
                needs_flush = true;
                break;
        case V3D_FLUSH_NOT_CURRENT_JOB:
                /* Skip flushing the job the context is currently recording. */
                needs_flush = !v3d->job || v3d->job != job;
                break;
        case V3D_FLUSH_DEFAULT:
        default:
                /* For writes from TF in the same job we use the "Wait for TF"
                 * feature provided by the hardware so we don't want to flush.
                 * The exception to this is when the caller is about to map the
                 * resource since in that case we don't have a 'Wait for TF'
                 * command in the command stream. In this scenario the caller
                 * is expected to set 'always_flush' to True.
                 */
                needs_flush = !v3d_job_writes_resource_from_tf(job, prsc);
        }

        if (needs_flush)
                v3d_job_submit(v3d, job);
}
234
235void
236v3d_flush_jobs_reading_resource(struct v3d_context *v3d,
237                                struct pipe_resource *prsc,
238                                enum v3d_flush_cond flush_cond,
239                                bool is_compute_pipeline)
240{
241        struct v3d_resource *rsc = v3d_resource(prsc);
242
243        /* We only need to force the flush on TF writes, which is the only
244         * case where we might skip the flush to use the 'Wait for TF'
245         * command. Here we are flushing for a read, which means that the
246         * caller intends to write to the resource, so we don't care if
247         * there was a previous TF write to it.
248         */
249        v3d_flush_jobs_writing_resource(v3d, prsc, flush_cond,
250                                        is_compute_pipeline);
251
252        hash_table_foreach(v3d->jobs, entry) {
253                struct v3d_job *job = entry->data;
254
255                if (!_mesa_set_search(job->bos, rsc->bo))
256                        continue;
257
258                bool needs_flush;
259                switch (flush_cond) {
260                case V3D_FLUSH_NOT_CURRENT_JOB:
261                        needs_flush = !v3d->job || v3d->job != job;
262                        break;
263                case V3D_FLUSH_ALWAYS:
264                case V3D_FLUSH_DEFAULT:
265                default:
266                        needs_flush = true;
267                }
268
269                if (needs_flush)
270                        v3d_job_submit(v3d, job);
271
272                /* Reminder: v3d->jobs is safe to keep iterating even
273                 * after deletion of an entry.
274                 */
275                continue;
276        }
277}
278
279/**
 * Returns a v3d_job structure for tracking V3D rendering to a particular FBO.
281 *
282 * If we've already started rendering to this FBO, then return the same job,
283 * otherwise make a new one.  If we're beginning rendering to an FBO, make
284 * sure that any previous reads of the FBO (or writes to its color/Z surfaces)
285 * have been flushed.
286 */
struct v3d_job *
v3d_get_job(struct v3d_context *v3d,
            uint32_t nr_cbufs,
            struct pipe_surface **cbufs,
            struct pipe_surface *zsbuf,
            struct pipe_surface *bbuf)
{
        /* Return the existing job for this FBO if we have one */
        /* NOTE(review): the key holds exactly 4 color buffers — presumably
         * the driver's maximum draw buffer count; confirm nr_cbufs <= 4.
         */
        struct v3d_job_key local_key = {
                .cbufs = {
                        cbufs[0],
                        cbufs[1],
                        cbufs[2],
                        cbufs[3],
                },
                .zsbuf = zsbuf,
                .bbuf = bbuf,
        };
        struct hash_entry *entry = _mesa_hash_table_search(v3d->jobs,
                                                           &local_key);
        if (entry)
                return entry->data;

        /* Creating a new job.  Make sure that any previous jobs reading or
         * writing these buffers are flushed.
         */
        struct v3d_job *job = v3d_job_create(v3d);
        job->nr_cbufs = nr_cbufs;

        /* Take references on the color buffers and flush jobs touching
         * them; any multisampled attachment makes the whole job MSAA.
         */
        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i]) {
                        v3d_flush_jobs_reading_resource(v3d, cbufs[i]->texture,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        pipe_surface_reference(&job->cbufs[i], cbufs[i]);

                        if (cbufs[i]->texture->nr_samples > 1)
                                job->msaa = true;
                }
        }
        if (zsbuf) {
                v3d_flush_jobs_reading_resource(v3d, zsbuf->texture,
                                                V3D_FLUSH_DEFAULT,
                                                false);
                pipe_surface_reference(&job->zsbuf, zsbuf);
                if (zsbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }
        /* The blit buffer, if any, is only referenced, not flushed. */
        if (bbuf) {
                pipe_surface_reference(&job->bbuf, bbuf);
                if (bbuf->texture->nr_samples > 1)
                        job->msaa = true;
        }

        /* Register this job as the writer of each attachment. */
        for (int i = 0; i < job->nr_cbufs; i++) {
                if (cbufs[i])
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                cbufs[i]->texture, job);
        }
        if (zsbuf) {
                _mesa_hash_table_insert(v3d->write_jobs, zsbuf->texture, job);

                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (rsc->separate_stencil) {
                        v3d_flush_jobs_reading_resource(v3d,
                                                        &rsc->separate_stencil->base,
                                                        V3D_FLUSH_DEFAULT,
                                                        false);
                        _mesa_hash_table_insert(v3d->write_jobs,
                                                &rsc->separate_stencil->base,
                                                job);
                }
        }

        /* Double-buffer mode is a debug option and is incompatible with
         * MSAA rendering.
         */
       job->double_buffer =
               unlikely(V3D_DEBUG & V3D_DEBUG_DOUBLE_BUFFER) && !job->msaa;

        /* The job stores its own copy of the key; insert it into the
         * FBO-keyed table for future lookups.
         */
        memcpy(&job->key, &local_key, sizeof(local_key));
        _mesa_hash_table_insert(v3d->jobs, &job->key, job);

        return job;
}
369
/**
 * Returns the job for the context's current framebuffer state, creating
 * and binding a new one (and resetting dirty state) if needed.
 */
struct v3d_job *
v3d_get_job_for_fbo(struct v3d_context *v3d)
{
        /* Reuse the job already bound to the context, if any. */
        if (v3d->job)
                return v3d->job;

        uint32_t nr_cbufs = v3d->framebuffer.nr_cbufs;
        struct pipe_surface **cbufs = v3d->framebuffer.cbufs;
        struct pipe_surface *zsbuf = v3d->framebuffer.zsbuf;
        struct v3d_job *job = v3d_get_job(v3d, nr_cbufs, cbufs, zsbuf, NULL);

        /* NOTE(review): gallium normally treats nr_samples <= 1 as
         * single-sampled; ">= 1" marks MSAA even for samples == 1 —
         * confirm this is intended.
         */
        if (v3d->framebuffer.samples >= 1) {
                job->msaa = true;
                job->double_buffer = false;
        }

        /* Derive tile geometry and internal bpp from the attachments. */
        v3d_get_tile_buffer_size(job->msaa, job->double_buffer,
                                 job->nr_cbufs, job->cbufs, job->bbuf,
                                 &job->tile_width, &job->tile_height,
                                 &job->internal_bpp);

        /* The dirty flags are tracking what's been updated while v3d->job has
         * been bound, so set them all to ~0 when switching between jobs.  We
         * also need to reset all state at the start of rendering.
         */
        v3d->dirty = ~0;

        /* If we're binding to uninitialized buffers, no need to load their
         * contents before drawing.
         */
        for (int i = 0; i < nr_cbufs; i++) {
                if (cbufs[i]) {
                        struct v3d_resource *rsc = v3d_resource(cbufs[i]->texture);
                        if (!rsc->writes)
                                job->clear |= PIPE_CLEAR_COLOR0 << i;
                }
        }

        if (zsbuf) {
                struct v3d_resource *rsc = v3d_resource(zsbuf->texture);
                if (!rsc->writes)
                        job->clear |= PIPE_CLEAR_DEPTH;

                /* Stencil may live in a separate resource. */
                if (rsc->separate_stencil)
                        rsc = rsc->separate_stencil;

                if (!rsc->writes)
                        job->clear |= PIPE_CLEAR_STENCIL;
        }

        /* Number of tiles covering the framebuffer, rounding up. */
        job->draw_tiles_x = DIV_ROUND_UP(v3d->framebuffer.width,
                                         job->tile_width);
        job->draw_tiles_y = DIV_ROUND_UP(v3d->framebuffer.height,
                                         job->tile_height);

        v3d->job = job;

        return job;
}
429
430static void
431v3d_clif_dump(struct v3d_context *v3d, struct v3d_job *job)
432{
433        if (!(unlikely(V3D_DEBUG & (V3D_DEBUG_CL |
434                                    V3D_DEBUG_CL_NO_BIN |
435                                    V3D_DEBUG_CLIF))))
436                return;
437
438        struct clif_dump *clif = clif_dump_init(&v3d->screen->devinfo,
439                                                stderr,
440                                                V3D_DEBUG & (V3D_DEBUG_CL |
441                                                             V3D_DEBUG_CL_NO_BIN),
442                                                V3D_DEBUG & V3D_DEBUG_CL_NO_BIN);
443
444        set_foreach(job->bos, entry) {
445                struct v3d_bo *bo = (void *)entry->key;
446                char *name = ralloc_asprintf(NULL, "%s_0x%x",
447                                             bo->name, bo->offset);
448
449                v3d_bo_map(bo);
450                clif_dump_add_bo(clif, name, bo->offset, bo->size, bo->map);
451
452                ralloc_free(name);
453        }
454
455        clif_dump(clif, &job->submit);
456
457        clif_dump_destroy(clif);
458}
459
/* Stalls on the GPU and folds the TF/primitive counters written by the
 * hardware into the context's running totals.
 */
static void
v3d_read_and_accumulate_primitive_counters(struct v3d_context *v3d)
{
        assert(v3d->prim_counts);

        perf_debug("stalling on TF counts readback\n");
        struct v3d_resource *rsc = v3d_resource(v3d->prim_counts);
        /* Block until the GPU has finished writing the counter BO. */
        if (v3d_bo_wait(rsc->bo, PIPE_TIMEOUT_INFINITE, "prim-counts")) {
                uint32_t *map = v3d_bo_map(rsc->bo) + v3d->prim_counts_offset;
                v3d->tf_prims_generated += map[V3D_PRIM_COUNTS_TF_WRITTEN];
                /* When we only have a vertex shader we determine the primitive
                 * count in the CPU so don't update it here again.
                 */
                if (v3d->prog.gs)
                        v3d->prims_generated += map[V3D_PRIM_COUNTS_WRITTEN];
        }
}
477
478/**
479 * Submits the job to the kernel and then reinitializes it.
480 */
void
v3d_job_submit(struct v3d_context *v3d, struct v3d_job *job)
{
        struct v3d_screen *screen = v3d->screen;

        /* Nothing was recorded into this job; just free it. */
        if (!job->needs_flush)
                goto done;

        /* The GL_PRIMITIVES_GENERATED query is included with
         * OES_geometry_shader.
         */
        job->needs_primitives_generated =
                v3d->n_primitives_generated_queries_in_flight > 0 &&
                v3d->prog.gs;

        if (job->needs_primitives_generated)
                v3d_ensure_prim_counts_allocated(v3d);

        /* Emit the render control list for this hardware generation. */
        if (screen->devinfo.ver >= 41)
                v3d41_emit_rcl(job);
        else
                v3d33_emit_rcl(job);

        /* Terminate the binner CL if any binning commands were recorded. */
        if (cl_offset(&job->bcl) > 0) {
                if (screen->devinfo.ver >= 41)
                        v3d41_bcl_epilogue(v3d, job);
                else
                        v3d33_bcl_epilogue(v3d, job);
        }

        /* While the RCL will implicitly depend on the last RCL to have
         * finished, we also need to block on any previous TFU job we may have
         * dispatched.
         */
        job->submit.in_sync_rcl = v3d->out_sync;

        /* Update the sync object for the last rendering by our context. */
        job->submit.out_sync = v3d->out_sync;

        /* End offsets of the recorded binner and render CLs. */
        job->submit.bcl_end = job->bcl.bo->offset + cl_offset(&job->bcl);
        job->submit.rcl_end = job->rcl.bo->offset + cl_offset(&job->rcl);

        if (v3d->active_perfmon) {
                assert(screen->has_perfmon);
                job->submit.perfmon_id = v3d->active_perfmon->kperfmon_id;
        }

        /* If we are submitting a job with a different perfmon, we need to
         * ensure the previous one fully finishes before starting this;
         * otherwise it would wrongly mix counter results.
         */
        if (v3d->active_perfmon != v3d->last_perfmon) {
                v3d->last_perfmon = v3d->active_perfmon;
                job->submit.in_sync_bcl = v3d->out_sync;
        }

        job->submit.flags = 0;
        if (job->tmu_dirty_rcl && screen->has_cache_flush)
                job->submit.flags |= DRM_V3D_SUBMIT_CL_FLUSH_CACHE;

        /* On V3D 4.1, the tile alloc/state setup moved to register writes
         * instead of binner packets.
         */
        if (screen->devinfo.ver >= 41) {
                v3d_job_add_bo(job, job->tile_alloc);
                job->submit.qma = job->tile_alloc->offset;
                job->submit.qms = job->tile_alloc->size;

                v3d_job_add_bo(job, job->tile_state);
                job->submit.qts = job->tile_state->offset;
        }

        v3d_clif_dump(v3d, job);

        /* Skip the actual kernel submit under V3D_DEBUG_NORAST. */
        if (!(unlikely(V3D_DEBUG & V3D_DEBUG_NORAST))) {
                int ret;

                ret = v3d_ioctl(v3d->fd, DRM_IOCTL_V3D_SUBMIT_CL, &job->submit);
                /* Warn only once per process on submit failure. */
                static bool warned = false;
                if (ret && !warned) {
                        fprintf(stderr, "Draw call returned %s.  "
                                        "Expect corruption.\n", strerror(errno));
                        warned = true;
                } else if (!ret) {
                        if (v3d->active_perfmon)
                                v3d->active_perfmon->job_submitted = true;
                }

                /* If we are submitting a job in the middle of transform
                 * feedback or there is a primitives generated query with a
                 * geometry shader then we need to read the primitive counts
                 * and accumulate them, otherwise they will be reset at the
                 * start of the next draw when we emit the Tile Binning Mode
                 * Configuration packet.
                 *
                 * If the job doesn't have any TF draw calls, then we know
                 * the primitive count must be zero and we can skip stalling
                 * for this. This also fixes a problem because it seems that
                 * in this scenario the counters are not reset with the Tile
                 * Binning Mode Configuration packet, which would translate
                 * to us reading an obsolete (possibly non-zero) value from
                 * the GPU counters.
                 */
                if (job->needs_primitives_generated ||
                    (v3d->streamout.num_targets &&
                     job->tf_draw_calls_queued > 0))
                        v3d_read_and_accumulate_primitive_counters(v3d);
        }

done:
        /* Submission consumes the job: free it and drop its references. */
        v3d_job_free(v3d, job);
}
593
594static bool
595v3d_job_compare(const void *a, const void *b)
596{
597        return memcmp(a, b, sizeof(struct v3d_job_key)) == 0;
598}
599
600static uint32_t
601v3d_job_hash(const void *key)
602{
603        return _mesa_hash_data(key, sizeof(struct v3d_job_key));
604}
605
606void
607v3d_job_init(struct v3d_context *v3d)
608{
609        v3d->jobs = _mesa_hash_table_create(v3d,
610                                            v3d_job_hash,
611                                            v3d_job_compare);
612        v3d->write_jobs = _mesa_hash_table_create(v3d,
613                                                  _mesa_hash_pointer,
614                                                  _mesa_key_pointer_equal);
615}
616
617