1/*
2 * Copyright (C) 2022 Collabora, Ltd.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24#include "pan_earlyzs.h"
25#include "panfrost/util/pan_ir.h"
26
27/*
28 * Return an "early" mode. If it is known that the depth/stencil tests always
29 * pass (so the shader is always executed), weak early is usually faster than
30 * force early.
31 */
32static enum pan_earlyzs
33best_early_mode(bool zs_always_passes)
34{
35        if (zs_always_passes)
36                return PAN_EARLYZS_WEAK_EARLY;
37        else
38                return PAN_EARLYZS_FORCE_EARLY;
39}
40
41/*
42 * Analyze a fragment shader and provided API state to determine the early-ZS
43 * configuration. The order of arguments must match the order of states in the
44 * lookup table, synchronized with pan_earlyzs_get.
45 */
46static struct pan_earlyzs_state
47analyze(const struct pan_shader_info *s,
48        bool writes_zs_or_oq,
49        bool alpha_to_coverage,
50        bool zs_always_passes)
51{
52        /* If the shader writes depth or stencil, all depth/stencil tests must
53         * be deferred until the value is known after the ZS_EMIT instruction,
54         * if present. ZS_EMIT must precede ATEST, so the value is known when
55         * ATEST executes, justifying the late test/update.
56         */
57        bool shader_writes_zs = (s->fs.writes_depth || s->fs.writes_stencil);
58        bool late_update = shader_writes_zs;
59        bool late_kill = shader_writes_zs;
60
61        /* Late coverage updates are required if the coverage mask depends on
62         * the results of the shader. Discards are implemented as coverage mask
63         * updates and must be considered. Strictly, depth/stencil writes may
64         * also update the coverage mask, but these already force late updates.
65         */
66        bool late_coverage = s->fs.writes_coverage ||
67                             s->fs.can_discard ||
68                             alpha_to_coverage;
69
70        /* Late coverage mask updates may affect the value written to the
71         * depth/stencil buffer (if a pixel is discarded entirely). However,
72         * they do not affect depth/stencil testing. So they may only matter if
73         * depth or stencil is written.
74         *
75         * That dependency does mean late coverage mask updates require late
76         * depth/stencil updates.
77         *
78         * Similarly, occlusion queries count samples that pass the
79         * depth/stencil tests, so occlusion queries with late coverage also
80         * require a late update.
81         */
82        late_update |= (late_coverage && writes_zs_or_oq);
83
84        /* Side effects require late depth/stencil tests to ensure the shader
85         * isn't killed before the side effects execute.
86         */
87        late_kill |= s->writes_global;
88
89        /* Finally, the shader may override and force early fragment tests */
90        late_update &= !s->fs.early_fragment_tests;
91        late_kill   &= !s->fs.early_fragment_tests;
92
93        /* Collect results */
94        enum pan_earlyzs early_mode = best_early_mode(zs_always_passes);
95
96        return (struct pan_earlyzs_state) {
97                .update = late_update ? PAN_EARLYZS_FORCE_LATE : early_mode,
98                .kill   = late_kill   ? PAN_EARLYZS_FORCE_LATE : early_mode
99        };
100}
101
102/*
103 * Analyze a fragment shader to determine all possible early-ZS configurations.
104 * Returns a lookup table of configurations indexed by the API state.
105 */
106struct pan_earlyzs_lut
107pan_earlyzs_analyze(const struct pan_shader_info *s)
108{
109        struct pan_earlyzs_lut lut;
110
111        for (unsigned v0 = 0; v0 < 2; ++v0) {
112                for (unsigned v1 = 0; v1 < 2; ++v1) {
113                        for (unsigned v2 = 0; v2 < 2; ++v2)
114                                lut.states[v0][v1][v2] = analyze(s, v0, v1, v2);
115                }
116        }
117
118        return lut;
119}
120