1/* 2 * Copyright (C) 2022 Collabora, Ltd. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#include "pan_earlyzs.h" 25#include "panfrost/util/pan_ir.h" 26 27/* 28 * Return an "early" mode. If it is known that the depth/stencil tests always 29 * pass (so the shader is always executed), weak early is usually faster than 30 * force early. 31 */ 32static enum pan_earlyzs 33best_early_mode(bool zs_always_passes) 34{ 35 if (zs_always_passes) 36 return PAN_EARLYZS_WEAK_EARLY; 37 else 38 return PAN_EARLYZS_FORCE_EARLY; 39} 40 41/* 42 * Analyze a fragment shader and provided API state to determine the early-ZS 43 * configuration. The order of arguments must match the order of states in the 44 * lookup table, synchronized with pan_earlyzs_get. 45 */ 46static struct pan_earlyzs_state 47analyze(const struct pan_shader_info *s, 48 bool writes_zs_or_oq, 49 bool alpha_to_coverage, 50 bool zs_always_passes) 51{ 52 /* If the shader writes depth or stencil, all depth/stencil tests must 53 * be deferred until the value is known after the ZS_EMIT instruction, 54 * if present. ZS_EMIT must precede ATEST, so the value is known when 55 * ATEST executes, justifying the late test/update. 56 */ 57 bool shader_writes_zs = (s->fs.writes_depth || s->fs.writes_stencil); 58 bool late_update = shader_writes_zs; 59 bool late_kill = shader_writes_zs; 60 61 /* Late coverage updates are required if the coverage mask depends on 62 * the results of the shader. Discards are implemented as coverage mask 63 * updates and must be considered. Strictly, depth/stencil writes may 64 * also update the coverage mask, but these already force late updates. 65 */ 66 bool late_coverage = s->fs.writes_coverage || 67 s->fs.can_discard || 68 alpha_to_coverage; 69 70 /* Late coverage mask updates may affect the value written to the 71 * depth/stencil buffer (if a pixel is discarded entirely). However, 72 * they do not affect depth/stencil testing. So they may only matter if 73 * depth or stencil is written. 74 * 75 * That dependency does mean late coverage mask updates require late 76 * depth/stencil updates. 77 * 78 * Similarly, occlusion queries count samples that pass the 79 * depth/stencil tests, so occlusion queries with late coverage also 80 * require a late update. 81 */ 82 late_update |= (late_coverage && writes_zs_or_oq); 83 84 /* Side effects require late depth/stencil tests to ensure the shader 85 * isn't killed before the side effects execute. 86 */ 87 late_kill |= s->writes_global; 88 89 /* Finally, the shader may override and force early fragment tests */ 90 late_update &= !s->fs.early_fragment_tests; 91 late_kill &= !s->fs.early_fragment_tests; 92 93 /* Collect results */ 94 enum pan_earlyzs early_mode = best_early_mode(zs_always_passes); 95 96 return (struct pan_earlyzs_state) { 97 .update = late_update ? PAN_EARLYZS_FORCE_LATE : early_mode, 98 .kill = late_kill ? PAN_EARLYZS_FORCE_LATE : early_mode 99 }; 100} 101 102/* 103 * Analyze a fragment shader to determine all possible early-ZS configurations. 104 * Returns a lookup table of configurations indexed by the API state. 105 */ 106struct pan_earlyzs_lut 107pan_earlyzs_analyze(const struct pan_shader_info *s) 108{ 109 struct pan_earlyzs_lut lut; 110 111 for (unsigned v0 = 0; v0 < 2; ++v0) { 112 for (unsigned v1 = 0; v1 < 2; ++v1) { 113 for (unsigned v2 = 0; v2 < 2; ++v2) 114 lut.states[v0][v1][v2] = analyze(s, v0, v1, v2); 115 } 116 } 117 118 return lut; 119} 120