1/* 2 * Copyright (C) 2018-2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 */ 23 24#ifndef __AGX_PUBLIC_H_ 25#define __AGX_PUBLIC_H_ 26 27#include "compiler/nir/nir.h" 28#include "util/u_dynarray.h" 29#include "asahi/lib/agx_pack.h" 30 31enum agx_push_type { 32 /* Array of 64-bit pointers to the base addresses (BASES) and array of 33 * 16-bit sizes for optional bounds checking (SIZES) */ 34 AGX_PUSH_UBO_BASES = 0, 35 AGX_PUSH_UBO_SIZES = 1, 36 AGX_PUSH_VBO_BASES = 2, 37 AGX_PUSH_VBO_SIZES = 3, 38 AGX_PUSH_SSBO_BASES = 4, 39 AGX_PUSH_SSBO_SIZES = 5, 40 41 /* Push the attached constant memory */ 42 AGX_PUSH_CONSTANTS = 6, 43 44 /* Push the content of a UBO */ 45 AGX_PUSH_UBO_DATA = 7, 46 47 /* RGBA blend constant (FP32) */ 48 AGX_PUSH_BLEND_CONST = 8, 49 50 /* Array of 16-bit (array_size - 1) for indexed array textures, used to 51 * lower access to indexed array textures 52 */ 53 AGX_PUSH_ARRAY_SIZE_MINUS_1 = 9, 54 55 /* Keep last */ 56 AGX_PUSH_NUM_TYPES 57}; 58 59struct agx_push { 60 /* Contents to push */ 61 enum agx_push_type type : 8; 62 63 /* Base of where to push, indexed in 16-bit units. The uniform file contains 64 * 512 = 2^9 such units. */ 65 unsigned base : 9; 66 67 /* Number of 16-bit units to push */ 68 unsigned length : 9; 69 70 /* If set, rather than pushing the specified data, push a pointer to the 71 * specified data. This is slower to access but enables indirect access, as 72 * the uniform file does not support indirection. */ 73 bool indirect : 1; 74 75 union { 76 struct { 77 uint16_t ubo; 78 uint16_t offset; 79 } ubo_data; 80 }; 81}; 82 83/* Arbitrary */ 84#define AGX_MAX_PUSH_RANGES (16) 85#define AGX_MAX_VARYINGS (32) 86 87struct agx_varyings { 88 unsigned nr_descs, nr_slots; 89 struct agx_varying_packed packed[AGX_MAX_VARYINGS]; 90}; 91 92struct agx_shader_info { 93 unsigned push_ranges; 94 struct agx_push push[AGX_MAX_PUSH_RANGES]; 95 struct agx_varyings varyings; 96 97 /* Does the shader read the tilebuffer? */ 98 bool reads_tib; 99 100 /* Does the shader write point size? */ 101 bool writes_psiz; 102 103 /* Does the shader control the sample mask? */ 104 bool writes_sample_mask; 105}; 106 107#define AGX_MAX_RTS (8) 108#define AGX_MAX_ATTRIBS (16) 109#define AGX_MAX_VBUFS (16) 110 111enum agx_format { 112 AGX_FORMAT_I8 = 0, 113 AGX_FORMAT_I16 = 1, 114 AGX_FORMAT_I32 = 2, 115 AGX_FORMAT_F16 = 3, 116 AGX_FORMAT_U8NORM = 4, 117 AGX_FORMAT_S8NORM = 5, 118 AGX_FORMAT_U16NORM = 6, 119 AGX_FORMAT_S16NORM = 7, 120 AGX_FORMAT_RGB10A2 = 8, 121 AGX_FORMAT_SRGBA8 = 10, 122 AGX_FORMAT_RG11B10F = 12, 123 AGX_FORMAT_RGB9E5 = 13, 124 125 /* Keep last */ 126 AGX_NUM_FORMATS, 127}; 128 129/* Returns the number of bits at the bottom of the address required to be zero. 130 * That is, returns the base-2 logarithm of the minimum alignment for an 131 * agx_format, where the minimum alignment is 2^n where n is the result of this 132 * function. The offset argument to device_load is left-shifted by this amount 133 * in the hardware */ 134 135static inline unsigned 136agx_format_shift(enum agx_format format) 137{ 138 switch (format) { 139 case AGX_FORMAT_I8: 140 case AGX_FORMAT_U8NORM: 141 case AGX_FORMAT_S8NORM: 142 case AGX_FORMAT_SRGBA8: 143 return 0; 144 145 case AGX_FORMAT_I16: 146 case AGX_FORMAT_F16: 147 case AGX_FORMAT_U16NORM: 148 case AGX_FORMAT_S16NORM: 149 return 1; 150 151 case AGX_FORMAT_I32: 152 case AGX_FORMAT_RGB10A2: 153 case AGX_FORMAT_RG11B10F: 154 case AGX_FORMAT_RGB9E5: 155 return 2; 156 157 default: 158 unreachable("invalid format"); 159 } 160} 161 162struct agx_attribute { 163 uint32_t divisor; 164 165 unsigned buf : 5; 166 unsigned src_offset : 16; 167 unsigned nr_comps_minus_1 : 2; 168 enum agx_format format : 4; 169 unsigned padding : 5; 170}; 171 172struct agx_vs_shader_key { 173 unsigned num_vbufs; 174 unsigned vbuf_strides[AGX_MAX_VBUFS]; 175 176 struct agx_attribute attributes[AGX_MAX_ATTRIBS]; 177 178 /* Set to true for clip coordinates to range [0, 1] instead of [-1, 1] */ 179 bool clip_halfz : 1; 180}; 181 182struct agx_fs_shader_key { 183 enum agx_format tib_formats[AGX_MAX_RTS]; 184}; 185 186struct agx_shader_key { 187 union { 188 struct agx_vs_shader_key vs; 189 struct agx_fs_shader_key fs; 190 }; 191}; 192 193void 194agx_compile_shader_nir(nir_shader *nir, 195 struct agx_shader_key *key, 196 struct util_dynarray *binary, 197 struct agx_shader_info *out); 198 199static const nir_shader_compiler_options agx_nir_options = { 200 .lower_fdiv = true, 201 .fuse_ffma16 = true, 202 .fuse_ffma32 = true, 203 .lower_flrp16 = true, 204 .lower_flrp32 = true, 205 .lower_fpow = true, 206 .lower_fmod = true, 207 .lower_ifind_msb = true, 208 .lower_find_lsb = true, 209 .lower_scmp = true, 210 .lower_isign = true, 211 .lower_fsign = true, 212 .lower_iabs = true, 213 .lower_fdph = true, 214 .lower_ffract = true, 215 .lower_pack_split = true, 216 .lower_insert_byte = true, 217 .lower_insert_word = true, 218 .lower_cs_local_index_to_id = true, 219 .has_cs_global_id = true, 220 .lower_wpos_pntc = true, 221 .vectorize_io = true, 222 .use_interpolated_input_intrinsics = true, 223 .lower_rotate = true, 224 .has_fsub = true, 225 .has_isub = true, 226 .max_unroll_iterations = 32, 227 .lower_uniforms_to_ubo = true, 228 .force_indirect_unrolling_sampler = true, 229 .force_indirect_unrolling = (nir_var_shader_in | nir_var_shader_out | nir_var_function_temp), 230 .lower_int64_options = (nir_lower_int64_options) ~(nir_lower_iadd64 | nir_lower_imul_2x32_64), 231 .lower_doubles_options = nir_lower_dmod, 232}; 233 234#endif 235