/*
 * Copyright (C) 2018 Alyssa Rosenzweig
 * Copyright (C) 2019-2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "pan_device.h"
#include "pan_shader.h"
#include "pan_format.h"

#if PAN_ARCH <= 5
#include "panfrost/midgard/midgard_compile.h"
#else
#include "panfrost/bifrost/bifrost_compile.h"
#endif

const nir_shader_compiler_options *
GENX(pan_shader_get_compiler_options)(void)
{
#if PAN_ARCH >= 6
        return &bifrost_nir_options;
#else
        return &midgard_nir_options;
#endif
}

#if PAN_ARCH <= 7
static enum pipe_format
varying_format(nir_alu_type t, unsigned ncomps)
{
#define VARYING_FORMAT(ntype, nsz, ptype, psz) \
        { \
                .type = nir_type_ ## ntype ## nsz, \
                .formats = { \
                        PIPE_FORMAT_R ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## _ ## ptype, \
                        PIPE_FORMAT_R ## psz ## G ## psz ## B ## psz ## A ## psz ## _ ## ptype, \
                } \
        }

        static const struct {
                nir_alu_type type;
                enum pipe_format formats[4];
        } conv[] = {
                VARYING_FORMAT(float, 32, FLOAT, 32),
                VARYING_FORMAT(int, 32, SINT, 32),
                VARYING_FORMAT(uint, 32, UINT, 32),
                VARYING_FORMAT(float, 16, FLOAT, 16),
                VARYING_FORMAT(int, 16, SINT, 16),
                VARYING_FORMAT(uint, 16, UINT, 16),
                VARYING_FORMAT(int, 8, SINT, 8),
                VARYING_FORMAT(uint, 8, UINT, 8),
                VARYING_FORMAT(bool, 32, UINT, 32),
                VARYING_FORMAT(bool, 16, UINT, 16),
                VARYING_FORMAT(bool, 8, UINT, 8),
                VARYING_FORMAT(bool, 1, UINT, 8),
        };
#undef VARYING_FORMAT

        assert(ncomps > 0 && ncomps <= ARRAY_SIZE(conv[0].formats));

        for (unsigned i = 0; i < ARRAY_SIZE(conv); i++) {
                if (conv[i].type == t)
                        return conv[i].formats[ncomps - 1];
        }

        return PIPE_FORMAT_NONE;
}
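
/* Gather the varyings for one shader stage on Midgard/Bifrost. Two passes over
 * the variables of the given mode: the first sizes each driver_location
 * (widening for a fractional component offset), the second picks a pipe_format
 * via varying_format() and records a location and format for every attribute
 * slot the variable occupies.
 */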
static void
collect_varyings(nir_shader *s, nir_variable_mode varying_mode,
                 struct pan_shader_varying *varyings,
                 unsigned *varying_count)
{
        *varying_count = 0;

        unsigned comps[PAN_MAX_VARYINGS] = { 0 };

        nir_foreach_variable_with_modes(var, s, varying_mode) {
                unsigned loc = var->data.driver_location;
                const struct glsl_type *column =
                        glsl_without_array_or_matrix(var->type);
                unsigned chan = glsl_get_components(column);

                /* If we have a fractional location added, we need to increase the size
                 * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4.
                 * We could do better but this is an edge case as it is, normally
                 * packed varyings will be aligned.
                 */
                chan += var->data.location_frac;
                comps[loc] = MAX2(comps[loc], chan);
        }

        nir_foreach_variable_with_modes(var, s, varying_mode) {
                unsigned loc = var->data.driver_location;
                unsigned sz = glsl_count_attribute_slots(var->type, FALSE);
                const struct glsl_type *column =
                        glsl_without_array_or_matrix(var->type);
                enum glsl_base_type base_type = glsl_get_base_type(column);
                unsigned chan = comps[loc];

                nir_alu_type type = nir_get_nir_type_for_glsl_base_type(base_type);
                type = nir_alu_type_get_base_type(type);

                /* Can't do type conversion since GLSL IR packs in funny ways */
                if (PAN_ARCH >= 6 && var->data.interpolation == INTERP_MODE_FLAT)
                        type = nir_type_uint;

                /* Point size is handled specially on Valhall (with malloc IDVS),
                 * though this entire linker should probably be bypassed for
                 * Valhall in the future.
                 */
                if (PAN_ARCH >= 9 && var->data.location == VARYING_SLOT_PSIZ)
                        continue;

                /* Demote to fp16 where possible. int16 varyings are TODO as the hw
                 * will saturate instead of wrap which is not conformant, so we need to
                 * insert i2i16/u2u16 instructions before the st_vary_32i/32u to get
                 * the intended behaviour.
                 */
                if (type == nir_type_float &&
                    (var->data.precision == GLSL_PRECISION_MEDIUM ||
                     var->data.precision == GLSL_PRECISION_LOW) &&
                    !s->info.has_transform_feedback_varyings) {
                        type |= 16;
                } else {
                        type |= 32;
                }

                enum pipe_format format = varying_format(type, chan);
                assert(format != PIPE_FORMAT_NONE);

                for (int c = 0; c < sz; ++c) {
                        assert(loc + c < PAN_MAX_VARYINGS);
                        varyings[loc + c].location = var->data.location + c;
                        varyings[loc + c].format = format;
                }

                *varying_count = MAX2(*varying_count, loc + sz);
        }
}
#endif

#if PAN_ARCH >= 6
/* Map the nir_alu_type written to a render target to the corresponding Mali
 * register file format. A type of zero means the render target is unused.
 */
static enum mali_register_file_format
bifrost_blend_type_from_nir(nir_alu_type nir_type)
{
        switch (nir_type) {
        case 0: /* Render target not in use */
                return 0;
        case nir_type_float16:
                return MALI_REGISTER_FILE_FORMAT_F16;
        case nir_type_float32:
                return MALI_REGISTER_FILE_FORMAT_F32;
        case nir_type_int32:
                return MALI_REGISTER_FILE_FORMAT_I32;
        case nir_type_uint32:
                return MALI_REGISTER_FILE_FORMAT_U32;
        case nir_type_int16:
                return MALI_REGISTER_FILE_FORMAT_I16;
        case nir_type_uint16:
                return MALI_REGISTER_FILE_FORMAT_U16;
        default:
                unreachable("Unsupported blend shader type for NIR alu type");
                return 0;
        }
}
#endif
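
/* Compile a NIR shader with the architecture-specific compiler (Midgard for
 * v5 and earlier, Bifrost otherwise) and fill out pan_shader_info with the
 * shader state the driver needs at draw time.
 */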
void
GENX(pan_shader_compile)(nir_shader *s,
                         struct panfrost_compile_inputs *inputs,
                         struct util_dynarray *binary,
                         struct pan_shader_info *info)
{
        memset(info, 0, sizeof(*info));

#if PAN_ARCH >= 6
        bifrost_compile_shader_nir(s, inputs, binary, info);
#else
        for (unsigned i = 0; i < ARRAY_SIZE(inputs->rt_formats); i++) {
                enum pipe_format fmt = inputs->rt_formats[i];
                unsigned wb_fmt = panfrost_blendable_formats_v6[fmt].writeback;

                if (wb_fmt < MALI_COLOR_FORMAT_R8)
                        inputs->raw_fmt_mask |= BITFIELD_BIT(i);
        }

        midgard_compile_shader_nir(s, inputs, binary, info);
#endif

        info->stage = s->info.stage;
        info->contains_barrier = s->info.uses_memory_barrier ||
                                 s->info.uses_control_barrier;
        info->separable = s->info.separate_shader;

        switch (info->stage) {
        case MESA_SHADER_VERTEX:
                info->attributes_read = s->info.inputs_read;
                info->attributes_read_count = util_bitcount64(info->attributes_read);
                info->attribute_count = info->attributes_read_count;

#if PAN_ARCH <= 5
                bool vertex_id = BITSET_TEST(s->info.system_values_read,
                                             SYSTEM_VALUE_VERTEX_ID_ZERO_BASE);
                if (vertex_id)
                        info->attribute_count = MAX2(info->attribute_count, PAN_VERTEX_ID + 1);

                bool instance_id = BITSET_TEST(s->info.system_values_read,
                                               SYSTEM_VALUE_INSTANCE_ID);
                if (instance_id)
                        info->attribute_count = MAX2(info->attribute_count, PAN_INSTANCE_ID + 1);
#endif

                info->vs.writes_point_size =
                        s->info.outputs_written & (1 << VARYING_SLOT_PSIZ);

#if PAN_ARCH >= 9
                info->varyings.output_count =
                        util_last_bit(s->info.outputs_written >> VARYING_SLOT_VAR0);
#else
                collect_varyings(s, nir_var_shader_out, info->varyings.output,
                                 &info->varyings.output_count);
#endif
                break;
        case MESA_SHADER_FRAGMENT:
                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_DEPTH))
                        info->fs.writes_depth = true;
                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_STENCIL))
                        info->fs.writes_stencil = true;
                if (s->info.outputs_written & BITFIELD64_BIT(FRAG_RESULT_SAMPLE_MASK))
                        info->fs.writes_coverage = true;

                info->fs.outputs_read = s->info.outputs_read >> FRAG_RESULT_DATA0;
                info->fs.outputs_written = s->info.outputs_written >> FRAG_RESULT_DATA0;
                info->fs.sample_shading = s->info.fs.uses_sample_shading;

                info->fs.can_discard = s->info.fs.uses_discard;
                info->fs.early_fragment_tests = s->info.fs.early_fragment_tests;

                /* List of reasons we need to execute frag shaders when things
                 * are masked off */
                info->fs.sidefx = s->info.writes_memory ||
                                  s->info.fs.uses_discard ||
                                  s->info.fs.uses_demote;

                /* With suitable ZSA/blend, is early-z possible? */
                info->fs.can_early_z =
                        !info->fs.sidefx &&
                        !info->fs.writes_depth &&
                        !info->fs.writes_stencil &&
                        !info->fs.writes_coverage;

                /* Similarly with suitable state, is FPK possible? */
                info->fs.can_fpk =
                        !info->fs.writes_depth &&
                        !info->fs.writes_stencil &&
                        !info->fs.writes_coverage &&
                        !info->fs.can_discard &&
                        !info->fs.outputs_read;

                /* Requires the same hardware guarantees, so grouped as one bit
                 * in the hardware.
                 */
                info->contains_barrier |= s->info.fs.needs_quad_helper_invocations;
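
                /* Record which special fragment inputs (frag coord, point
                 * coord, front face) the shader reads.
                 */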
284 */ 285 info->contains_barrier |= s->info.fs.needs_quad_helper_invocations; 286 287 info->fs.reads_frag_coord = 288 (s->info.inputs_read & (1 << VARYING_SLOT_POS)) || 289 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRAG_COORD); 290 info->fs.reads_point_coord = 291 s->info.inputs_read & (1 << VARYING_SLOT_PNTC); 292 info->fs.reads_face = 293 (s->info.inputs_read & (1 << VARYING_SLOT_FACE)) || 294 BITSET_TEST(s->info.system_values_read, SYSTEM_VALUE_FRONT_FACE); 295#if PAN_ARCH >= 9 296 info->varyings.output_count = 297 util_last_bit(s->info.outputs_read >> VARYING_SLOT_VAR0); 298#else 299 collect_varyings(s, nir_var_shader_in, info->varyings.input, 300 &info->varyings.input_count); 301#endif 302 break; 303 case MESA_SHADER_COMPUTE: 304 info->wls_size = s->info.shared_size; 305 break; 306 default: 307 unreachable("Unknown shader state"); 308 } 309 310 info->outputs_written = s->info.outputs_written; 311 312 /* Sysvals have dedicated UBO */ 313 info->ubo_count = s->info.num_ubos; 314 if (info->sysvals.sysval_count && inputs->fixed_sysval_ubo < 0) 315 info->ubo_count++; 316 317 info->attribute_count += BITSET_LAST_BIT(s->info.images_used); 318 info->writes_global = s->info.writes_memory; 319 320 info->sampler_count = info->texture_count = BITSET_LAST_BIT(s->info.textures_used); 321 322#if PAN_ARCH >= 6 323 /* This is "redundant" information, but is needed in a draw-time hot path */ 324 for (unsigned i = 0; i < ARRAY_SIZE(info->bifrost.blend); ++i) { 325 info->bifrost.blend[i].format = 326 bifrost_blend_type_from_nir(info->bifrost.blend[i].type); 327 } 328#endif 329} 330