1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Alyssa Rosenzweig <alyssa@rosenzweig.io> 3bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd. 4bf215546Sopenharmony_ci * Copyright © 2016 Broadcom 5bf215546Sopenharmony_ci * 6bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 7bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 8bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 9bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 10bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 11bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 12bf215546Sopenharmony_ci * 13bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 14bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 15bf215546Sopenharmony_ci * Software. 16bf215546Sopenharmony_ci * 17bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23bf215546Sopenharmony_ci * SOFTWARE. 24bf215546Sopenharmony_ci */ 25bf215546Sopenharmony_ci 26bf215546Sopenharmony_ci#include "main/glheader.h" 27bf215546Sopenharmony_ci#include "compiler/nir_types.h" 28bf215546Sopenharmony_ci#include "compiler/nir/nir_builder.h" 29bf215546Sopenharmony_ci#include "util/u_debug.h" 30bf215546Sopenharmony_ci#include "util/fast_idiv_by_const.h" 31bf215546Sopenharmony_ci#include "agx_compile.h" 32bf215546Sopenharmony_ci#include "agx_compiler.h" 33bf215546Sopenharmony_ci#include "agx_builder.h" 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cistatic const struct debug_named_value agx_debug_options[] = { 36bf215546Sopenharmony_ci {"msgs", AGX_DBG_MSGS, "Print debug messages"}, 37bf215546Sopenharmony_ci {"shaders", AGX_DBG_SHADERS, "Dump shaders in NIR and AIR"}, 38bf215546Sopenharmony_ci {"shaderdb", AGX_DBG_SHADERDB, "Print statistics"}, 39bf215546Sopenharmony_ci {"verbose", AGX_DBG_VERBOSE, "Disassemble verbosely"}, 40bf215546Sopenharmony_ci {"internal", AGX_DBG_INTERNAL, "Dump even internal shaders"}, 41bf215546Sopenharmony_ci {"novalidate",AGX_DBG_NOVALIDATE,"Skip IR validation in debug builds"}, 42bf215546Sopenharmony_ci DEBUG_NAMED_VALUE_END 43bf215546Sopenharmony_ci}; 44bf215546Sopenharmony_ci 45bf215546Sopenharmony_ciDEBUG_GET_ONCE_FLAGS_OPTION(agx_debug, "AGX_MESA_DEBUG", agx_debug_options, 0) 46bf215546Sopenharmony_ci 47bf215546Sopenharmony_ciint agx_debug = 0; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci#define DBG(fmt, ...) \ 50bf215546Sopenharmony_ci do { if (agx_debug & AGX_DBG_MSGS) \ 51bf215546Sopenharmony_ci fprintf(stderr, "%s:%d: "fmt, \ 52bf215546Sopenharmony_ci __FUNCTION__, __LINE__, ##__VA_ARGS__); } while (0) 53bf215546Sopenharmony_ci 54bf215546Sopenharmony_ci/* Builds a 64-bit hash table key for an index */ 55bf215546Sopenharmony_cistatic uint64_t 56bf215546Sopenharmony_ciagx_index_to_key(agx_index idx) 57bf215546Sopenharmony_ci{ 58bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(idx) <= sizeof(uint64_t)); 59bf215546Sopenharmony_ci 60bf215546Sopenharmony_ci uint64_t key = 0; 61bf215546Sopenharmony_ci memcpy(&key, &idx, sizeof(idx)); 62bf215546Sopenharmony_ci return key; 63bf215546Sopenharmony_ci} 64bf215546Sopenharmony_ci 65bf215546Sopenharmony_ci/* 66bf215546Sopenharmony_ci * Extract a single channel out of a vector source. We split vectors with 67bf215546Sopenharmony_ci * p_split so we can use the split components directly, without emitting a 68bf215546Sopenharmony_ci * machine instruction. This has advantages of RA, as the split can usually be 69bf215546Sopenharmony_ci * optimized away. 70bf215546Sopenharmony_ci */ 71bf215546Sopenharmony_cistatic agx_index 72bf215546Sopenharmony_ciagx_emit_extract(agx_builder *b, agx_index vec, unsigned channel) 73bf215546Sopenharmony_ci{ 74bf215546Sopenharmony_ci agx_index *components = _mesa_hash_table_u64_search(b->shader->allocated_vec, 75bf215546Sopenharmony_ci agx_index_to_key(vec)); 76bf215546Sopenharmony_ci 77bf215546Sopenharmony_ci assert(components != NULL && "missing agx_emit_combine_to"); 78bf215546Sopenharmony_ci 79bf215546Sopenharmony_ci return components[channel]; 80bf215546Sopenharmony_ci} 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_cistatic void 83bf215546Sopenharmony_ciagx_cache_combine(agx_builder *b, agx_index dst, 84bf215546Sopenharmony_ci agx_index s0, agx_index s1, agx_index s2, agx_index s3) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci /* Lifetime of a hash table entry has to be at least as long as the table */ 87bf215546Sopenharmony_ci agx_index *channels = ralloc_array(b->shader, agx_index, 4); 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci channels[0] = s0; 90bf215546Sopenharmony_ci channels[1] = s1; 91bf215546Sopenharmony_ci channels[2] = s2; 92bf215546Sopenharmony_ci channels[3] = s3; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ci _mesa_hash_table_u64_insert(b->shader->allocated_vec, agx_index_to_key(dst), 95bf215546Sopenharmony_ci channels); 96bf215546Sopenharmony_ci} 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci/* 99bf215546Sopenharmony_ci * Combine multiple scalars into a vector destination. This corresponds to 100bf215546Sopenharmony_ci * p_combine, lowered to moves (a shuffle in general) after register allocation. 101bf215546Sopenharmony_ci * 102bf215546Sopenharmony_ci * To optimize vector extractions, we record the individual channels 103bf215546Sopenharmony_ci */ 104bf215546Sopenharmony_cistatic agx_instr * 105bf215546Sopenharmony_ciagx_emit_combine_to(agx_builder *b, agx_index dst, 106bf215546Sopenharmony_ci agx_index s0, agx_index s1, agx_index s2, agx_index s3) 107bf215546Sopenharmony_ci{ 108bf215546Sopenharmony_ci agx_cache_combine(b, dst, s0, s1, s2, s3); 109bf215546Sopenharmony_ci return agx_p_combine_to(b, dst, s0, s1, s2, s3); 110bf215546Sopenharmony_ci} 111bf215546Sopenharmony_ci 112bf215546Sopenharmony_cistatic void 113bf215546Sopenharmony_ciagx_block_add_successor(agx_block *block, agx_block *successor) 114bf215546Sopenharmony_ci{ 115bf215546Sopenharmony_ci assert(block != NULL && successor != NULL); 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci /* Cull impossible edges */ 118bf215546Sopenharmony_ci if (block->unconditional_jumps) 119bf215546Sopenharmony_ci return; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) { 122bf215546Sopenharmony_ci if (block->successors[i]) { 123bf215546Sopenharmony_ci if (block->successors[i] == successor) 124bf215546Sopenharmony_ci return; 125bf215546Sopenharmony_ci else 126bf215546Sopenharmony_ci continue; 127bf215546Sopenharmony_ci } 128bf215546Sopenharmony_ci 129bf215546Sopenharmony_ci block->successors[i] = successor; 130bf215546Sopenharmony_ci util_dynarray_append(&successor->predecessors, agx_block *, block); 131bf215546Sopenharmony_ci return; 132bf215546Sopenharmony_ci } 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci unreachable("Too many successors"); 135bf215546Sopenharmony_ci} 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci/* 138bf215546Sopenharmony_ci * Splits an n-component vector (vec) into n scalar destinations (dests) using a 139bf215546Sopenharmony_ci * split pseudo-instruction. 140bf215546Sopenharmony_ci * 141bf215546Sopenharmony_ci * Pre-condition: dests is filled with agx_null(). 142bf215546Sopenharmony_ci */ 143bf215546Sopenharmony_cistatic void 144bf215546Sopenharmony_ciagx_emit_split(agx_builder *b, agx_index *dests, agx_index vec, unsigned n) 145bf215546Sopenharmony_ci{ 146bf215546Sopenharmony_ci /* Setup the destinations */ 147bf215546Sopenharmony_ci for (unsigned i = 0; i < n; ++i) { 148bf215546Sopenharmony_ci dests[i] = agx_temp(b->shader, vec.size); 149bf215546Sopenharmony_ci } 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci /* Emit the split */ 152bf215546Sopenharmony_ci agx_p_split_to(b, dests[0], dests[1], dests[2], dests[3], vec); 153bf215546Sopenharmony_ci} 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_cistatic void 156bf215546Sopenharmony_ciagx_emit_cached_split(agx_builder *b, agx_index vec, unsigned n) 157bf215546Sopenharmony_ci{ 158bf215546Sopenharmony_ci agx_index dests[4] = { agx_null(), agx_null(), agx_null(), agx_null() }; 159bf215546Sopenharmony_ci agx_emit_split(b, dests, vec, n); 160bf215546Sopenharmony_ci agx_cache_combine(b, vec, dests[0], dests[1], dests[2], dests[3]); 161bf215546Sopenharmony_ci} 162bf215546Sopenharmony_ci 163bf215546Sopenharmony_cistatic void 164bf215546Sopenharmony_ciagx_emit_load_const(agx_builder *b, nir_load_const_instr *instr) 165bf215546Sopenharmony_ci{ 166bf215546Sopenharmony_ci /* Ensure we've been scalarized and bit size lowered */ 167bf215546Sopenharmony_ci unsigned bit_size = instr->def.bit_size; 168bf215546Sopenharmony_ci assert(instr->def.num_components == 1); 169bf215546Sopenharmony_ci assert(bit_size == 1 || bit_size == 16 || bit_size == 32); 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_ci /* Emit move, later passes can inline/push if useful */ 172bf215546Sopenharmony_ci agx_mov_imm_to(b, 173bf215546Sopenharmony_ci agx_get_index(instr->def.index, agx_size_for_bits(bit_size)), 174bf215546Sopenharmony_ci nir_const_value_as_uint(instr->value[0], bit_size)); 175bf215546Sopenharmony_ci} 176bf215546Sopenharmony_ci 177bf215546Sopenharmony_ci/* Emit code dividing P by Q */ 178bf215546Sopenharmony_cistatic agx_index 179bf215546Sopenharmony_ciagx_udiv_const(agx_builder *b, agx_index P, uint32_t Q) 180bf215546Sopenharmony_ci{ 181bf215546Sopenharmony_ci /* P / 1 = P */ 182bf215546Sopenharmony_ci if (Q == 1) { 183bf215546Sopenharmony_ci return P; 184bf215546Sopenharmony_ci } 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_ci /* P / UINT32_MAX = 0, unless P = UINT32_MAX when it's one */ 187bf215546Sopenharmony_ci if (Q == UINT32_MAX) { 188bf215546Sopenharmony_ci agx_index max = agx_mov_imm(b, 32, UINT32_MAX); 189bf215546Sopenharmony_ci agx_index one = agx_mov_imm(b, 32, 1); 190bf215546Sopenharmony_ci return agx_icmpsel(b, P, max, one, agx_zero(), AGX_ICOND_UEQ); 191bf215546Sopenharmony_ci } 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci /* P / 2^N = P >> N */ 194bf215546Sopenharmony_ci if (util_is_power_of_two_or_zero(Q)) { 195bf215546Sopenharmony_ci return agx_ushr(b, P, agx_mov_imm(b, 32, util_logbase2(Q))); 196bf215546Sopenharmony_ci } 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci /* Fall back on multiplication by a magic number */ 199bf215546Sopenharmony_ci struct util_fast_udiv_info info = util_compute_fast_udiv_info(Q, 32, 32); 200bf215546Sopenharmony_ci agx_index preshift = agx_mov_imm(b, 32, info.pre_shift); 201bf215546Sopenharmony_ci agx_index increment = agx_mov_imm(b, 32, info.increment); 202bf215546Sopenharmony_ci agx_index postshift = agx_mov_imm(b, 32, info.post_shift); 203bf215546Sopenharmony_ci agx_index multiplier = agx_mov_imm(b, 32, info.multiplier); 204bf215546Sopenharmony_ci agx_index multiplied = agx_temp(b->shader, AGX_SIZE_64); 205bf215546Sopenharmony_ci agx_index n = P; 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci if (info.pre_shift != 0) n = agx_ushr(b, n, preshift); 208bf215546Sopenharmony_ci if (info.increment != 0) n = agx_iadd(b, n, increment, 0); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* 64-bit multiplication, zero extending 32-bit x 32-bit, get the top word */ 211bf215546Sopenharmony_ci agx_imad_to(b, multiplied, agx_abs(n), agx_abs(multiplier), agx_zero(), 0); 212bf215546Sopenharmony_ci n = agx_temp(b->shader, AGX_SIZE_32); 213bf215546Sopenharmony_ci agx_p_extract_to(b, n, multiplied, 1); 214bf215546Sopenharmony_ci 215bf215546Sopenharmony_ci if (info.post_shift != 0) n = agx_ushr(b, n, postshift); 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci return n; 218bf215546Sopenharmony_ci} 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci/* AGX appears to lack support for vertex attributes. Lower to global loads. */ 221bf215546Sopenharmony_cistatic void 222bf215546Sopenharmony_ciagx_emit_load_attr(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) 223bf215546Sopenharmony_ci{ 224bf215546Sopenharmony_ci nir_src *offset_src = nir_get_io_offset_src(instr); 225bf215546Sopenharmony_ci assert(nir_src_is_const(*offset_src) && "no attribute indirects"); 226bf215546Sopenharmony_ci unsigned index = nir_intrinsic_base(instr) + 227bf215546Sopenharmony_ci nir_src_as_uint(*offset_src); 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci struct agx_shader_key *key = b->shader->key; 230bf215546Sopenharmony_ci struct agx_attribute attrib = key->vs.attributes[index]; 231bf215546Sopenharmony_ci 232bf215546Sopenharmony_ci /* address = base + (stride * vertex_id) + src_offset */ 233bf215546Sopenharmony_ci unsigned buf = attrib.buf; 234bf215546Sopenharmony_ci unsigned stride = key->vs.vbuf_strides[buf]; 235bf215546Sopenharmony_ci unsigned shift = agx_format_shift(attrib.format); 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci agx_index shifted_stride = agx_mov_imm(b, 32, stride >> shift); 238bf215546Sopenharmony_ci agx_index src_offset = agx_mov_imm(b, 32, attrib.src_offset); 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci agx_index vertex_id = agx_register(10, AGX_SIZE_32); 241bf215546Sopenharmony_ci agx_index instance_id = agx_register(12, AGX_SIZE_32); 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci /* A nonzero divisor requires dividing the instance ID. A zero divisor 244bf215546Sopenharmony_ci * specifies per-instance data. */ 245bf215546Sopenharmony_ci agx_index element_id = (attrib.divisor == 0) ? vertex_id : 246bf215546Sopenharmony_ci agx_udiv_const(b, instance_id, attrib.divisor); 247bf215546Sopenharmony_ci 248bf215546Sopenharmony_ci agx_index offset = agx_imad(b, element_id, shifted_stride, src_offset, 0); 249bf215546Sopenharmony_ci 250bf215546Sopenharmony_ci /* Each VBO has a 64-bit = 4 x 16-bit address, lookup the base address as a sysval */ 251bf215546Sopenharmony_ci unsigned num_vbos = key->vs.num_vbufs; 252bf215546Sopenharmony_ci unsigned base_length = (num_vbos * 4); 253bf215546Sopenharmony_ci agx_index base = agx_indexed_sysval(b->shader, 254bf215546Sopenharmony_ci AGX_PUSH_VBO_BASES, AGX_SIZE_64, buf * 4, base_length); 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci /* Load the data */ 257bf215546Sopenharmony_ci assert(instr->num_components <= 4); 258bf215546Sopenharmony_ci 259bf215546Sopenharmony_ci unsigned actual_comps = (attrib.nr_comps_minus_1 + 1); 260bf215546Sopenharmony_ci agx_index vec = agx_vec_for_dest(b->shader, &instr->dest); 261bf215546Sopenharmony_ci agx_device_load_to(b, vec, base, offset, attrib.format, 262bf215546Sopenharmony_ci BITFIELD_MASK(attrib.nr_comps_minus_1 + 1), 0); 263bf215546Sopenharmony_ci agx_wait(b, 0); 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci agx_emit_split(b, dests, vec, actual_comps); 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci agx_index one = agx_mov_imm(b, 32, fui(1.0)); 268bf215546Sopenharmony_ci agx_index zero = agx_mov_imm(b, 32, 0); 269bf215546Sopenharmony_ci agx_index default_value[4] = { zero, zero, zero, one }; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci for (unsigned i = actual_comps; i < instr->num_components; ++i) 272bf215546Sopenharmony_ci dests[i] = default_value[i]; 273bf215546Sopenharmony_ci} 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_cistatic void 276bf215546Sopenharmony_ciagx_emit_load_vary_flat(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) 277bf215546Sopenharmony_ci{ 278bf215546Sopenharmony_ci unsigned components = instr->num_components; 279bf215546Sopenharmony_ci assert(components >= 1 && components <= 4); 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci nir_src *offset = nir_get_io_offset_src(instr); 282bf215546Sopenharmony_ci assert(nir_src_is_const(*offset) && "no indirects"); 283bf215546Sopenharmony_ci unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)]; 284bf215546Sopenharmony_ci imm_index += nir_src_as_uint(*offset); 285bf215546Sopenharmony_ci 286bf215546Sopenharmony_ci assert(nir_dest_bit_size(instr->dest) == 32 && "no 16-bit flat shading"); 287bf215546Sopenharmony_ci 288bf215546Sopenharmony_ci for (unsigned i = 0; i < components; ++i) { 289bf215546Sopenharmony_ci /* vec3 for each vertex, unknown what first 2 channels are for */ 290bf215546Sopenharmony_ci agx_index values = agx_ld_vary_flat(b, agx_immediate(imm_index + i), 1); 291bf215546Sopenharmony_ci dests[i] = agx_p_extract(b, values, 2); 292bf215546Sopenharmony_ci } 293bf215546Sopenharmony_ci} 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_cistatic void 296bf215546Sopenharmony_ciagx_emit_load_vary(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) 297bf215546Sopenharmony_ci{ 298bf215546Sopenharmony_ci ASSERTED unsigned components = instr->num_components; 299bf215546Sopenharmony_ci ASSERTED nir_intrinsic_instr *parent = nir_src_as_intrinsic(instr->src[0]); 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_ci assert(components >= 1 && components <= 4); 302bf215546Sopenharmony_ci assert(parent); 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci /* TODO: Interpolation modes */ 305bf215546Sopenharmony_ci assert(parent->intrinsic == nir_intrinsic_load_barycentric_pixel); 306bf215546Sopenharmony_ci 307bf215546Sopenharmony_ci nir_src *offset = nir_get_io_offset_src(instr); 308bf215546Sopenharmony_ci assert(nir_src_is_const(*offset) && "no indirects"); 309bf215546Sopenharmony_ci unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)]; 310bf215546Sopenharmony_ci imm_index += nir_src_as_uint(*offset) * 4; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci agx_index vec = agx_vec_for_intr(b->shader, instr); 313bf215546Sopenharmony_ci agx_ld_vary_to(b, vec, agx_immediate(imm_index), components, true); 314bf215546Sopenharmony_ci agx_emit_split(b, dests, vec, components); 315bf215546Sopenharmony_ci} 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_cistatic agx_instr * 318bf215546Sopenharmony_ciagx_emit_store_vary(agx_builder *b, nir_intrinsic_instr *instr) 319bf215546Sopenharmony_ci{ 320bf215546Sopenharmony_ci nir_src *offset = nir_get_io_offset_src(instr); 321bf215546Sopenharmony_ci assert(nir_src_is_const(*offset) && "todo: indirects"); 322bf215546Sopenharmony_ci unsigned imm_index = b->shader->varyings[nir_intrinsic_base(instr)]; 323bf215546Sopenharmony_ci imm_index += nir_intrinsic_component(instr); 324bf215546Sopenharmony_ci imm_index += nir_src_as_uint(*offset); 325bf215546Sopenharmony_ci 326bf215546Sopenharmony_ci /* nir_lower_io_to_scalar */ 327bf215546Sopenharmony_ci assert(nir_intrinsic_write_mask(instr) == 0x1); 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci return agx_st_vary(b, 330bf215546Sopenharmony_ci agx_immediate(imm_index), 331bf215546Sopenharmony_ci agx_src_index(&instr->src[0])); 332bf215546Sopenharmony_ci} 333bf215546Sopenharmony_ci 334bf215546Sopenharmony_cistatic agx_instr * 335bf215546Sopenharmony_ciagx_emit_fragment_out(agx_builder *b, nir_intrinsic_instr *instr) 336bf215546Sopenharmony_ci{ 337bf215546Sopenharmony_ci const nir_variable *var = 338bf215546Sopenharmony_ci nir_find_variable_with_driver_location(b->shader->nir, 339bf215546Sopenharmony_ci nir_var_shader_out, nir_intrinsic_base(instr)); 340bf215546Sopenharmony_ci assert(var); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci unsigned loc = var->data.location; 343bf215546Sopenharmony_ci assert(var->data.index == 0 && "todo: dual-source blending"); 344bf215546Sopenharmony_ci assert(loc == FRAG_RESULT_DATA0 && "todo: MRT"); 345bf215546Sopenharmony_ci unsigned rt = (loc - FRAG_RESULT_DATA0); 346bf215546Sopenharmony_ci 347bf215546Sopenharmony_ci /* TODO: Reverse-engineer interactions with MRT */ 348bf215546Sopenharmony_ci if (b->shader->nir->info.internal) { 349bf215546Sopenharmony_ci /* clear */ 350bf215546Sopenharmony_ci } else if (b->shader->did_writeout) { 351bf215546Sopenharmony_ci agx_writeout(b, 0x0004); 352bf215546Sopenharmony_ci } else { 353bf215546Sopenharmony_ci agx_writeout(b, 0xC200); 354bf215546Sopenharmony_ci agx_writeout(b, 0x000C); 355bf215546Sopenharmony_ci } 356bf215546Sopenharmony_ci 357bf215546Sopenharmony_ci if (b->shader->nir->info.fs.uses_discard) { 358bf215546Sopenharmony_ci /* If the shader uses discard, the sample mask must be written by the 359bf215546Sopenharmony_ci * shader on all exeuction paths. If we've reached the end of the shader, 360bf215546Sopenharmony_ci * we are therefore still active and need to write a full sample mask. 361bf215546Sopenharmony_ci * TODO: interactions with MSAA and gl_SampleMask writes 362bf215546Sopenharmony_ci */ 363bf215546Sopenharmony_ci agx_sample_mask(b, agx_immediate(1)); 364bf215546Sopenharmony_ci } 365bf215546Sopenharmony_ci 366bf215546Sopenharmony_ci b->shader->did_writeout = true; 367bf215546Sopenharmony_ci return agx_st_tile(b, agx_src_index(&instr->src[0]), 368bf215546Sopenharmony_ci b->shader->key->fs.tib_formats[rt]); 369bf215546Sopenharmony_ci} 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_cistatic void 372bf215546Sopenharmony_ciagx_emit_load_tile(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) 373bf215546Sopenharmony_ci{ 374bf215546Sopenharmony_ci const nir_variable *var = 375bf215546Sopenharmony_ci nir_find_variable_with_driver_location(b->shader->nir, 376bf215546Sopenharmony_ci nir_var_shader_out, nir_intrinsic_base(instr)); 377bf215546Sopenharmony_ci assert(var); 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci unsigned loc = var->data.location; 380bf215546Sopenharmony_ci assert(var->data.index == 0 && "todo: dual-source blending"); 381bf215546Sopenharmony_ci assert(loc == FRAG_RESULT_DATA0 && "todo: MRT"); 382bf215546Sopenharmony_ci unsigned rt = (loc - FRAG_RESULT_DATA0); 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci /* TODO: Reverse-engineer interactions with MRT */ 385bf215546Sopenharmony_ci agx_writeout(b, 0xC200); 386bf215546Sopenharmony_ci agx_writeout(b, 0x0008); 387bf215546Sopenharmony_ci b->shader->did_writeout = true; 388bf215546Sopenharmony_ci b->shader->out->reads_tib = true; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci agx_index vec = agx_vec_for_dest(b->shader, &instr->dest); 391bf215546Sopenharmony_ci agx_ld_tile_to(b, vec, b->shader->key->fs.tib_formats[rt]); 392bf215546Sopenharmony_ci agx_emit_split(b, dests, vec, 4); 393bf215546Sopenharmony_ci} 394bf215546Sopenharmony_ci 395bf215546Sopenharmony_cistatic enum agx_format 396bf215546Sopenharmony_ciagx_format_for_bits(unsigned bits) 397bf215546Sopenharmony_ci{ 398bf215546Sopenharmony_ci switch (bits) { 399bf215546Sopenharmony_ci case 8: return AGX_FORMAT_I8; 400bf215546Sopenharmony_ci case 16: return AGX_FORMAT_I16; 401bf215546Sopenharmony_ci case 32: return AGX_FORMAT_I32; 402bf215546Sopenharmony_ci default: unreachable("Invalid bit size for load/store"); 403bf215546Sopenharmony_ci } 404bf215546Sopenharmony_ci} 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_cistatic agx_instr * 407bf215546Sopenharmony_ciagx_emit_load_ubo(agx_builder *b, agx_index dst, nir_intrinsic_instr *instr) 408bf215546Sopenharmony_ci{ 409bf215546Sopenharmony_ci bool kernel_input = (instr->intrinsic == nir_intrinsic_load_kernel_input); 410bf215546Sopenharmony_ci nir_src *offset = nir_get_io_offset_src(instr); 411bf215546Sopenharmony_ci 412bf215546Sopenharmony_ci if (!kernel_input && !nir_src_is_const(instr->src[0])) 413bf215546Sopenharmony_ci unreachable("todo: indirect UBO access"); 414bf215546Sopenharmony_ci 415bf215546Sopenharmony_ci /* UBO blocks are specified (kernel inputs are always 0) */ 416bf215546Sopenharmony_ci uint32_t block = kernel_input ? 0 : nir_src_as_uint(instr->src[0]); 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci /* Each UBO has a 64-bit = 4 x 16-bit address */ 419bf215546Sopenharmony_ci unsigned num_ubos = b->shader->nir->info.num_ubos; 420bf215546Sopenharmony_ci unsigned base_length = (num_ubos * 4); 421bf215546Sopenharmony_ci unsigned index = block * 4; /* 16 bit units */ 422bf215546Sopenharmony_ci 423bf215546Sopenharmony_ci /* Lookup the base address (TODO: indirection) */ 424bf215546Sopenharmony_ci agx_index base = agx_indexed_sysval(b->shader, 425bf215546Sopenharmony_ci AGX_PUSH_UBO_BASES, AGX_SIZE_64, 426bf215546Sopenharmony_ci index, base_length); 427bf215546Sopenharmony_ci 428bf215546Sopenharmony_ci /* Load the data */ 429bf215546Sopenharmony_ci assert(instr->num_components <= 4); 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci agx_device_load_to(b, dst, base, agx_src_index(offset), 432bf215546Sopenharmony_ci agx_format_for_bits(nir_dest_bit_size(instr->dest)), 433bf215546Sopenharmony_ci BITFIELD_MASK(instr->num_components), 0); 434bf215546Sopenharmony_ci agx_wait(b, 0); 435bf215546Sopenharmony_ci agx_emit_cached_split(b, dst, instr->num_components); 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci return NULL; 438bf215546Sopenharmony_ci} 439bf215546Sopenharmony_ci 440bf215546Sopenharmony_cistatic void 441bf215546Sopenharmony_ciagx_emit_load_frag_coord(agx_builder *b, agx_index *dests, nir_intrinsic_instr *instr) 442bf215546Sopenharmony_ci{ 443bf215546Sopenharmony_ci /* xy */ 444bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; ++i) { 445bf215546Sopenharmony_ci dests[i] = agx_fadd(b, agx_convert(b, agx_immediate(AGX_CONVERT_U32_TO_F), 446bf215546Sopenharmony_ci agx_get_sr(b, 32, AGX_SR_THREAD_POSITION_IN_GRID_X + i), 447bf215546Sopenharmony_ci AGX_ROUND_RTE), agx_immediate_f(0.5f)); 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci dests[2] = agx_ld_vary(b, agx_immediate(1), 1, false); /* z */ 451bf215546Sopenharmony_ci dests[3] = agx_ld_vary(b, agx_immediate(0), 1, false); /* w */ 452bf215546Sopenharmony_ci} 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_cistatic agx_instr * 455bf215546Sopenharmony_ciagx_blend_const(agx_builder *b, agx_index dst, unsigned comp) 456bf215546Sopenharmony_ci{ 457bf215546Sopenharmony_ci agx_index val = agx_indexed_sysval(b->shader, 458bf215546Sopenharmony_ci AGX_PUSH_BLEND_CONST, AGX_SIZE_32, comp * 2, 4 * 2); 459bf215546Sopenharmony_ci 460bf215546Sopenharmony_ci return agx_mov_to(b, dst, val); 461bf215546Sopenharmony_ci} 462bf215546Sopenharmony_ci 463bf215546Sopenharmony_ci/* 464bf215546Sopenharmony_ci * Demoting a helper invocation is logically equivalent to zeroing the sample 465bf215546Sopenharmony_ci * mask. Metal implement discard as such. 466bf215546Sopenharmony_ci * 467bf215546Sopenharmony_ci * XXX: Actually, Metal's "discard" is a demote, and what is implemented here 468bf215546Sopenharmony_ci * is a demote. There might be a better way to implement this to get correct 469bf215546Sopenharmony_ci * helper invocation semantics. For now, I'm kicking the can down the road. 470bf215546Sopenharmony_ci */ 471bf215546Sopenharmony_cistatic agx_instr * 472bf215546Sopenharmony_ciagx_emit_discard(agx_builder *b, nir_intrinsic_instr *instr) 473bf215546Sopenharmony_ci{ 474bf215546Sopenharmony_ci agx_writeout(b, 0xC200); 475bf215546Sopenharmony_ci agx_writeout(b, 0x0001); 476bf215546Sopenharmony_ci b->shader->did_writeout = true; 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci b->shader->out->writes_sample_mask = true; 479bf215546Sopenharmony_ci return agx_sample_mask(b, agx_immediate(0)); 480bf215546Sopenharmony_ci} 481bf215546Sopenharmony_ci 482bf215546Sopenharmony_cistatic agx_instr * 483bf215546Sopenharmony_ciagx_emit_intrinsic(agx_builder *b, nir_intrinsic_instr *instr) 484bf215546Sopenharmony_ci{ 485bf215546Sopenharmony_ci agx_index dst = nir_intrinsic_infos[instr->intrinsic].has_dest ? 486bf215546Sopenharmony_ci agx_dest_index(&instr->dest) : agx_null(); 487bf215546Sopenharmony_ci gl_shader_stage stage = b->shader->stage; 488bf215546Sopenharmony_ci agx_index dests[4] = { agx_null() }; 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci switch (instr->intrinsic) { 491bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_pixel: 492bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_centroid: 493bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_sample: 494bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_sample: 495bf215546Sopenharmony_ci case nir_intrinsic_load_barycentric_at_offset: 496bf215546Sopenharmony_ci /* handled later via load_vary */ 497bf215546Sopenharmony_ci return NULL; 498bf215546Sopenharmony_ci case nir_intrinsic_load_interpolated_input: 499bf215546Sopenharmony_ci assert(stage == MESA_SHADER_FRAGMENT); 500bf215546Sopenharmony_ci agx_emit_load_vary(b, dests, instr); 501bf215546Sopenharmony_ci break; 502bf215546Sopenharmony_ci 503bf215546Sopenharmony_ci case nir_intrinsic_load_input: 504bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT) 505bf215546Sopenharmony_ci agx_emit_load_vary_flat(b, dests, instr); 506bf215546Sopenharmony_ci else if (stage == MESA_SHADER_VERTEX) 507bf215546Sopenharmony_ci agx_emit_load_attr(b, dests, instr); 508bf215546Sopenharmony_ci else 509bf215546Sopenharmony_ci unreachable("Unsupported shader stage"); 510bf215546Sopenharmony_ci 511bf215546Sopenharmony_ci break; 512bf215546Sopenharmony_ci 513bf215546Sopenharmony_ci case nir_intrinsic_store_output: 514bf215546Sopenharmony_ci if (stage == MESA_SHADER_FRAGMENT) 515bf215546Sopenharmony_ci return agx_emit_fragment_out(b, instr); 516bf215546Sopenharmony_ci else if (stage == MESA_SHADER_VERTEX) 517bf215546Sopenharmony_ci return agx_emit_store_vary(b, instr); 518bf215546Sopenharmony_ci else 519bf215546Sopenharmony_ci unreachable("Unsupported shader stage"); 520bf215546Sopenharmony_ci 521bf215546Sopenharmony_ci case nir_intrinsic_load_output: 522bf215546Sopenharmony_ci assert(stage == MESA_SHADER_FRAGMENT); 523bf215546Sopenharmony_ci agx_emit_load_tile(b, dests, instr); 524bf215546Sopenharmony_ci break; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci case nir_intrinsic_load_ubo: 527bf215546Sopenharmony_ci case nir_intrinsic_load_kernel_input: 528bf215546Sopenharmony_ci return agx_emit_load_ubo(b, dst, instr); 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci case nir_intrinsic_load_frag_coord: 531bf215546Sopenharmony_ci agx_emit_load_frag_coord(b, dests, instr); 532bf215546Sopenharmony_ci break; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci case nir_intrinsic_discard: 535bf215546Sopenharmony_ci return agx_emit_discard(b, instr); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci case nir_intrinsic_load_back_face_agx: 538bf215546Sopenharmony_ci return agx_get_sr_to(b, dst, AGX_SR_BACKFACING); 539bf215546Sopenharmony_ci 540bf215546Sopenharmony_ci case nir_intrinsic_load_vertex_id: 541bf215546Sopenharmony_ci return agx_mov_to(b, dst, agx_abs(agx_register(10, AGX_SIZE_32))); 542bf215546Sopenharmony_ci 543bf215546Sopenharmony_ci case nir_intrinsic_load_instance_id: 544bf215546Sopenharmony_ci return agx_mov_to(b, dst, agx_abs(agx_register(12, AGX_SIZE_32))); 545bf215546Sopenharmony_ci 546bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_r_float: return agx_blend_const(b, dst, 0); 547bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_g_float: return agx_blend_const(b, dst, 1); 548bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_b_float: return agx_blend_const(b, dst, 2); 549bf215546Sopenharmony_ci case nir_intrinsic_load_blend_const_color_a_float: return agx_blend_const(b, dst, 3); 550bf215546Sopenharmony_ci 551bf215546Sopenharmony_ci default: 552bf215546Sopenharmony_ci fprintf(stderr, "Unhandled intrinsic %s\n", nir_intrinsic_infos[instr->intrinsic].name); 553bf215546Sopenharmony_ci unreachable("Unhandled intrinsic"); 554bf215546Sopenharmony_ci } 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_ci /* If we got here, there is a vector destination for the intrinsic composed 557bf215546Sopenharmony_ci * of separate scalars. Its components are specified separately in the dests 558bf215546Sopenharmony_ci * array. We need to combine them so the vector destination itself is valid. 559bf215546Sopenharmony_ci * If only individual components are accessed, this combine will be dead code 560bf215546Sopenharmony_ci * eliminated. 561bf215546Sopenharmony_ci */ 562bf215546Sopenharmony_ci return agx_emit_combine_to(b, dst, dests[0], dests[1], dests[2], dests[3]); 563bf215546Sopenharmony_ci} 564bf215546Sopenharmony_ci 565bf215546Sopenharmony_cistatic agx_index 566bf215546Sopenharmony_ciagx_alu_src_index(agx_builder *b, nir_alu_src src) 567bf215546Sopenharmony_ci{ 568bf215546Sopenharmony_ci /* Check well-formedness of the input NIR */ 569bf215546Sopenharmony_ci ASSERTED unsigned bitsize = nir_src_bit_size(src.src); 570bf215546Sopenharmony_ci unsigned comps = nir_src_num_components(src.src); 571bf215546Sopenharmony_ci unsigned channel = src.swizzle[0]; 572bf215546Sopenharmony_ci 573bf215546Sopenharmony_ci assert(bitsize == 1 || bitsize == 16 || bitsize == 32 || bitsize == 64); 574bf215546Sopenharmony_ci assert(!(src.negate || src.abs)); 575bf215546Sopenharmony_ci assert(channel < comps); 576bf215546Sopenharmony_ci 577bf215546Sopenharmony_ci agx_index idx = agx_src_index(&src.src); 578bf215546Sopenharmony_ci 579bf215546Sopenharmony_ci /* We only deal with scalars, extract a single scalar if needed */ 580bf215546Sopenharmony_ci if (comps > 1) 581bf215546Sopenharmony_ci return agx_emit_extract(b, idx, channel); 582bf215546Sopenharmony_ci else 583bf215546Sopenharmony_ci return idx; 584bf215546Sopenharmony_ci} 585bf215546Sopenharmony_ci 586bf215546Sopenharmony_cistatic agx_instr * 587bf215546Sopenharmony_ciagx_emit_alu_bool(agx_builder *b, nir_op op, 588bf215546Sopenharmony_ci agx_index dst, agx_index s0, agx_index s1, agx_index s2) 589bf215546Sopenharmony_ci{ 590bf215546Sopenharmony_ci /* Handle 1-bit bools as zero/nonzero rather than specifically 0/1 or 0/~0. 591bf215546Sopenharmony_ci * This will give the optimizer flexibility. */ 592bf215546Sopenharmony_ci agx_index f = agx_immediate(0); 593bf215546Sopenharmony_ci agx_index t = agx_immediate(0x1); 594bf215546Sopenharmony_ci 595bf215546Sopenharmony_ci switch (op) { 596bf215546Sopenharmony_ci case nir_op_feq: return agx_fcmpsel_to(b, dst, s0, s1, t, f, AGX_FCOND_EQ); 597bf215546Sopenharmony_ci case nir_op_flt: return agx_fcmpsel_to(b, dst, s0, s1, t, f, AGX_FCOND_LT); 598bf215546Sopenharmony_ci case nir_op_fge: return agx_fcmpsel_to(b, dst, s0, s1, t, f, AGX_FCOND_GE); 599bf215546Sopenharmony_ci case nir_op_fneu: return agx_fcmpsel_to(b, dst, s0, s1, f, t, AGX_FCOND_EQ); 600bf215546Sopenharmony_ci 601bf215546Sopenharmony_ci case nir_op_ieq: return agx_icmpsel_to(b, dst, s0, s1, t, f, AGX_ICOND_UEQ); 602bf215546Sopenharmony_ci case nir_op_ine: return agx_icmpsel_to(b, dst, s0, s1, f, t, AGX_ICOND_UEQ); 603bf215546Sopenharmony_ci case nir_op_ilt: return agx_icmpsel_to(b, dst, s0, s1, t, f, AGX_ICOND_SLT); 604bf215546Sopenharmony_ci case nir_op_ige: return agx_icmpsel_to(b, dst, s0, s1, f, t, AGX_ICOND_SLT); 605bf215546Sopenharmony_ci case nir_op_ult: return agx_icmpsel_to(b, dst, s0, s1, t, f, AGX_ICOND_ULT); 606bf215546Sopenharmony_ci case nir_op_uge: return agx_icmpsel_to(b, dst, s0, s1, f, t, AGX_ICOND_ULT); 607bf215546Sopenharmony_ci 608bf215546Sopenharmony_ci case nir_op_mov: return agx_mov_to(b, dst, s0); 609bf215546Sopenharmony_ci case nir_op_iand: return agx_and_to(b, dst, s0, s1); 610bf215546Sopenharmony_ci case nir_op_ior: return agx_or_to(b, dst, s0, s1); 611bf215546Sopenharmony_ci case nir_op_ixor: return agx_xor_to(b, dst, s0, s1); 612bf215546Sopenharmony_ci case nir_op_inot: return agx_xor_to(b, dst, s0, t); 613bf215546Sopenharmony_ci 614bf215546Sopenharmony_ci case nir_op_f2b1: return agx_fcmpsel_to(b, dst, s0, f, f, t, AGX_FCOND_EQ); 615bf215546Sopenharmony_ci case nir_op_i2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ); 616bf215546Sopenharmony_ci case nir_op_b2b1: return agx_icmpsel_to(b, dst, s0, f, f, t, AGX_ICOND_UEQ); 617bf215546Sopenharmony_ci 618bf215546Sopenharmony_ci case nir_op_bcsel: 619bf215546Sopenharmony_ci return agx_icmpsel_to(b, dst, s0, f, s2, s1, AGX_ICOND_UEQ); 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci default: 622bf215546Sopenharmony_ci fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[op].name); 623bf215546Sopenharmony_ci unreachable("Unhandled boolean ALU instruction"); 624bf215546Sopenharmony_ci } 625bf215546Sopenharmony_ci} 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_cistatic agx_instr * 628bf215546Sopenharmony_ciagx_emit_alu(agx_builder *b, nir_alu_instr *instr) 629bf215546Sopenharmony_ci{ 630bf215546Sopenharmony_ci unsigned srcs = nir_op_infos[instr->op].num_inputs; 631bf215546Sopenharmony_ci unsigned sz = nir_dest_bit_size(instr->dest.dest); 632bf215546Sopenharmony_ci unsigned src_sz = srcs ? nir_src_bit_size(instr->src[0].src) : 0; 633bf215546Sopenharmony_ci ASSERTED unsigned comps = nir_dest_num_components(instr->dest.dest); 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci assert(comps == 1 || nir_op_is_vec(instr->op)); 636bf215546Sopenharmony_ci assert(sz == 1 || sz == 16 || sz == 32 || sz == 64); 637bf215546Sopenharmony_ci 638bf215546Sopenharmony_ci agx_index dst = agx_dest_index(&instr->dest.dest); 639bf215546Sopenharmony_ci agx_index s0 = srcs > 0 ? agx_alu_src_index(b, instr->src[0]) : agx_null(); 640bf215546Sopenharmony_ci agx_index s1 = srcs > 1 ? agx_alu_src_index(b, instr->src[1]) : agx_null(); 641bf215546Sopenharmony_ci agx_index s2 = srcs > 2 ? agx_alu_src_index(b, instr->src[2]) : agx_null(); 642bf215546Sopenharmony_ci agx_index s3 = srcs > 3 ? agx_alu_src_index(b, instr->src[3]) : agx_null(); 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci /* 1-bit bools are a bit special, only handle with select ops */ 645bf215546Sopenharmony_ci if (sz == 1) 646bf215546Sopenharmony_ci return agx_emit_alu_bool(b, instr->op, dst, s0, s1, s2); 647bf215546Sopenharmony_ci 648bf215546Sopenharmony_ci#define UNOP(nop, aop) \ 649bf215546Sopenharmony_ci case nir_op_ ## nop: return agx_ ## aop ## _to(b, dst, s0); 650bf215546Sopenharmony_ci#define BINOP(nop, aop) \ 651bf215546Sopenharmony_ci case nir_op_ ## nop: return agx_ ## aop ## _to(b, dst, s0, s1); 652bf215546Sopenharmony_ci#define TRIOP(nop, aop) \ 653bf215546Sopenharmony_ci case nir_op_ ## nop: return agx_ ## aop ## _to(b, dst, s0, s1, s2); 654bf215546Sopenharmony_ci 655bf215546Sopenharmony_ci switch (instr->op) { 656bf215546Sopenharmony_ci BINOP(fadd, fadd); 657bf215546Sopenharmony_ci BINOP(fmul, fmul); 658bf215546Sopenharmony_ci TRIOP(ffma, fma); 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci UNOP(f2f16, fmov); 661bf215546Sopenharmony_ci UNOP(f2f32, fmov); 662bf215546Sopenharmony_ci UNOP(fround_even, roundeven); 663bf215546Sopenharmony_ci UNOP(ftrunc, trunc); 664bf215546Sopenharmony_ci UNOP(ffloor, floor); 665bf215546Sopenharmony_ci UNOP(fceil, ceil); 666bf215546Sopenharmony_ci UNOP(frcp, rcp); 667bf215546Sopenharmony_ci UNOP(frsq, rsqrt); 668bf215546Sopenharmony_ci UNOP(flog2, log2); 669bf215546Sopenharmony_ci UNOP(fexp2, exp2); 670bf215546Sopenharmony_ci 671bf215546Sopenharmony_ci UNOP(fddx, dfdx); 672bf215546Sopenharmony_ci UNOP(fddx_coarse, dfdx); 673bf215546Sopenharmony_ci UNOP(fddx_fine, dfdx); 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci UNOP(fddy, dfdy); 676bf215546Sopenharmony_ci UNOP(fddy_coarse, dfdy); 677bf215546Sopenharmony_ci UNOP(fddy_fine, dfdy); 678bf215546Sopenharmony_ci 679bf215546Sopenharmony_ci UNOP(mov, mov); 680bf215546Sopenharmony_ci UNOP(u2u16, mov); 681bf215546Sopenharmony_ci UNOP(u2u32, mov); 682bf215546Sopenharmony_ci UNOP(inot, not); 683bf215546Sopenharmony_ci BINOP(iand, and); 684bf215546Sopenharmony_ci BINOP(ior, or); 685bf215546Sopenharmony_ci BINOP(ixor, xor); 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci case nir_op_fsqrt: return agx_fmul_to(b, dst, s0, agx_srsqrt(b, s0)); 688bf215546Sopenharmony_ci case nir_op_fsub: return agx_fadd_to(b, dst, s0, agx_neg(s1)); 689bf215546Sopenharmony_ci case nir_op_fabs: return agx_fmov_to(b, dst, agx_abs(s0)); 690bf215546Sopenharmony_ci case nir_op_fneg: return agx_fmov_to(b, dst, agx_neg(s0)); 691bf215546Sopenharmony_ci 692bf215546Sopenharmony_ci case nir_op_fmin: return agx_fcmpsel_to(b, dst, s0, s1, s0, s1, AGX_FCOND_LTN); 693bf215546Sopenharmony_ci case nir_op_fmax: return agx_fcmpsel_to(b, dst, s0, s1, s0, s1, AGX_FCOND_GTN); 694bf215546Sopenharmony_ci case nir_op_imin: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_SLT); 695bf215546Sopenharmony_ci case nir_op_imax: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_SGT); 696bf215546Sopenharmony_ci case nir_op_umin: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_ULT); 697bf215546Sopenharmony_ci case nir_op_umax: return agx_icmpsel_to(b, dst, s0, s1, s0, s1, AGX_ICOND_UGT); 698bf215546Sopenharmony_ci 699bf215546Sopenharmony_ci case nir_op_iadd: return agx_iadd_to(b, dst, s0, s1, 0); 700bf215546Sopenharmony_ci case nir_op_isub: return agx_iadd_to(b, dst, s0, agx_neg(s1), 0); 701bf215546Sopenharmony_ci case nir_op_ineg: return agx_iadd_to(b, dst, agx_zero(), agx_neg(s0), 0); 702bf215546Sopenharmony_ci case nir_op_imul: return agx_imad_to(b, dst, s0, s1, agx_zero(), 0); 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci case nir_op_ishl: return agx_bfi_to(b, dst, agx_zero(), s0, s1, 0); 705bf215546Sopenharmony_ci case nir_op_ushr: return agx_ushr_to(b, dst, s0, s1); 706bf215546Sopenharmony_ci case nir_op_ishr: return agx_asr_to(b, dst, s0, s1); 707bf215546Sopenharmony_ci 708bf215546Sopenharmony_ci case nir_op_bcsel: 709bf215546Sopenharmony_ci return agx_icmpsel_to(b, dst, s0, agx_zero(), s2, s1, AGX_ICOND_UEQ); 710bf215546Sopenharmony_ci 711bf215546Sopenharmony_ci case nir_op_b2i32: 712bf215546Sopenharmony_ci case nir_op_b2i16: 713bf215546Sopenharmony_ci return agx_icmpsel_to(b, dst, s0, agx_zero(), agx_zero(), agx_immediate(1), AGX_ICOND_UEQ); 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_ci case nir_op_b2f16: 716bf215546Sopenharmony_ci case nir_op_b2f32: 717bf215546Sopenharmony_ci { 718bf215546Sopenharmony_ci /* At this point, boolean is just zero/nonzero, so compare with zero */ 719bf215546Sopenharmony_ci agx_index one = (sz == 16) ? 720bf215546Sopenharmony_ci agx_mov_imm(b, 16, _mesa_float_to_half(1.0)) : 721bf215546Sopenharmony_ci agx_mov_imm(b, 32, fui(1.0)); 722bf215546Sopenharmony_ci 723bf215546Sopenharmony_ci agx_index zero = agx_zero(); 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci return agx_fcmpsel_to(b, dst, s0, zero, zero, one, AGX_FCOND_EQ); 726bf215546Sopenharmony_ci } 727bf215546Sopenharmony_ci 728bf215546Sopenharmony_ci case nir_op_i2i32: 729bf215546Sopenharmony_ci { 730bf215546Sopenharmony_ci if (s0.size != AGX_SIZE_16) 731bf215546Sopenharmony_ci unreachable("todo: more conversions"); 732bf215546Sopenharmony_ci 733bf215546Sopenharmony_ci return agx_iadd_to(b, dst, s0, agx_zero(), 0); 734bf215546Sopenharmony_ci } 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci case nir_op_i2i16: 737bf215546Sopenharmony_ci { 738bf215546Sopenharmony_ci if (s0.size != AGX_SIZE_32) 739bf215546Sopenharmony_ci unreachable("todo: more conversions"); 740bf215546Sopenharmony_ci 741bf215546Sopenharmony_ci return agx_iadd_to(b, dst, s0, agx_zero(), 0); 742bf215546Sopenharmony_ci } 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci case nir_op_iadd_sat: 745bf215546Sopenharmony_ci { 746bf215546Sopenharmony_ci agx_instr *I = agx_iadd_to(b, dst, s0, s1, 0); 747bf215546Sopenharmony_ci I->saturate = true; 748bf215546Sopenharmony_ci return I; 749bf215546Sopenharmony_ci } 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci case nir_op_isub_sat: 752bf215546Sopenharmony_ci { 753bf215546Sopenharmony_ci agx_instr *I = agx_iadd_to(b, dst, s0, agx_neg(s1), 0); 754bf215546Sopenharmony_ci I->saturate = true; 755bf215546Sopenharmony_ci return I; 756bf215546Sopenharmony_ci } 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci case nir_op_uadd_sat: 759bf215546Sopenharmony_ci { 760bf215546Sopenharmony_ci agx_instr *I = agx_iadd_to(b, dst, agx_abs(s0), agx_abs(s1), 0); 761bf215546Sopenharmony_ci I->saturate = true; 762bf215546Sopenharmony_ci return I; 763bf215546Sopenharmony_ci } 764bf215546Sopenharmony_ci 765bf215546Sopenharmony_ci case nir_op_usub_sat: 766bf215546Sopenharmony_ci { 767bf215546Sopenharmony_ci agx_instr *I = agx_iadd_to(b, dst, agx_abs(s0), agx_neg(agx_abs(s1)), 0); 768bf215546Sopenharmony_ci I->saturate = true; 769bf215546Sopenharmony_ci return I; 770bf215546Sopenharmony_ci } 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci case nir_op_fsat: 773bf215546Sopenharmony_ci { 774bf215546Sopenharmony_ci agx_instr *I = agx_fadd_to(b, dst, s0, agx_negzero()); 775bf215546Sopenharmony_ci I->saturate = true; 776bf215546Sopenharmony_ci return I; 777bf215546Sopenharmony_ci } 778bf215546Sopenharmony_ci 779bf215546Sopenharmony_ci case nir_op_fsin_agx: 780bf215546Sopenharmony_ci { 781bf215546Sopenharmony_ci agx_index fixup = agx_sin_pt_1(b, s0); 782bf215546Sopenharmony_ci agx_index sinc = agx_sin_pt_2(b, fixup); 783bf215546Sopenharmony_ci return agx_fmul_to(b, dst, sinc, fixup); 784bf215546Sopenharmony_ci } 785bf215546Sopenharmony_ci 786bf215546Sopenharmony_ci case nir_op_f2i16: 787bf215546Sopenharmony_ci return agx_convert_to(b, dst, 788bf215546Sopenharmony_ci agx_immediate(AGX_CONVERT_F_TO_S16), s0, AGX_ROUND_RTZ); 789bf215546Sopenharmony_ci 790bf215546Sopenharmony_ci case nir_op_f2i32: 791bf215546Sopenharmony_ci return agx_convert_to(b, dst, 792bf215546Sopenharmony_ci agx_immediate(AGX_CONVERT_F_TO_S32), s0, AGX_ROUND_RTZ); 793bf215546Sopenharmony_ci 794bf215546Sopenharmony_ci case nir_op_f2u16: 795bf215546Sopenharmony_ci return agx_convert_to(b, dst, 796bf215546Sopenharmony_ci agx_immediate(AGX_CONVERT_F_TO_U16), s0, AGX_ROUND_RTZ); 797bf215546Sopenharmony_ci 798bf215546Sopenharmony_ci case nir_op_f2u32: 799bf215546Sopenharmony_ci return agx_convert_to(b, dst, 800bf215546Sopenharmony_ci agx_immediate(AGX_CONVERT_F_TO_U32), s0, AGX_ROUND_RTZ); 801bf215546Sopenharmony_ci 802bf215546Sopenharmony_ci case nir_op_u2f16: 803bf215546Sopenharmony_ci case nir_op_u2f32: 804bf215546Sopenharmony_ci { 805bf215546Sopenharmony_ci if (src_sz == 64) 806bf215546Sopenharmony_ci unreachable("64-bit conversions unimplemented"); 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci enum agx_convert mode = 809bf215546Sopenharmony_ci (src_sz == 32) ? AGX_CONVERT_U32_TO_F : 810bf215546Sopenharmony_ci (src_sz == 16) ? AGX_CONVERT_U16_TO_F : 811bf215546Sopenharmony_ci AGX_CONVERT_U8_TO_F; 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE); 814bf215546Sopenharmony_ci } 815bf215546Sopenharmony_ci 816bf215546Sopenharmony_ci case nir_op_i2f16: 817bf215546Sopenharmony_ci case nir_op_i2f32: 818bf215546Sopenharmony_ci { 819bf215546Sopenharmony_ci if (src_sz == 64) 820bf215546Sopenharmony_ci unreachable("64-bit conversions unimplemented"); 821bf215546Sopenharmony_ci 822bf215546Sopenharmony_ci enum agx_convert mode = 823bf215546Sopenharmony_ci (src_sz == 32) ? AGX_CONVERT_S32_TO_F : 824bf215546Sopenharmony_ci (src_sz == 16) ? AGX_CONVERT_S16_TO_F : 825bf215546Sopenharmony_ci AGX_CONVERT_S8_TO_F; 826bf215546Sopenharmony_ci 827bf215546Sopenharmony_ci return agx_convert_to(b, dst, agx_immediate(mode), s0, AGX_ROUND_RTE); 828bf215546Sopenharmony_ci } 829bf215546Sopenharmony_ci 830bf215546Sopenharmony_ci case nir_op_vec2: 831bf215546Sopenharmony_ci case nir_op_vec3: 832bf215546Sopenharmony_ci case nir_op_vec4: 833bf215546Sopenharmony_ci return agx_emit_combine_to(b, dst, s0, s1, s2, s3); 834bf215546Sopenharmony_ci 835bf215546Sopenharmony_ci case nir_op_vec8: 836bf215546Sopenharmony_ci case nir_op_vec16: 837bf215546Sopenharmony_ci unreachable("should've been lowered"); 838bf215546Sopenharmony_ci 839bf215546Sopenharmony_ci default: 840bf215546Sopenharmony_ci fprintf(stderr, "Unhandled ALU op %s\n", nir_op_infos[instr->op].name); 841bf215546Sopenharmony_ci unreachable("Unhandled ALU instruction"); 842bf215546Sopenharmony_ci } 843bf215546Sopenharmony_ci} 844bf215546Sopenharmony_ci 845bf215546Sopenharmony_cistatic enum agx_dim 846bf215546Sopenharmony_ciagx_tex_dim(enum glsl_sampler_dim dim, bool array) 847bf215546Sopenharmony_ci{ 848bf215546Sopenharmony_ci switch (dim) { 849bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_1D: 850bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_BUF: 851bf215546Sopenharmony_ci return array ? AGX_DIM_TEX_1D_ARRAY : AGX_DIM_TEX_1D; 852bf215546Sopenharmony_ci 853bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_2D: 854bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_RECT: 855bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_EXTERNAL: 856bf215546Sopenharmony_ci return array ? AGX_DIM_TEX_2D_ARRAY : AGX_DIM_TEX_2D; 857bf215546Sopenharmony_ci 858bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_MS: 859bf215546Sopenharmony_ci assert(!array && "multisampled arrays unsupported"); 860bf215546Sopenharmony_ci return AGX_DIM_TEX_2D_MS; 861bf215546Sopenharmony_ci 862bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_3D: 863bf215546Sopenharmony_ci assert(!array && "3D arrays unsupported"); 864bf215546Sopenharmony_ci return AGX_DIM_TEX_3D; 865bf215546Sopenharmony_ci 866bf215546Sopenharmony_ci case GLSL_SAMPLER_DIM_CUBE: 867bf215546Sopenharmony_ci return array ? AGX_DIM_TEX_CUBE_ARRAY : AGX_DIM_TEX_CUBE; 868bf215546Sopenharmony_ci 869bf215546Sopenharmony_ci default: 870bf215546Sopenharmony_ci unreachable("Invalid sampler dim\n"); 871bf215546Sopenharmony_ci } 872bf215546Sopenharmony_ci} 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_cistatic enum agx_lod_mode 875bf215546Sopenharmony_ciagx_lod_mode_for_nir(nir_texop op) 876bf215546Sopenharmony_ci{ 877bf215546Sopenharmony_ci switch (op) { 878bf215546Sopenharmony_ci case nir_texop_tex: return AGX_LOD_MODE_AUTO_LOD; 879bf215546Sopenharmony_ci case nir_texop_txb: return AGX_LOD_MODE_AUTO_LOD_BIAS; 880bf215546Sopenharmony_ci case nir_texop_txl: return AGX_LOD_MODE_LOD_MIN; 881bf215546Sopenharmony_ci default: unreachable("Unhandled texture op"); 882bf215546Sopenharmony_ci } 883bf215546Sopenharmony_ci} 884bf215546Sopenharmony_ci 885bf215546Sopenharmony_cistatic void 886bf215546Sopenharmony_ciagx_emit_tex(agx_builder *b, nir_tex_instr *instr) 887bf215546Sopenharmony_ci{ 888bf215546Sopenharmony_ci switch (instr->op) { 889bf215546Sopenharmony_ci case nir_texop_tex: 890bf215546Sopenharmony_ci case nir_texop_txl: 891bf215546Sopenharmony_ci case nir_texop_txb: 892bf215546Sopenharmony_ci break; 893bf215546Sopenharmony_ci default: 894bf215546Sopenharmony_ci unreachable("Unhandled texture op"); 895bf215546Sopenharmony_ci } 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci agx_index coords = agx_null(), 898bf215546Sopenharmony_ci texture = agx_immediate(instr->texture_index), 899bf215546Sopenharmony_ci sampler = agx_immediate(instr->sampler_index), 900bf215546Sopenharmony_ci lod = agx_immediate(0), 901bf215546Sopenharmony_ci offset = agx_null(); 902bf215546Sopenharmony_ci 903bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->num_srcs; ++i) { 904bf215546Sopenharmony_ci agx_index index = agx_src_index(&instr->src[i].src); 905bf215546Sopenharmony_ci 906bf215546Sopenharmony_ci switch (instr->src[i].src_type) { 907bf215546Sopenharmony_ci case nir_tex_src_coord: 908bf215546Sopenharmony_ci coords = index; 909bf215546Sopenharmony_ci 910bf215546Sopenharmony_ci /* Array textures are indexed by a floating-point in NIR, but by an 911bf215546Sopenharmony_ci * integer in AGX. Convert the array index from float-to-int for array 912bf215546Sopenharmony_ci * textures. The array index is the last source in NIR. The conversion 913bf215546Sopenharmony_ci * is according to the rule from 8.9 ("Texture Functions") of the GLSL 914bf215546Sopenharmony_ci * ES 3.20 specification: 915bf215546Sopenharmony_ci * 916bf215546Sopenharmony_ci * max(0, min(d - 1, floor(layer + 0.5))) = 917bf215546Sopenharmony_ci * max(0, min(d - 1, f32_to_u32(layer + 0.5))) = 918bf215546Sopenharmony_ci * min(d - 1, f32_to_u32(layer + 0.5)) 919bf215546Sopenharmony_ci */ 920bf215546Sopenharmony_ci if (instr->is_array) { 921bf215546Sopenharmony_ci unsigned nr = nir_src_num_components(instr->src[i].src); 922bf215546Sopenharmony_ci agx_index channels[4] = {}; 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci for (unsigned i = 0; i < nr; ++i) 925bf215546Sopenharmony_ci channels[i] = agx_emit_extract(b, index, i); 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci agx_index layer = agx_fadd(b, channels[nr - 1], 928bf215546Sopenharmony_ci agx_immediate_f(0.5f)); 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci agx_index d1 = agx_indexed_sysval(b->shader, 931bf215546Sopenharmony_ci AGX_PUSH_ARRAY_SIZE_MINUS_1, AGX_SIZE_16, 932bf215546Sopenharmony_ci instr->texture_index, 1); 933bf215546Sopenharmony_ci 934bf215546Sopenharmony_ci layer = agx_convert(b, agx_immediate(AGX_CONVERT_F_TO_U32), layer, 935bf215546Sopenharmony_ci AGX_ROUND_RTZ); 936bf215546Sopenharmony_ci 937bf215546Sopenharmony_ci agx_index layer16 = agx_temp(b->shader, AGX_SIZE_16); 938bf215546Sopenharmony_ci agx_mov_to(b, layer16, layer); 939bf215546Sopenharmony_ci 940bf215546Sopenharmony_ci layer = agx_icmpsel(b, layer16, d1, layer16, d1, AGX_ICOND_ULT); 941bf215546Sopenharmony_ci 942bf215546Sopenharmony_ci agx_index layer32 = agx_temp(b->shader, AGX_SIZE_32); 943bf215546Sopenharmony_ci agx_mov_to(b, layer32, layer); 944bf215546Sopenharmony_ci 945bf215546Sopenharmony_ci channels[nr - 1] = layer32; 946bf215546Sopenharmony_ci coords = agx_p_combine(b, channels[0], channels[1], channels[2], channels[3]); 947bf215546Sopenharmony_ci } else { 948bf215546Sopenharmony_ci coords = index; 949bf215546Sopenharmony_ci } 950bf215546Sopenharmony_ci 951bf215546Sopenharmony_ci break; 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci case nir_tex_src_lod: 954bf215546Sopenharmony_ci case nir_tex_src_bias: 955bf215546Sopenharmony_ci lod = index; 956bf215546Sopenharmony_ci break; 957bf215546Sopenharmony_ci 958bf215546Sopenharmony_ci case nir_tex_src_ms_index: 959bf215546Sopenharmony_ci case nir_tex_src_offset: 960bf215546Sopenharmony_ci case nir_tex_src_comparator: 961bf215546Sopenharmony_ci case nir_tex_src_texture_offset: 962bf215546Sopenharmony_ci case nir_tex_src_sampler_offset: 963bf215546Sopenharmony_ci default: 964bf215546Sopenharmony_ci unreachable("todo"); 965bf215546Sopenharmony_ci } 966bf215546Sopenharmony_ci } 967bf215546Sopenharmony_ci 968bf215546Sopenharmony_ci agx_index dst = agx_dest_index(&instr->dest); 969bf215546Sopenharmony_ci agx_texture_sample_to(b, dst, coords, lod, texture, sampler, offset, 970bf215546Sopenharmony_ci agx_tex_dim(instr->sampler_dim, instr->is_array), 971bf215546Sopenharmony_ci agx_lod_mode_for_nir(instr->op), 972bf215546Sopenharmony_ci 0xF, /* TODO: wrmask */ 973bf215546Sopenharmony_ci 0); 974bf215546Sopenharmony_ci 975bf215546Sopenharmony_ci agx_wait(b, 0); 976bf215546Sopenharmony_ci agx_emit_cached_split(b, dst, 4); 977bf215546Sopenharmony_ci} 978bf215546Sopenharmony_ci 979bf215546Sopenharmony_ci/* 980bf215546Sopenharmony_ci * Mark the logical end of the current block by emitting a p_logical_end marker. 981bf215546Sopenharmony_ci * Note if an unconditional jump is emitted (for instance, to break out of a 982bf215546Sopenharmony_ci * loop from inside an if), the block has already reached its logical end so we 983bf215546Sopenharmony_ci * don't re-emit p_logical_end. The validator checks this, and correct register 984bf215546Sopenharmony_ci * allocation depends on it. 985bf215546Sopenharmony_ci */ 986bf215546Sopenharmony_cistatic void 987bf215546Sopenharmony_ciagx_emit_logical_end(agx_builder *b) 988bf215546Sopenharmony_ci{ 989bf215546Sopenharmony_ci if (!b->shader->current_block->unconditional_jumps) 990bf215546Sopenharmony_ci agx_p_logical_end(b); 991bf215546Sopenharmony_ci} 992bf215546Sopenharmony_ci 993bf215546Sopenharmony_ci/* NIR loops are treated as a pair of AGX loops: 994bf215546Sopenharmony_ci * 995bf215546Sopenharmony_ci * do { 996bf215546Sopenharmony_ci * do { 997bf215546Sopenharmony_ci * ... 998bf215546Sopenharmony_ci * } while (0); 999bf215546Sopenharmony_ci * } while (cond); 1000bf215546Sopenharmony_ci * 1001bf215546Sopenharmony_ci * By manipulating the nesting counter (r0l), we may break out of nested loops, 1002bf215546Sopenharmony_ci * so under the model, both break and continue may be implemented as breaks, 1003bf215546Sopenharmony_ci * where break breaks out of the outer loop (2 layers) and continue breaks out 1004bf215546Sopenharmony_ci * of the inner loop (1 layer). 1005bf215546Sopenharmony_ci * 1006bf215546Sopenharmony_ci * After manipulating the nesting counter directly, pop_exec #0 must be used to 1007bf215546Sopenharmony_ci * flush the update to the execution mask. 1008bf215546Sopenharmony_ci */ 1009bf215546Sopenharmony_ci 1010bf215546Sopenharmony_cistatic void 1011bf215546Sopenharmony_ciagx_emit_jump(agx_builder *b, nir_jump_instr *instr) 1012bf215546Sopenharmony_ci{ 1013bf215546Sopenharmony_ci agx_context *ctx = b->shader; 1014bf215546Sopenharmony_ci assert (instr->type == nir_jump_break || instr->type == nir_jump_continue); 1015bf215546Sopenharmony_ci 1016bf215546Sopenharmony_ci /* Break out of either one or two loops */ 1017bf215546Sopenharmony_ci unsigned nestings = b->shader->loop_nesting; 1018bf215546Sopenharmony_ci 1019bf215546Sopenharmony_ci if (instr->type == nir_jump_continue) { 1020bf215546Sopenharmony_ci nestings += 1; 1021bf215546Sopenharmony_ci agx_block_add_successor(ctx->current_block, ctx->continue_block); 1022bf215546Sopenharmony_ci } else if (instr->type == nir_jump_break) { 1023bf215546Sopenharmony_ci nestings += 2; 1024bf215546Sopenharmony_ci agx_block_add_successor(ctx->current_block, ctx->break_block); 1025bf215546Sopenharmony_ci } 1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci /* Update the counter and flush */ 1028bf215546Sopenharmony_ci agx_index r0l = agx_register(0, false); 1029bf215546Sopenharmony_ci agx_mov_to(b, r0l, agx_immediate(nestings)); 1030bf215546Sopenharmony_ci 1031bf215546Sopenharmony_ci /* Jumps must come at the end of a block */ 1032bf215546Sopenharmony_ci agx_emit_logical_end(b); 1033bf215546Sopenharmony_ci agx_pop_exec(b, 0); 1034bf215546Sopenharmony_ci 1035bf215546Sopenharmony_ci ctx->current_block->unconditional_jumps = true; 1036bf215546Sopenharmony_ci} 1037bf215546Sopenharmony_ci 1038bf215546Sopenharmony_cistatic void 1039bf215546Sopenharmony_ciagx_emit_phi(agx_builder *b, nir_phi_instr *instr) 1040bf215546Sopenharmony_ci{ 1041bf215546Sopenharmony_ci agx_instr *I = agx_phi_to(b, agx_dest_index(&instr->dest)); 1042bf215546Sopenharmony_ci 1043bf215546Sopenharmony_ci /* Deferred */ 1044bf215546Sopenharmony_ci I->phi = instr; 1045bf215546Sopenharmony_ci} 1046bf215546Sopenharmony_ci 1047bf215546Sopenharmony_ci/* Look up the AGX block corresponding to a given NIR block. Used when 1048bf215546Sopenharmony_ci * translating phi nodes after emitting all blocks. 1049bf215546Sopenharmony_ci */ 1050bf215546Sopenharmony_cistatic agx_block * 1051bf215546Sopenharmony_ciagx_from_nir_block(agx_context *ctx, nir_block *block) 1052bf215546Sopenharmony_ci{ 1053bf215546Sopenharmony_ci return ctx->indexed_nir_blocks[block->index]; 1054bf215546Sopenharmony_ci} 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_cistatic void 1057bf215546Sopenharmony_ciagx_emit_phi_deferred(agx_context *ctx, agx_block *block, agx_instr *I) 1058bf215546Sopenharmony_ci{ 1059bf215546Sopenharmony_ci nir_phi_instr *phi = I->phi; 1060bf215546Sopenharmony_ci 1061bf215546Sopenharmony_ci /* Guaranteed by lower_phis_to_scalar */ 1062bf215546Sopenharmony_ci assert(phi->dest.ssa.num_components == 1); 1063bf215546Sopenharmony_ci 1064bf215546Sopenharmony_ci I->nr_srcs = exec_list_length(&phi->srcs); 1065bf215546Sopenharmony_ci I->src = rzalloc_array(I, agx_index, I->nr_srcs); 1066bf215546Sopenharmony_ci 1067bf215546Sopenharmony_ci nir_foreach_phi_src(src, phi) { 1068bf215546Sopenharmony_ci agx_block *pred = agx_from_nir_block(ctx, src->pred); 1069bf215546Sopenharmony_ci unsigned i = agx_predecessor_index(block, pred); 1070bf215546Sopenharmony_ci assert(i < I->nr_srcs); 1071bf215546Sopenharmony_ci 1072bf215546Sopenharmony_ci I->src[i] = agx_src_index(&src->src); 1073bf215546Sopenharmony_ci } 1074bf215546Sopenharmony_ci} 1075bf215546Sopenharmony_ci 1076bf215546Sopenharmony_cistatic void 1077bf215546Sopenharmony_ciagx_emit_phis_deferred(agx_context *ctx) 1078bf215546Sopenharmony_ci{ 1079bf215546Sopenharmony_ci agx_foreach_block(ctx, block) { 1080bf215546Sopenharmony_ci agx_foreach_instr_in_block(block, I) { 1081bf215546Sopenharmony_ci if (I->op == AGX_OPCODE_PHI) 1082bf215546Sopenharmony_ci agx_emit_phi_deferred(ctx, block, I); 1083bf215546Sopenharmony_ci } 1084bf215546Sopenharmony_ci } 1085bf215546Sopenharmony_ci} 1086bf215546Sopenharmony_ci 1087bf215546Sopenharmony_cistatic void 1088bf215546Sopenharmony_ciagx_emit_instr(agx_builder *b, struct nir_instr *instr) 1089bf215546Sopenharmony_ci{ 1090bf215546Sopenharmony_ci switch (instr->type) { 1091bf215546Sopenharmony_ci case nir_instr_type_load_const: 1092bf215546Sopenharmony_ci agx_emit_load_const(b, nir_instr_as_load_const(instr)); 1093bf215546Sopenharmony_ci break; 1094bf215546Sopenharmony_ci 1095bf215546Sopenharmony_ci case nir_instr_type_intrinsic: 1096bf215546Sopenharmony_ci agx_emit_intrinsic(b, nir_instr_as_intrinsic(instr)); 1097bf215546Sopenharmony_ci break; 1098bf215546Sopenharmony_ci 1099bf215546Sopenharmony_ci case nir_instr_type_alu: 1100bf215546Sopenharmony_ci agx_emit_alu(b, nir_instr_as_alu(instr)); 1101bf215546Sopenharmony_ci break; 1102bf215546Sopenharmony_ci 1103bf215546Sopenharmony_ci case nir_instr_type_tex: 1104bf215546Sopenharmony_ci agx_emit_tex(b, nir_instr_as_tex(instr)); 1105bf215546Sopenharmony_ci break; 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci case nir_instr_type_jump: 1108bf215546Sopenharmony_ci agx_emit_jump(b, nir_instr_as_jump(instr)); 1109bf215546Sopenharmony_ci break; 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_ci case nir_instr_type_phi: 1112bf215546Sopenharmony_ci agx_emit_phi(b, nir_instr_as_phi(instr)); 1113bf215546Sopenharmony_ci break; 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci default: 1116bf215546Sopenharmony_ci unreachable("should've been lowered"); 1117bf215546Sopenharmony_ci } 1118bf215546Sopenharmony_ci} 1119bf215546Sopenharmony_ci 1120bf215546Sopenharmony_cistatic agx_block * 1121bf215546Sopenharmony_ciagx_create_block(agx_context *ctx) 1122bf215546Sopenharmony_ci{ 1123bf215546Sopenharmony_ci agx_block *blk = rzalloc(ctx, agx_block); 1124bf215546Sopenharmony_ci 1125bf215546Sopenharmony_ci util_dynarray_init(&blk->predecessors, blk); 1126bf215546Sopenharmony_ci 1127bf215546Sopenharmony_ci return blk; 1128bf215546Sopenharmony_ci} 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_cistatic agx_block * 1131bf215546Sopenharmony_ciemit_block(agx_context *ctx, nir_block *block) 1132bf215546Sopenharmony_ci{ 1133bf215546Sopenharmony_ci if (ctx->after_block) { 1134bf215546Sopenharmony_ci ctx->current_block = ctx->after_block; 1135bf215546Sopenharmony_ci ctx->after_block = NULL; 1136bf215546Sopenharmony_ci } else { 1137bf215546Sopenharmony_ci ctx->current_block = agx_create_block(ctx); 1138bf215546Sopenharmony_ci } 1139bf215546Sopenharmony_ci 1140bf215546Sopenharmony_ci agx_block *blk = ctx->current_block; 1141bf215546Sopenharmony_ci list_addtail(&blk->link, &ctx->blocks); 1142bf215546Sopenharmony_ci list_inithead(&blk->instructions); 1143bf215546Sopenharmony_ci 1144bf215546Sopenharmony_ci ctx->indexed_nir_blocks[block->index] = blk; 1145bf215546Sopenharmony_ci 1146bf215546Sopenharmony_ci agx_builder _b = agx_init_builder(ctx, agx_after_block(blk)); 1147bf215546Sopenharmony_ci 1148bf215546Sopenharmony_ci nir_foreach_instr(instr, block) { 1149bf215546Sopenharmony_ci agx_emit_instr(&_b, instr); 1150bf215546Sopenharmony_ci } 1151bf215546Sopenharmony_ci 1152bf215546Sopenharmony_ci return blk; 1153bf215546Sopenharmony_ci} 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_cistatic agx_block * 1156bf215546Sopenharmony_ciemit_cf_list(agx_context *ctx, struct exec_list *list); 1157bf215546Sopenharmony_ci 1158bf215546Sopenharmony_ci/* Emit if-else as 1159bf215546Sopenharmony_ci * 1160bf215546Sopenharmony_ci * if_icmp cond != 0 1161bf215546Sopenharmony_ci * ... 1162bf215546Sopenharmony_ci * else_icmp cond == 0 1163bf215546Sopenharmony_ci * ... 1164bf215546Sopenharmony_ci * pop_exec 1165bf215546Sopenharmony_ci * 1166bf215546Sopenharmony_ci * If the else is empty, we can omit the else_icmp. This happens elsewhere, as 1167bf215546Sopenharmony_ci * an empty else block can become nonempty after RA due to phi lowering. This is 1168bf215546Sopenharmony_ci * not usually optimal, but it's a start. 1169bf215546Sopenharmony_ci */ 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_cistatic void 1172bf215546Sopenharmony_ciemit_if(agx_context *ctx, nir_if *nif) 1173bf215546Sopenharmony_ci{ 1174bf215546Sopenharmony_ci agx_block *first_block = ctx->current_block; 1175bf215546Sopenharmony_ci agx_builder _b = agx_init_builder(ctx, agx_after_block(first_block)); 1176bf215546Sopenharmony_ci agx_index cond = agx_src_index(&nif->condition); 1177bf215546Sopenharmony_ci 1178bf215546Sopenharmony_ci agx_emit_logical_end(&_b); 1179bf215546Sopenharmony_ci agx_if_icmp(&_b, cond, agx_zero(), 1, AGX_ICOND_UEQ, true); 1180bf215546Sopenharmony_ci ctx->loop_nesting++; 1181bf215546Sopenharmony_ci 1182bf215546Sopenharmony_ci /* Emit the two subblocks. */ 1183bf215546Sopenharmony_ci agx_block *if_block = emit_cf_list(ctx, &nif->then_list); 1184bf215546Sopenharmony_ci agx_block *end_then = ctx->current_block; 1185bf215546Sopenharmony_ci 1186bf215546Sopenharmony_ci _b.cursor = agx_after_block(ctx->current_block); 1187bf215546Sopenharmony_ci agx_emit_logical_end(&_b); 1188bf215546Sopenharmony_ci agx_else_icmp(&_b, cond, agx_zero(), 1, AGX_ICOND_UEQ, false); 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_ci agx_block *else_block = emit_cf_list(ctx, &nif->else_list); 1191bf215546Sopenharmony_ci agx_block *end_else = ctx->current_block; 1192bf215546Sopenharmony_ci 1193bf215546Sopenharmony_ci ctx->after_block = agx_create_block(ctx); 1194bf215546Sopenharmony_ci 1195bf215546Sopenharmony_ci agx_block_add_successor(first_block, if_block); 1196bf215546Sopenharmony_ci agx_block_add_successor(first_block, else_block); 1197bf215546Sopenharmony_ci agx_block_add_successor(end_then, ctx->after_block); 1198bf215546Sopenharmony_ci agx_block_add_successor(end_else, ctx->after_block); 1199bf215546Sopenharmony_ci 1200bf215546Sopenharmony_ci _b.cursor = agx_after_block(ctx->current_block); 1201bf215546Sopenharmony_ci agx_emit_logical_end(&_b); 1202bf215546Sopenharmony_ci agx_pop_exec(&_b, 1); 1203bf215546Sopenharmony_ci ctx->loop_nesting--; 1204bf215546Sopenharmony_ci} 1205bf215546Sopenharmony_ci 1206bf215546Sopenharmony_cistatic void 1207bf215546Sopenharmony_ciemit_loop(agx_context *ctx, nir_loop *nloop) 1208bf215546Sopenharmony_ci{ 1209bf215546Sopenharmony_ci /* We only track nesting within the innermost loop, so push and reset */ 1210bf215546Sopenharmony_ci unsigned pushed_nesting = ctx->loop_nesting; 1211bf215546Sopenharmony_ci ctx->loop_nesting = 0; 1212bf215546Sopenharmony_ci 1213bf215546Sopenharmony_ci agx_block *popped_break = ctx->break_block; 1214bf215546Sopenharmony_ci agx_block *popped_continue = ctx->continue_block; 1215bf215546Sopenharmony_ci 1216bf215546Sopenharmony_ci ctx->break_block = agx_create_block(ctx); 1217bf215546Sopenharmony_ci ctx->continue_block = agx_create_block(ctx); 1218bf215546Sopenharmony_ci 1219bf215546Sopenharmony_ci /* Make room for break/continue nesting (TODO: skip if no divergent CF) */ 1220bf215546Sopenharmony_ci agx_builder _b = agx_init_builder(ctx, agx_after_block(ctx->current_block)); 1221bf215546Sopenharmony_ci agx_emit_logical_end(&_b); 1222bf215546Sopenharmony_ci agx_push_exec(&_b, 2); 1223bf215546Sopenharmony_ci 1224bf215546Sopenharmony_ci /* Fallthrough to body */ 1225bf215546Sopenharmony_ci agx_block_add_successor(ctx->current_block, ctx->continue_block); 1226bf215546Sopenharmony_ci 1227bf215546Sopenharmony_ci /* Emit the body */ 1228bf215546Sopenharmony_ci ctx->after_block = ctx->continue_block; 1229bf215546Sopenharmony_ci agx_block *start_block = emit_cf_list(ctx, &nloop->body); 1230bf215546Sopenharmony_ci 1231bf215546Sopenharmony_ci /* Fix up the nesting counter via an always true while_icmp, and branch back 1232bf215546Sopenharmony_ci * to start of loop if any lanes are active */ 1233bf215546Sopenharmony_ci _b.cursor = agx_after_block(ctx->current_block); 1234bf215546Sopenharmony_ci agx_emit_logical_end(&_b); 1235bf215546Sopenharmony_ci agx_while_icmp(&_b, agx_zero(), agx_zero(), 2, AGX_ICOND_UEQ, false); 1236bf215546Sopenharmony_ci agx_jmp_exec_any(&_b, start_block); 1237bf215546Sopenharmony_ci agx_pop_exec(&_b, 2); 1238bf215546Sopenharmony_ci agx_block_add_successor(ctx->current_block, ctx->continue_block); 1239bf215546Sopenharmony_ci 1240bf215546Sopenharmony_ci /* Pop off */ 1241bf215546Sopenharmony_ci ctx->after_block = ctx->break_block; 1242bf215546Sopenharmony_ci ctx->break_block = popped_break; 1243bf215546Sopenharmony_ci ctx->continue_block = popped_continue; 1244bf215546Sopenharmony_ci 1245bf215546Sopenharmony_ci /* Update shader-db stats */ 1246bf215546Sopenharmony_ci ++ctx->loop_count; 1247bf215546Sopenharmony_ci 1248bf215546Sopenharmony_ci /* All nested control flow must have finished */ 1249bf215546Sopenharmony_ci assert(ctx->loop_nesting == 0); 1250bf215546Sopenharmony_ci 1251bf215546Sopenharmony_ci /* Restore loop nesting (we might be inside an if inside an outer loop) */ 1252bf215546Sopenharmony_ci ctx->loop_nesting = pushed_nesting; 1253bf215546Sopenharmony_ci} 1254bf215546Sopenharmony_ci 1255bf215546Sopenharmony_ci/* Before the first control flow structure, the nesting counter (r0l) needs to 1256bf215546Sopenharmony_ci * be zeroed for correct operation. This only happens at most once, since by 1257bf215546Sopenharmony_ci * definition this occurs at the end of the first block, which dominates the 1258bf215546Sopenharmony_ci * rest of the program. */ 1259bf215546Sopenharmony_ci 1260bf215546Sopenharmony_cistatic void 1261bf215546Sopenharmony_ciemit_first_cf(agx_context *ctx) 1262bf215546Sopenharmony_ci{ 1263bf215546Sopenharmony_ci if (ctx->any_cf) 1264bf215546Sopenharmony_ci return; 1265bf215546Sopenharmony_ci 1266bf215546Sopenharmony_ci agx_builder _b = agx_init_builder(ctx, agx_after_block(ctx->current_block)); 1267bf215546Sopenharmony_ci agx_index r0l = agx_register(0, false); 1268bf215546Sopenharmony_ci 1269bf215546Sopenharmony_ci agx_mov_to(&_b, r0l, agx_immediate(0)); 1270bf215546Sopenharmony_ci ctx->any_cf = true; 1271bf215546Sopenharmony_ci} 1272bf215546Sopenharmony_ci 1273bf215546Sopenharmony_cistatic agx_block * 1274bf215546Sopenharmony_ciemit_cf_list(agx_context *ctx, struct exec_list *list) 1275bf215546Sopenharmony_ci{ 1276bf215546Sopenharmony_ci agx_block *start_block = NULL; 1277bf215546Sopenharmony_ci 1278bf215546Sopenharmony_ci foreach_list_typed(nir_cf_node, node, node, list) { 1279bf215546Sopenharmony_ci switch (node->type) { 1280bf215546Sopenharmony_ci case nir_cf_node_block: { 1281bf215546Sopenharmony_ci agx_block *block = emit_block(ctx, nir_cf_node_as_block(node)); 1282bf215546Sopenharmony_ci 1283bf215546Sopenharmony_ci if (!start_block) 1284bf215546Sopenharmony_ci start_block = block; 1285bf215546Sopenharmony_ci 1286bf215546Sopenharmony_ci break; 1287bf215546Sopenharmony_ci } 1288bf215546Sopenharmony_ci 1289bf215546Sopenharmony_ci case nir_cf_node_if: 1290bf215546Sopenharmony_ci emit_first_cf(ctx); 1291bf215546Sopenharmony_ci emit_if(ctx, nir_cf_node_as_if(node)); 1292bf215546Sopenharmony_ci break; 1293bf215546Sopenharmony_ci 1294bf215546Sopenharmony_ci case nir_cf_node_loop: 1295bf215546Sopenharmony_ci emit_first_cf(ctx); 1296bf215546Sopenharmony_ci emit_loop(ctx, nir_cf_node_as_loop(node)); 1297bf215546Sopenharmony_ci break; 1298bf215546Sopenharmony_ci 1299bf215546Sopenharmony_ci default: 1300bf215546Sopenharmony_ci unreachable("Unknown control flow"); 1301bf215546Sopenharmony_ci } 1302bf215546Sopenharmony_ci } 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_ci return start_block; 1305bf215546Sopenharmony_ci} 1306bf215546Sopenharmony_ci 1307bf215546Sopenharmony_cistatic void 1308bf215546Sopenharmony_ciagx_set_st_vary_final(agx_context *ctx) 1309bf215546Sopenharmony_ci{ 1310bf215546Sopenharmony_ci agx_foreach_instr_global_rev(ctx, I) { 1311bf215546Sopenharmony_ci if (I->op == AGX_OPCODE_ST_VARY) { 1312bf215546Sopenharmony_ci I->last = true; 1313bf215546Sopenharmony_ci return; 1314bf215546Sopenharmony_ci } 1315bf215546Sopenharmony_ci } 1316bf215546Sopenharmony_ci} 1317bf215546Sopenharmony_ci 1318bf215546Sopenharmony_cistatic void 1319bf215546Sopenharmony_ciagx_print_stats(agx_context *ctx, unsigned size, FILE *fp) 1320bf215546Sopenharmony_ci{ 1321bf215546Sopenharmony_ci unsigned nr_ins = 0, max_reg = 0; 1322bf215546Sopenharmony_ci 1323bf215546Sopenharmony_ci agx_foreach_instr_global(ctx, I) { 1324bf215546Sopenharmony_ci /* Count instructions */ 1325bf215546Sopenharmony_ci nr_ins++; 1326bf215546Sopenharmony_ci 1327bf215546Sopenharmony_ci /* Count registers */ 1328bf215546Sopenharmony_ci agx_foreach_dest(I, d) { 1329bf215546Sopenharmony_ci if (I->dest[d].type == AGX_INDEX_REGISTER) { 1330bf215546Sopenharmony_ci max_reg = MAX2(max_reg, 1331bf215546Sopenharmony_ci I->dest[d].value + agx_write_registers(I, d) - 1); 1332bf215546Sopenharmony_ci } 1333bf215546Sopenharmony_ci } 1334bf215546Sopenharmony_ci } 1335bf215546Sopenharmony_ci 1336bf215546Sopenharmony_ci /* TODO: Pipe through occupancy */ 1337bf215546Sopenharmony_ci unsigned nr_threads = 1; 1338bf215546Sopenharmony_ci 1339bf215546Sopenharmony_ci fprintf(stderr, "%s - %s shader: %u inst, %u bytes, %u halfregs, %u threads, " 1340bf215546Sopenharmony_ci "%u loops, %u:%u spills:fills\n", 1341bf215546Sopenharmony_ci ctx->nir->info.label ?: "", 1342bf215546Sopenharmony_ci gl_shader_stage_name(ctx->stage), 1343bf215546Sopenharmony_ci nr_ins, size, max_reg, nr_threads, ctx->loop_count, 1344bf215546Sopenharmony_ci ctx->spills, ctx->fills); 1345bf215546Sopenharmony_ci} 1346bf215546Sopenharmony_ci 1347bf215546Sopenharmony_cistatic int 1348bf215546Sopenharmony_ciglsl_type_size(const struct glsl_type *type, bool bindless) 1349bf215546Sopenharmony_ci{ 1350bf215546Sopenharmony_ci return glsl_count_attribute_slots(type, false); 1351bf215546Sopenharmony_ci} 1352bf215546Sopenharmony_ci 1353bf215546Sopenharmony_cistatic bool 1354bf215546Sopenharmony_ciagx_lower_sincos_filter(const nir_instr *instr, UNUSED const void *_) 1355bf215546Sopenharmony_ci{ 1356bf215546Sopenharmony_ci if (instr->type != nir_instr_type_alu) 1357bf215546Sopenharmony_ci return false; 1358bf215546Sopenharmony_ci 1359bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 1360bf215546Sopenharmony_ci return alu->op == nir_op_fsin || alu->op == nir_op_fcos; 1361bf215546Sopenharmony_ci} 1362bf215546Sopenharmony_ci 1363bf215546Sopenharmony_ci/* Sine and cosine are implemented via the sin_pt_1 and sin_pt_2 opcodes for 1364bf215546Sopenharmony_ci * heavy lifting. sin_pt_2 implements sinc in the first quadrant, expressed in 1365bf215546Sopenharmony_ci * turns (sin (tau x) / x), while sin_pt_1 implements a piecewise sign/offset 1366bf215546Sopenharmony_ci * fixup to transform a quadrant angle [0, 4] to [-1, 1]. The NIR opcode 1367bf215546Sopenharmony_ci * fsin_agx models the fixup, sinc, and multiply to obtain sine, so we just 1368bf215546Sopenharmony_ci * need to change units from radians to quadrants modulo turns. Cosine is 1369bf215546Sopenharmony_ci * implemented by shifting by one quadrant: cos(x) = sin(x + tau/4). 1370bf215546Sopenharmony_ci */ 1371bf215546Sopenharmony_ci 1372bf215546Sopenharmony_cistatic nir_ssa_def * 1373bf215546Sopenharmony_ciagx_lower_sincos_impl(struct nir_builder *b, nir_instr *instr, UNUSED void *_) 1374bf215546Sopenharmony_ci{ 1375bf215546Sopenharmony_ci nir_alu_instr *alu = nir_instr_as_alu(instr); 1376bf215546Sopenharmony_ci nir_ssa_def *x = nir_mov_alu(b, alu->src[0], 1); 1377bf215546Sopenharmony_ci nir_ssa_def *turns = nir_fmul_imm(b, x, M_1_PI * 0.5f); 1378bf215546Sopenharmony_ci 1379bf215546Sopenharmony_ci if (alu->op == nir_op_fcos) 1380bf215546Sopenharmony_ci turns = nir_fadd_imm(b, turns, 0.25f); 1381bf215546Sopenharmony_ci 1382bf215546Sopenharmony_ci nir_ssa_def *quadrants = nir_fmul_imm(b, nir_ffract(b, turns), 4.0); 1383bf215546Sopenharmony_ci return nir_fsin_agx(b, quadrants); 1384bf215546Sopenharmony_ci} 1385bf215546Sopenharmony_ci 1386bf215546Sopenharmony_cistatic bool 1387bf215546Sopenharmony_ciagx_lower_sincos(nir_shader *shader) 1388bf215546Sopenharmony_ci{ 1389bf215546Sopenharmony_ci return nir_shader_lower_instructions(shader, 1390bf215546Sopenharmony_ci agx_lower_sincos_filter, agx_lower_sincos_impl, NULL); 1391bf215546Sopenharmony_ci} 1392bf215546Sopenharmony_ci 1393bf215546Sopenharmony_cistatic bool 1394bf215546Sopenharmony_ciagx_lower_front_face(struct nir_builder *b, 1395bf215546Sopenharmony_ci nir_instr *instr, UNUSED void *data) 1396bf215546Sopenharmony_ci{ 1397bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1398bf215546Sopenharmony_ci return false; 1399bf215546Sopenharmony_ci 1400bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1401bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_front_face) 1402bf215546Sopenharmony_ci return false; 1403bf215546Sopenharmony_ci 1404bf215546Sopenharmony_ci assert(intr->dest.is_ssa); 1405bf215546Sopenharmony_ci nir_ssa_def *def = &intr->dest.ssa; 1406bf215546Sopenharmony_ci assert(def->bit_size == 1); 1407bf215546Sopenharmony_ci 1408bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intr->instr); 1409bf215546Sopenharmony_ci nir_ssa_def_rewrite_uses(def, nir_inot(b, nir_load_back_face_agx(b, 1))); 1410bf215546Sopenharmony_ci return true; 1411bf215546Sopenharmony_ci} 1412bf215546Sopenharmony_ci 1413bf215546Sopenharmony_cistatic bool 1414bf215546Sopenharmony_ciagx_lower_aligned_offsets(struct nir_builder *b, 1415bf215546Sopenharmony_ci nir_instr *instr, UNUSED void *data) 1416bf215546Sopenharmony_ci{ 1417bf215546Sopenharmony_ci if (instr->type != nir_instr_type_intrinsic) 1418bf215546Sopenharmony_ci return false; 1419bf215546Sopenharmony_ci 1420bf215546Sopenharmony_ci nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr); 1421bf215546Sopenharmony_ci if (intr->intrinsic != nir_intrinsic_load_ubo) 1422bf215546Sopenharmony_ci return false; 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci b->cursor = nir_before_instr(&intr->instr); 1425bf215546Sopenharmony_ci 1426bf215546Sopenharmony_ci unsigned bytes = nir_dest_bit_size(intr->dest) / 8; 1427bf215546Sopenharmony_ci assert(util_is_power_of_two_or_zero(bytes) && bytes != 0); 1428bf215546Sopenharmony_ci 1429bf215546Sopenharmony_ci nir_src *offset = &intr->src[1]; 1430bf215546Sopenharmony_ci 1431bf215546Sopenharmony_ci unsigned shift = util_logbase2(bytes); 1432bf215546Sopenharmony_ci 1433bf215546Sopenharmony_ci nir_ssa_def *old = nir_ssa_for_src(b, *offset, 1); 1434bf215546Sopenharmony_ci nir_ssa_def *new = nir_ishr_imm(b, old, shift); 1435bf215546Sopenharmony_ci 1436bf215546Sopenharmony_ci nir_instr_rewrite_src_ssa(instr, offset, new); 1437bf215546Sopenharmony_ci return true; 1438bf215546Sopenharmony_ci} 1439bf215546Sopenharmony_ci 1440bf215546Sopenharmony_cistatic void 1441bf215546Sopenharmony_ciagx_optimize_nir(nir_shader *nir) 1442bf215546Sopenharmony_ci{ 1443bf215546Sopenharmony_ci bool progress; 1444bf215546Sopenharmony_ci 1445bf215546Sopenharmony_ci nir_lower_idiv_options idiv_options = { 1446bf215546Sopenharmony_ci .imprecise_32bit_lowering = true, 1447bf215546Sopenharmony_ci .allow_fp16 = true, 1448bf215546Sopenharmony_ci }; 1449bf215546Sopenharmony_ci 1450bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_regs_to_ssa); 1451bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_int64); 1452bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_idiv, &idiv_options); 1453bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); 1454bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_load_const_to_scalar); 1455bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_flrp, 16 | 32 | 64, false); 1456bf215546Sopenharmony_ci NIR_PASS_V(nir, agx_lower_sincos); 1457bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_shader_instructions_pass, 1458bf215546Sopenharmony_ci agx_lower_front_face, 1459bf215546Sopenharmony_ci nir_metadata_block_index | nir_metadata_dominance, NULL); 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci do { 1462bf215546Sopenharmony_ci progress = false; 1463bf215546Sopenharmony_ci 1464bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_lower_var_copies); 1465bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_lower_vars_to_ssa); 1466bf215546Sopenharmony_ci 1467bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_copy_prop); 1468bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_remove_phis); 1469bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_lower_phis_to_scalar, true); 1470bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_dce); 1471bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_dead_cf); 1472bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_cse); 1473bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_peephole_select, 64, false, true); 1474bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_algebraic); 1475bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_constant_folding); 1476bf215546Sopenharmony_ci 1477bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_undef); 1478bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_lower_undef_to_zero); 1479bf215546Sopenharmony_ci 1480bf215546Sopenharmony_ci NIR_PASS(progress, nir, nir_opt_loop_unroll); 1481bf215546Sopenharmony_ci } while (progress); 1482bf215546Sopenharmony_ci 1483bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_algebraic_late); 1484bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_constant_folding); 1485bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_copy_prop); 1486bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_dce); 1487bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_cse); 1488bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL); 1489bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_load_const_to_scalar); 1490bf215546Sopenharmony_ci 1491bf215546Sopenharmony_ci /* Cleanup optimizations */ 1492bf215546Sopenharmony_ci nir_move_options move_all = 1493bf215546Sopenharmony_ci nir_move_const_undef | nir_move_load_ubo | nir_move_load_input | 1494bf215546Sopenharmony_ci nir_move_comparisons | nir_move_copies | nir_move_load_ssbo; 1495bf215546Sopenharmony_ci 1496bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_sink, move_all); 1497bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_opt_move, move_all); 1498bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_phis_to_scalar, true); 1499bf215546Sopenharmony_ci} 1500bf215546Sopenharmony_ci 1501bf215546Sopenharmony_ci/* ABI: position first, then user, then psiz */ 1502bf215546Sopenharmony_cistatic void 1503bf215546Sopenharmony_ciagx_remap_varyings_vs(nir_shader *nir, struct agx_varyings *varyings, 1504bf215546Sopenharmony_ci unsigned *remap) 1505bf215546Sopenharmony_ci{ 1506bf215546Sopenharmony_ci unsigned base = 0; 1507bf215546Sopenharmony_ci 1508bf215546Sopenharmony_ci nir_variable *pos = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_POS); 1509bf215546Sopenharmony_ci if (pos) { 1510bf215546Sopenharmony_ci assert(pos->data.driver_location < AGX_MAX_VARYINGS); 1511bf215546Sopenharmony_ci remap[pos->data.driver_location] = base; 1512bf215546Sopenharmony_ci base += 4; 1513bf215546Sopenharmony_ci } 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_ci nir_foreach_shader_out_variable(var, nir) { 1516bf215546Sopenharmony_ci unsigned loc = var->data.location; 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci if(loc == VARYING_SLOT_POS || loc == VARYING_SLOT_PSIZ) { 1519bf215546Sopenharmony_ci continue; 1520bf215546Sopenharmony_ci } 1521bf215546Sopenharmony_ci 1522bf215546Sopenharmony_ci assert(var->data.driver_location < AGX_MAX_VARYINGS); 1523bf215546Sopenharmony_ci remap[var->data.driver_location] = base; 1524bf215546Sopenharmony_ci base += 4; 1525bf215546Sopenharmony_ci } 1526bf215546Sopenharmony_ci 1527bf215546Sopenharmony_ci nir_variable *psiz = nir_find_variable_with_location(nir, nir_var_shader_out, VARYING_SLOT_PSIZ); 1528bf215546Sopenharmony_ci if (psiz) { 1529bf215546Sopenharmony_ci assert(psiz->data.driver_location < AGX_MAX_VARYINGS); 1530bf215546Sopenharmony_ci remap[psiz->data.driver_location] = base; 1531bf215546Sopenharmony_ci base += 1; 1532bf215546Sopenharmony_ci } 1533bf215546Sopenharmony_ci 1534bf215546Sopenharmony_ci varyings->nr_slots = base; 1535bf215546Sopenharmony_ci} 1536bf215546Sopenharmony_ci 1537bf215546Sopenharmony_cistatic void 1538bf215546Sopenharmony_ciagx_remap_varyings_fs(nir_shader *nir, struct agx_varyings *varyings, 1539bf215546Sopenharmony_ci unsigned *remap) 1540bf215546Sopenharmony_ci{ 1541bf215546Sopenharmony_ci struct agx_varying_packed *packed = varyings->packed; 1542bf215546Sopenharmony_ci unsigned base = 0; 1543bf215546Sopenharmony_ci 1544bf215546Sopenharmony_ci agx_pack(packed, VARYING, cfg) { 1545bf215546Sopenharmony_ci cfg.type = AGX_VARYING_TYPE_FRAGCOORD_W; 1546bf215546Sopenharmony_ci cfg.components = 1; 1547bf215546Sopenharmony_ci cfg.triangle_slot = cfg.point_slot = base; 1548bf215546Sopenharmony_ci } 1549bf215546Sopenharmony_ci 1550bf215546Sopenharmony_ci base++; 1551bf215546Sopenharmony_ci packed++; 1552bf215546Sopenharmony_ci 1553bf215546Sopenharmony_ci agx_pack(packed, VARYING, cfg) { 1554bf215546Sopenharmony_ci cfg.type = AGX_VARYING_TYPE_FRAGCOORD_Z; 1555bf215546Sopenharmony_ci cfg.components = 1; 1556bf215546Sopenharmony_ci cfg.triangle_slot = cfg.point_slot = base; 1557bf215546Sopenharmony_ci } 1558bf215546Sopenharmony_ci 1559bf215546Sopenharmony_ci base++; 1560bf215546Sopenharmony_ci packed++; 1561bf215546Sopenharmony_ci 1562bf215546Sopenharmony_ci unsigned comps[MAX_VARYING] = { 0 }; 1563bf215546Sopenharmony_ci 1564bf215546Sopenharmony_ci nir_foreach_shader_in_variable(var, nir) { 1565bf215546Sopenharmony_ci unsigned loc = var->data.driver_location; 1566bf215546Sopenharmony_ci const struct glsl_type *column = 1567bf215546Sopenharmony_ci glsl_without_array_or_matrix(var->type); 1568bf215546Sopenharmony_ci unsigned chan = glsl_get_components(column); 1569bf215546Sopenharmony_ci 1570bf215546Sopenharmony_ci /* If we have a fractional location added, we need to increase the size 1571bf215546Sopenharmony_ci * so it will fit, i.e. a vec3 in YZW requires us to allocate a vec4. 1572bf215546Sopenharmony_ci * We could do better but this is an edge case as it is, normally 1573bf215546Sopenharmony_ci * packed varyings will be aligned. 1574bf215546Sopenharmony_ci */ 1575bf215546Sopenharmony_ci chan += var->data.location_frac; 1576bf215546Sopenharmony_ci comps[loc] = MAX2(comps[loc], chan); 1577bf215546Sopenharmony_ci } 1578bf215546Sopenharmony_ci 1579bf215546Sopenharmony_ci nir_foreach_shader_in_variable(var, nir) { 1580bf215546Sopenharmony_ci unsigned loc = var->data.driver_location; 1581bf215546Sopenharmony_ci unsigned sz = glsl_count_attribute_slots(var->type, FALSE); 1582bf215546Sopenharmony_ci unsigned channels = comps[loc]; 1583bf215546Sopenharmony_ci 1584bf215546Sopenharmony_ci assert(var->data.driver_location <= AGX_MAX_VARYINGS); 1585bf215546Sopenharmony_ci remap[var->data.driver_location] = base; 1586bf215546Sopenharmony_ci 1587bf215546Sopenharmony_ci for (int c = 0; c < sz; ++c) { 1588bf215546Sopenharmony_ci agx_pack(packed, VARYING, cfg) { 1589bf215546Sopenharmony_ci cfg.type = (var->data.location == VARYING_SLOT_PNTC) ? 1590bf215546Sopenharmony_ci AGX_VARYING_TYPE_POINT_COORDINATES : 1591bf215546Sopenharmony_ci (var->data.interpolation == INTERP_MODE_FLAT) ? 1592bf215546Sopenharmony_ci AGX_VARYING_TYPE_FLAT_LAST : 1593bf215546Sopenharmony_ci AGX_VARYING_TYPE_SMOOTH; 1594bf215546Sopenharmony_ci 1595bf215546Sopenharmony_ci cfg.components = channels; 1596bf215546Sopenharmony_ci cfg.triangle_slot = cfg.point_slot = base; 1597bf215546Sopenharmony_ci } 1598bf215546Sopenharmony_ci 1599bf215546Sopenharmony_ci base += channels; 1600bf215546Sopenharmony_ci packed++; 1601bf215546Sopenharmony_ci } 1602bf215546Sopenharmony_ci } 1603bf215546Sopenharmony_ci 1604bf215546Sopenharmony_ci varyings->nr_descs = (packed - varyings->packed); 1605bf215546Sopenharmony_ci varyings->nr_slots = base; 1606bf215546Sopenharmony_ci} 1607bf215546Sopenharmony_ci 1608bf215546Sopenharmony_ci/* 1609bf215546Sopenharmony_ci * Build a bit mask of varyings (by location) that are flatshaded. This 1610bf215546Sopenharmony_ci * information is needed by lower_mediump_io. 1611bf215546Sopenharmony_ci */ 1612bf215546Sopenharmony_cistatic uint64_t 1613bf215546Sopenharmony_ciagx_flat_varying_mask(nir_shader *nir) 1614bf215546Sopenharmony_ci{ 1615bf215546Sopenharmony_ci uint64_t mask = 0; 1616bf215546Sopenharmony_ci 1617bf215546Sopenharmony_ci assert(nir->info.stage == MESA_SHADER_FRAGMENT); 1618bf215546Sopenharmony_ci 1619bf215546Sopenharmony_ci nir_foreach_shader_in_variable(var, nir) { 1620bf215546Sopenharmony_ci if (var->data.interpolation == INTERP_MODE_FLAT) 1621bf215546Sopenharmony_ci mask |= BITFIELD64_BIT(var->data.location); 1622bf215546Sopenharmony_ci } 1623bf215546Sopenharmony_ci 1624bf215546Sopenharmony_ci return mask; 1625bf215546Sopenharmony_ci} 1626bf215546Sopenharmony_ci 1627bf215546Sopenharmony_civoid 1628bf215546Sopenharmony_ciagx_compile_shader_nir(nir_shader *nir, 1629bf215546Sopenharmony_ci struct agx_shader_key *key, 1630bf215546Sopenharmony_ci struct util_dynarray *binary, 1631bf215546Sopenharmony_ci struct agx_shader_info *out) 1632bf215546Sopenharmony_ci{ 1633bf215546Sopenharmony_ci agx_debug = debug_get_option_agx_debug(); 1634bf215546Sopenharmony_ci 1635bf215546Sopenharmony_ci agx_context *ctx = rzalloc(NULL, agx_context); 1636bf215546Sopenharmony_ci ctx->nir = nir; 1637bf215546Sopenharmony_ci ctx->out = out; 1638bf215546Sopenharmony_ci ctx->key = key; 1639bf215546Sopenharmony_ci ctx->stage = nir->info.stage; 1640bf215546Sopenharmony_ci list_inithead(&ctx->blocks); 1641bf215546Sopenharmony_ci 1642bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_VERTEX) { 1643bf215546Sopenharmony_ci out->writes_psiz = nir->info.outputs_written & 1644bf215546Sopenharmony_ci BITFIELD_BIT(VARYING_SLOT_PSIZ); 1645bf215546Sopenharmony_ci } 1646bf215546Sopenharmony_ci 1647bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_vars_to_ssa); 1648bf215546Sopenharmony_ci 1649bf215546Sopenharmony_ci /* Lower large arrays to scratch and small arrays to csel */ 1650bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_vars_to_scratch, nir_var_function_temp, 16, 1651bf215546Sopenharmony_ci glsl_get_natural_size_align_bytes); 1652bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_function_temp, ~0); 1653bf215546Sopenharmony_ci 1654bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_VERTEX) { 1655bf215546Sopenharmony_ci /* Lower from OpenGL [-1, 1] to [0, 1] if half-z is not set */ 1656bf215546Sopenharmony_ci if (!key->vs.clip_halfz) 1657bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_clip_halfz); 1658bf215546Sopenharmony_ci } 1659bf215546Sopenharmony_ci 1660bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_split_var_copies); 1661bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_global_vars_to_local); 1662bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_var_copies); 1663bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_vars_to_ssa); 1664bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out, 1665bf215546Sopenharmony_ci glsl_type_size, 0); 1666bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_FRAGMENT) { 1667bf215546Sopenharmony_ci /* Interpolate varyings at fp16 and write to the tilebuffer at fp16. As an 1668bf215546Sopenharmony_ci * exception, interpolate flat shaded at fp32. This works around a 1669bf215546Sopenharmony_ci * hardware limitation. The resulting code (with an extra f2f16 at the end 1670bf215546Sopenharmony_ci * if needed) matches what Metal produces. 1671bf215546Sopenharmony_ci */ 1672bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_mediump_io, 1673bf215546Sopenharmony_ci nir_var_shader_in | nir_var_shader_out, 1674bf215546Sopenharmony_ci ~agx_flat_varying_mask(nir), false); 1675bf215546Sopenharmony_ci } 1676bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_shader_instructions_pass, 1677bf215546Sopenharmony_ci agx_lower_aligned_offsets, 1678bf215546Sopenharmony_ci nir_metadata_block_index | nir_metadata_dominance, NULL); 1679bf215546Sopenharmony_ci 1680bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_ssbo); 1681bf215546Sopenharmony_ci 1682bf215546Sopenharmony_ci /* Varying output is scalar, other I/O is vector */ 1683bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_VERTEX) { 1684bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_io_to_scalar, nir_var_shader_out); 1685bf215546Sopenharmony_ci } 1686bf215546Sopenharmony_ci 1687bf215546Sopenharmony_ci nir_lower_tex_options lower_tex_options = { 1688bf215546Sopenharmony_ci .lower_txs_lod = true, 1689bf215546Sopenharmony_ci .lower_txp = ~0, 1690bf215546Sopenharmony_ci .lower_invalid_implicit_lod = true, 1691bf215546Sopenharmony_ci }; 1692bf215546Sopenharmony_ci 1693bf215546Sopenharmony_ci nir_tex_src_type_constraints tex_constraints = { 1694bf215546Sopenharmony_ci [nir_tex_src_lod] = { true, 16 }, 1695bf215546Sopenharmony_ci [nir_tex_src_bias] = { true, 16 }, 1696bf215546Sopenharmony_ci }; 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_tex, &lower_tex_options); 1699bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_legalize_16bit_sampler_srcs, tex_constraints); 1700bf215546Sopenharmony_ci 1701bf215546Sopenharmony_ci agx_optimize_nir(nir); 1702bf215546Sopenharmony_ci 1703bf215546Sopenharmony_ci /* Implement conditional discard with real control flow like Metal */ 1704bf215546Sopenharmony_ci NIR_PASS_V(nir, nir_lower_discard_if); 1705bf215546Sopenharmony_ci 1706bf215546Sopenharmony_ci /* Must be last since NIR passes can remap driver_location freely */ 1707bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_VERTEX) { 1708bf215546Sopenharmony_ci agx_remap_varyings_vs(nir, &out->varyings, ctx->varyings); 1709bf215546Sopenharmony_ci } else if (ctx->stage == MESA_SHADER_FRAGMENT) { 1710bf215546Sopenharmony_ci agx_remap_varyings_fs(nir, &out->varyings, ctx->varyings); 1711bf215546Sopenharmony_ci } 1712bf215546Sopenharmony_ci 1713bf215546Sopenharmony_ci bool skip_internal = nir->info.internal; 1714bf215546Sopenharmony_ci skip_internal &= !(agx_debug & AGX_DBG_INTERNAL); 1715bf215546Sopenharmony_ci 1716bf215546Sopenharmony_ci if (agx_debug & AGX_DBG_SHADERS && !skip_internal) { 1717bf215546Sopenharmony_ci nir_print_shader(nir, stdout); 1718bf215546Sopenharmony_ci } 1719bf215546Sopenharmony_ci 1720bf215546Sopenharmony_ci ctx->allocated_vec = _mesa_hash_table_u64_create(ctx); 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci nir_foreach_function(func, nir) { 1723bf215546Sopenharmony_ci if (!func->impl) 1724bf215546Sopenharmony_ci continue; 1725bf215546Sopenharmony_ci 1726bf215546Sopenharmony_ci nir_index_blocks(func->impl); 1727bf215546Sopenharmony_ci 1728bf215546Sopenharmony_ci ctx->indexed_nir_blocks = 1729bf215546Sopenharmony_ci rzalloc_array(ctx, agx_block *, func->impl->num_blocks); 1730bf215546Sopenharmony_ci 1731bf215546Sopenharmony_ci ctx->alloc += func->impl->ssa_alloc; 1732bf215546Sopenharmony_ci emit_cf_list(ctx, &func->impl->body); 1733bf215546Sopenharmony_ci agx_emit_phis_deferred(ctx); 1734bf215546Sopenharmony_ci break; /* TODO: Multi-function shaders */ 1735bf215546Sopenharmony_ci } 1736bf215546Sopenharmony_ci 1737bf215546Sopenharmony_ci /* Terminate the shader after the exit block */ 1738bf215546Sopenharmony_ci agx_block *last_block = list_last_entry(&ctx->blocks, agx_block, link); 1739bf215546Sopenharmony_ci agx_builder _b = agx_init_builder(ctx, agx_after_block(last_block)); 1740bf215546Sopenharmony_ci agx_stop(&_b); 1741bf215546Sopenharmony_ci 1742bf215546Sopenharmony_ci /* Also add traps to match the blob, unsure what the function is */ 1743bf215546Sopenharmony_ci for (unsigned i = 0; i < 8; ++i) 1744bf215546Sopenharmony_ci agx_trap(&_b); 1745bf215546Sopenharmony_ci 1746bf215546Sopenharmony_ci /* Index blocks now that we're done emitting so the order is consistent */ 1747bf215546Sopenharmony_ci agx_foreach_block(ctx, block) 1748bf215546Sopenharmony_ci block->index = ctx->num_blocks++; 1749bf215546Sopenharmony_ci 1750bf215546Sopenharmony_ci agx_validate(ctx, "IR translation"); 1751bf215546Sopenharmony_ci 1752bf215546Sopenharmony_ci if (agx_debug & AGX_DBG_SHADERS && !skip_internal) 1753bf215546Sopenharmony_ci agx_print_shader(ctx, stdout); 1754bf215546Sopenharmony_ci 1755bf215546Sopenharmony_ci agx_optimizer(ctx); 1756bf215546Sopenharmony_ci agx_dce(ctx); 1757bf215546Sopenharmony_ci agx_validate(ctx, "Optimization"); 1758bf215546Sopenharmony_ci 1759bf215546Sopenharmony_ci if (agx_debug & AGX_DBG_SHADERS && !skip_internal) 1760bf215546Sopenharmony_ci agx_print_shader(ctx, stdout); 1761bf215546Sopenharmony_ci 1762bf215546Sopenharmony_ci agx_ra(ctx); 1763bf215546Sopenharmony_ci 1764bf215546Sopenharmony_ci if (ctx->stage == MESA_SHADER_VERTEX) 1765bf215546Sopenharmony_ci agx_set_st_vary_final(ctx); 1766bf215546Sopenharmony_ci 1767bf215546Sopenharmony_ci if (agx_debug & AGX_DBG_SHADERS && !skip_internal) 1768bf215546Sopenharmony_ci agx_print_shader(ctx, stdout); 1769bf215546Sopenharmony_ci 1770bf215546Sopenharmony_ci agx_lower_pseudo(ctx); 1771bf215546Sopenharmony_ci 1772bf215546Sopenharmony_ci agx_pack_binary(ctx, binary); 1773bf215546Sopenharmony_ci 1774bf215546Sopenharmony_ci if ((agx_debug & AGX_DBG_SHADERDB) && !skip_internal) 1775bf215546Sopenharmony_ci agx_print_stats(ctx, binary->size, stderr); 1776bf215546Sopenharmony_ci 1777bf215546Sopenharmony_ci ralloc_free(ctx); 1778bf215546Sopenharmony_ci} 1779