1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2020 Collabora Ltd. 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 
22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci * Authors (Collabora): 24bf215546Sopenharmony_ci * Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com> 25bf215546Sopenharmony_ci */ 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "compiler.h" 28bf215546Sopenharmony_ci#include "bi_builder.h" 29bf215546Sopenharmony_ci 30bf215546Sopenharmony_ci/* Arguments common to worklist, passed by value for convenience */ 31bf215546Sopenharmony_ci 32bf215546Sopenharmony_cistruct bi_worklist { 33bf215546Sopenharmony_ci /* # of instructions in the block */ 34bf215546Sopenharmony_ci unsigned count; 35bf215546Sopenharmony_ci 36bf215546Sopenharmony_ci /* Instructions in the block */ 37bf215546Sopenharmony_ci bi_instr **instructions; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_ci /* Bitset of instructions in the block ready for scheduling */ 40bf215546Sopenharmony_ci BITSET_WORD *worklist; 41bf215546Sopenharmony_ci 42bf215546Sopenharmony_ci /* The backwards dependency graph. nr_dependencies is the number of 43bf215546Sopenharmony_ci * unscheduled instructions that must still be scheduled after (before) 44bf215546Sopenharmony_ci * this instruction. dependents are which instructions need to be 45bf215546Sopenharmony_ci * scheduled before (after) this instruction. */ 46bf215546Sopenharmony_ci unsigned *dep_counts; 47bf215546Sopenharmony_ci BITSET_WORD **dependents; 48bf215546Sopenharmony_ci}; 49bf215546Sopenharmony_ci 50bf215546Sopenharmony_ci/* State of a single tuple and clause under construction */ 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_cistruct bi_reg_state { 53bf215546Sopenharmony_ci /* Number of register writes */ 54bf215546Sopenharmony_ci unsigned nr_writes; 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci /* Register reads, expressed as (equivalence classes of) 57bf215546Sopenharmony_ci * sources. 
Only 3 reads are allowed, but up to 2 may spill as 58bf215546Sopenharmony_ci * "forced" for the next scheduled tuple, provided such a tuple 59bf215546Sopenharmony_ci * can be constructed */ 60bf215546Sopenharmony_ci bi_index reads[5]; 61bf215546Sopenharmony_ci unsigned nr_reads; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci /* The previous tuple scheduled (= the next tuple executed in the 64bf215546Sopenharmony_ci * program) may require certain writes, in order to bypass the register 65bf215546Sopenharmony_ci * file and use a temporary passthrough for the value. Up to 2 such 66bf215546Sopenharmony_ci * constraints are architecturally satisfiable */ 67bf215546Sopenharmony_ci unsigned forced_count; 68bf215546Sopenharmony_ci bi_index forceds[2]; 69bf215546Sopenharmony_ci}; 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_cistruct bi_tuple_state { 72bf215546Sopenharmony_ci /* Is this the last tuple in the clause */ 73bf215546Sopenharmony_ci bool last; 74bf215546Sopenharmony_ci 75bf215546Sopenharmony_ci /* Scheduled ADD instruction, or null if none */ 76bf215546Sopenharmony_ci bi_instr *add; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci /* Reads for previous (succeeding) tuple */ 79bf215546Sopenharmony_ci bi_index prev_reads[5]; 80bf215546Sopenharmony_ci unsigned nr_prev_reads; 81bf215546Sopenharmony_ci bi_tuple *prev; 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_ci /* Register slot state for current tuple */ 84bf215546Sopenharmony_ci struct bi_reg_state reg; 85bf215546Sopenharmony_ci 86bf215546Sopenharmony_ci /* Constants are shared in the tuple. If constant_count is nonzero, it 87bf215546Sopenharmony_ci * is a size for constant count. Otherwise, fau is the slot read from 88bf215546Sopenharmony_ci * FAU, or zero if none is assigned. 
Ordinarily FAU slot 0 reads zero, 89bf215546Sopenharmony_ci * but within a tuple, that should be encoded as constant_count != 0 90bf215546Sopenharmony_ci * and constants[0] = constants[1] = 0 */ 91bf215546Sopenharmony_ci unsigned constant_count; 92bf215546Sopenharmony_ci 93bf215546Sopenharmony_ci union { 94bf215546Sopenharmony_ci uint32_t constants[2]; 95bf215546Sopenharmony_ci enum bir_fau fau; 96bf215546Sopenharmony_ci }; 97bf215546Sopenharmony_ci 98bf215546Sopenharmony_ci unsigned pcrel_idx; 99bf215546Sopenharmony_ci}; 100bf215546Sopenharmony_ci 101bf215546Sopenharmony_cistruct bi_const_state { 102bf215546Sopenharmony_ci unsigned constant_count; 103bf215546Sopenharmony_ci bool pcrel; /* applies to first const */ 104bf215546Sopenharmony_ci uint32_t constants[2]; 105bf215546Sopenharmony_ci 106bf215546Sopenharmony_ci /* Index of the constant into the clause */ 107bf215546Sopenharmony_ci unsigned word_idx; 108bf215546Sopenharmony_ci}; 109bf215546Sopenharmony_ci 110bf215546Sopenharmony_cienum bi_ftz_state { 111bf215546Sopenharmony_ci /* No flush-to-zero state assigned yet */ 112bf215546Sopenharmony_ci BI_FTZ_STATE_NONE, 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci /* Never flush-to-zero */ 115bf215546Sopenharmony_ci BI_FTZ_STATE_DISABLE, 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci /* Always flush-to-zero */ 118bf215546Sopenharmony_ci BI_FTZ_STATE_ENABLE, 119bf215546Sopenharmony_ci}; 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_cistruct bi_clause_state { 122bf215546Sopenharmony_ci /* Has a message-passing instruction already been assigned? 
*/ 123bf215546Sopenharmony_ci bool message; 124bf215546Sopenharmony_ci 125bf215546Sopenharmony_ci /* Indices already accessed, this needs to be tracked to avoid hazards 126bf215546Sopenharmony_ci * around message-passing instructions */ 127bf215546Sopenharmony_ci unsigned access_count; 128bf215546Sopenharmony_ci bi_index accesses[(BI_MAX_SRCS + BI_MAX_DESTS) * 16]; 129bf215546Sopenharmony_ci 130bf215546Sopenharmony_ci unsigned tuple_count; 131bf215546Sopenharmony_ci struct bi_const_state consts[8]; 132bf215546Sopenharmony_ci 133bf215546Sopenharmony_ci /* Numerical state of the clause */ 134bf215546Sopenharmony_ci enum bi_ftz_state ftz; 135bf215546Sopenharmony_ci}; 136bf215546Sopenharmony_ci 137bf215546Sopenharmony_ci/* Determines messsage type by checking the table and a few special cases. Only 138bf215546Sopenharmony_ci * case missing is tilebuffer instructions that access depth/stencil, which 139bf215546Sopenharmony_ci * require a Z_STENCIL message (to implement 140bf215546Sopenharmony_ci * ARM_shader_framebuffer_fetch_depth_stencil) */ 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_cistatic enum bifrost_message_type 143bf215546Sopenharmony_cibi_message_type_for_instr(bi_instr *ins) 144bf215546Sopenharmony_ci{ 145bf215546Sopenharmony_ci enum bifrost_message_type msg = bi_opcode_props[ins->op].message; 146bf215546Sopenharmony_ci bool ld_var_special = (ins->op == BI_OPCODE_LD_VAR_SPECIAL); 147bf215546Sopenharmony_ci 148bf215546Sopenharmony_ci if (ld_var_special && ins->varying_name == BI_VARYING_NAME_FRAG_Z) 149bf215546Sopenharmony_ci return BIFROST_MESSAGE_Z_STENCIL; 150bf215546Sopenharmony_ci 151bf215546Sopenharmony_ci if (msg == BIFROST_MESSAGE_LOAD && ins->seg == BI_SEG_UBO) 152bf215546Sopenharmony_ci return BIFROST_MESSAGE_ATTRIBUTE; 153bf215546Sopenharmony_ci 154bf215546Sopenharmony_ci return msg; 155bf215546Sopenharmony_ci} 156bf215546Sopenharmony_ci 157bf215546Sopenharmony_ci/* Attribute, texture, and UBO load (attribute message) instructions support 
158bf215546Sopenharmony_ci * bindless, so just check the message type */ 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_ciASSERTED static bool 161bf215546Sopenharmony_cibi_supports_dtsel(bi_instr *ins) 162bf215546Sopenharmony_ci{ 163bf215546Sopenharmony_ci switch (bi_message_type_for_instr(ins)) { 164bf215546Sopenharmony_ci case BIFROST_MESSAGE_ATTRIBUTE: 165bf215546Sopenharmony_ci return ins->op != BI_OPCODE_LD_GCLK_U64; 166bf215546Sopenharmony_ci case BIFROST_MESSAGE_TEX: 167bf215546Sopenharmony_ci return true; 168bf215546Sopenharmony_ci default: 169bf215546Sopenharmony_ci return false; 170bf215546Sopenharmony_ci } 171bf215546Sopenharmony_ci} 172bf215546Sopenharmony_ci 173bf215546Sopenharmony_ci/* Adds an edge to the dependency graph */ 174bf215546Sopenharmony_ci 175bf215546Sopenharmony_cistatic void 176bf215546Sopenharmony_cibi_push_dependency(unsigned parent, unsigned child, 177bf215546Sopenharmony_ci BITSET_WORD **dependents, unsigned *dep_counts) 178bf215546Sopenharmony_ci{ 179bf215546Sopenharmony_ci if (!BITSET_TEST(dependents[parent], child)) { 180bf215546Sopenharmony_ci BITSET_SET(dependents[parent], child); 181bf215546Sopenharmony_ci dep_counts[child]++; 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci} 184bf215546Sopenharmony_ci 185bf215546Sopenharmony_cistatic void 186bf215546Sopenharmony_ciadd_dependency(struct util_dynarray *table, unsigned index, unsigned child, 187bf215546Sopenharmony_ci BITSET_WORD **dependents, unsigned *dep_counts) 188bf215546Sopenharmony_ci{ 189bf215546Sopenharmony_ci assert(index < 64); 190bf215546Sopenharmony_ci util_dynarray_foreach(table + index, unsigned, parent) 191bf215546Sopenharmony_ci bi_push_dependency(*parent, child, dependents, dep_counts); 192bf215546Sopenharmony_ci} 193bf215546Sopenharmony_ci 194bf215546Sopenharmony_cistatic void 195bf215546Sopenharmony_cimark_access(struct util_dynarray *table, unsigned index, unsigned parent) 196bf215546Sopenharmony_ci{ 197bf215546Sopenharmony_ci assert(index < 64); 
198bf215546Sopenharmony_ci util_dynarray_append(&table[index], unsigned, parent); 199bf215546Sopenharmony_ci} 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_cistatic bool 202bf215546Sopenharmony_cibi_is_sched_barrier(bi_instr *I) 203bf215546Sopenharmony_ci{ 204bf215546Sopenharmony_ci switch (I->op) { 205bf215546Sopenharmony_ci case BI_OPCODE_BARRIER: 206bf215546Sopenharmony_ci case BI_OPCODE_DISCARD_F32: 207bf215546Sopenharmony_ci return true; 208bf215546Sopenharmony_ci default: 209bf215546Sopenharmony_ci return false; 210bf215546Sopenharmony_ci } 211bf215546Sopenharmony_ci} 212bf215546Sopenharmony_ci 213bf215546Sopenharmony_cistatic void 214bf215546Sopenharmony_cibi_create_dependency_graph(struct bi_worklist st, bool inorder, bool is_blend) 215bf215546Sopenharmony_ci{ 216bf215546Sopenharmony_ci struct util_dynarray last_read[64], last_write[64]; 217bf215546Sopenharmony_ci 218bf215546Sopenharmony_ci for (unsigned i = 0; i < 64; ++i) { 219bf215546Sopenharmony_ci util_dynarray_init(&last_read[i], NULL); 220bf215546Sopenharmony_ci util_dynarray_init(&last_write[i], NULL); 221bf215546Sopenharmony_ci } 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci /* Initialize dependency graph */ 224bf215546Sopenharmony_ci for (unsigned i = 0; i < st.count; ++i) { 225bf215546Sopenharmony_ci st.dependents[i] = 226bf215546Sopenharmony_ci calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD)); 227bf215546Sopenharmony_ci 228bf215546Sopenharmony_ci st.dep_counts[i] = 0; 229bf215546Sopenharmony_ci } 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci unsigned prev_msg = ~0; 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci /* Populate dependency graph */ 234bf215546Sopenharmony_ci for (signed i = st.count - 1; i >= 0; --i) { 235bf215546Sopenharmony_ci bi_instr *ins = st.instructions[i]; 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci bi_foreach_src(ins, s) { 238bf215546Sopenharmony_ci if (ins->src[s].type != BI_INDEX_REGISTER) continue; 239bf215546Sopenharmony_ci 
unsigned count = bi_count_read_registers(ins, s); 240bf215546Sopenharmony_ci 241bf215546Sopenharmony_ci for (unsigned c = 0; c < count; ++c) 242bf215546Sopenharmony_ci add_dependency(last_write, ins->src[s].value + c, i, st.dependents, st.dep_counts); 243bf215546Sopenharmony_ci } 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ci /* Keep message-passing ops in order. (This pass only cares 246bf215546Sopenharmony_ci * about bundling; reordering of message-passing instructions 247bf215546Sopenharmony_ci * happens during earlier scheduling.) */ 248bf215546Sopenharmony_ci 249bf215546Sopenharmony_ci if (bi_message_type_for_instr(ins)) { 250bf215546Sopenharmony_ci if (prev_msg != ~0) 251bf215546Sopenharmony_ci bi_push_dependency(prev_msg, i, st.dependents, st.dep_counts); 252bf215546Sopenharmony_ci 253bf215546Sopenharmony_ci prev_msg = i; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci /* Handle schedule barriers, adding All the deps */ 257bf215546Sopenharmony_ci if (inorder || bi_is_sched_barrier(ins)) { 258bf215546Sopenharmony_ci for (unsigned j = 0; j < st.count; ++j) { 259bf215546Sopenharmony_ci if (i == j) continue; 260bf215546Sopenharmony_ci 261bf215546Sopenharmony_ci bi_push_dependency(MAX2(i, j), MIN2(i, j), 262bf215546Sopenharmony_ci st.dependents, st.dep_counts); 263bf215546Sopenharmony_ci } 264bf215546Sopenharmony_ci } 265bf215546Sopenharmony_ci 266bf215546Sopenharmony_ci bi_foreach_dest(ins, d) { 267bf215546Sopenharmony_ci if (ins->dest[d].type != BI_INDEX_REGISTER) continue; 268bf215546Sopenharmony_ci unsigned dest = ins->dest[d].value; 269bf215546Sopenharmony_ci 270bf215546Sopenharmony_ci unsigned count = bi_count_write_registers(ins, d); 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci for (unsigned c = 0; c < count; ++c) { 273bf215546Sopenharmony_ci add_dependency(last_read, dest + c, i, st.dependents, st.dep_counts); 274bf215546Sopenharmony_ci add_dependency(last_write, dest + c, i, st.dependents, 
st.dep_counts); 275bf215546Sopenharmony_ci mark_access(last_write, dest + c, i); 276bf215546Sopenharmony_ci } 277bf215546Sopenharmony_ci } 278bf215546Sopenharmony_ci 279bf215546Sopenharmony_ci /* Blend shaders are allowed to clobber R0-R15. Treat these 280bf215546Sopenharmony_ci * registers like extra destinations for scheduling purposes. 281bf215546Sopenharmony_ci */ 282bf215546Sopenharmony_ci if (ins->op == BI_OPCODE_BLEND && !is_blend) { 283bf215546Sopenharmony_ci for (unsigned c = 0; c < 16; ++c) { 284bf215546Sopenharmony_ci add_dependency(last_read, c, i, st.dependents, st.dep_counts); 285bf215546Sopenharmony_ci add_dependency(last_write, c, i, st.dependents, st.dep_counts); 286bf215546Sopenharmony_ci mark_access(last_write, c, i); 287bf215546Sopenharmony_ci } 288bf215546Sopenharmony_ci } 289bf215546Sopenharmony_ci 290bf215546Sopenharmony_ci bi_foreach_src(ins, s) { 291bf215546Sopenharmony_ci if (ins->src[s].type != BI_INDEX_REGISTER) continue; 292bf215546Sopenharmony_ci 293bf215546Sopenharmony_ci unsigned count = bi_count_read_registers(ins, s); 294bf215546Sopenharmony_ci 295bf215546Sopenharmony_ci for (unsigned c = 0; c < count; ++c) 296bf215546Sopenharmony_ci mark_access(last_read, ins->src[s].value + c, i); 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci 300bf215546Sopenharmony_ci /* If there is a branch, all instructions depend on it, as interblock 301bf215546Sopenharmony_ci * execution must be purely in-order */ 302bf215546Sopenharmony_ci 303bf215546Sopenharmony_ci bi_instr *last = st.instructions[st.count - 1]; 304bf215546Sopenharmony_ci if (last->branch_target || last->op == BI_OPCODE_JUMP) { 305bf215546Sopenharmony_ci for (signed i = st.count - 2; i >= 0; --i) 306bf215546Sopenharmony_ci bi_push_dependency(st.count - 1, i, st.dependents, st.dep_counts); 307bf215546Sopenharmony_ci } 308bf215546Sopenharmony_ci 309bf215546Sopenharmony_ci /* Free the intermediate structures */ 310bf215546Sopenharmony_ci for (unsigned i 
= 0; i < 64; ++i) { 311bf215546Sopenharmony_ci util_dynarray_fini(&last_read[i]); 312bf215546Sopenharmony_ci util_dynarray_fini(&last_write[i]); 313bf215546Sopenharmony_ci } 314bf215546Sopenharmony_ci} 315bf215546Sopenharmony_ci 316bf215546Sopenharmony_ci/* Scheduler pseudoinstruction lowerings to enable instruction pairings. 317bf215546Sopenharmony_ci * Currently only support CUBEFACE -> *CUBEFACE1/+CUBEFACE2 318bf215546Sopenharmony_ci */ 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_cistatic bi_instr * 321bf215546Sopenharmony_cibi_lower_cubeface(bi_context *ctx, 322bf215546Sopenharmony_ci struct bi_clause_state *clause, struct bi_tuple_state *tuple) 323bf215546Sopenharmony_ci{ 324bf215546Sopenharmony_ci bi_instr *pinstr = tuple->add; 325bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); 326bf215546Sopenharmony_ci bi_instr *cubeface1 = bi_cubeface1_to(&b, pinstr->dest[0], 327bf215546Sopenharmony_ci pinstr->src[0], pinstr->src[1], pinstr->src[2]); 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci pinstr->op = BI_OPCODE_CUBEFACE2; 330bf215546Sopenharmony_ci pinstr->dest[0] = pinstr->dest[1]; 331bf215546Sopenharmony_ci pinstr->dest[1] = bi_null(); 332bf215546Sopenharmony_ci pinstr->src[0] = cubeface1->dest[0]; 333bf215546Sopenharmony_ci pinstr->src[1] = bi_null(); 334bf215546Sopenharmony_ci pinstr->src[2] = bi_null(); 335bf215546Sopenharmony_ci 336bf215546Sopenharmony_ci return cubeface1; 337bf215546Sopenharmony_ci} 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci/* Psuedo arguments are (rbase, address lo, address hi). 
We need *ATOM_C.i32 to 340bf215546Sopenharmony_ci * have the arguments (address lo, address hi, rbase), and +ATOM_CX to have the 341bf215546Sopenharmony_ci * arguments (rbase, address lo, address hi, rbase) */ 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_cistatic bi_instr * 344bf215546Sopenharmony_cibi_lower_atom_c(bi_context *ctx, struct bi_clause_state *clause, struct 345bf215546Sopenharmony_ci bi_tuple_state *tuple) 346bf215546Sopenharmony_ci{ 347bf215546Sopenharmony_ci bi_instr *pinstr = tuple->add; 348bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); 349bf215546Sopenharmony_ci bi_instr *atom_c = bi_atom_c_return_i32(&b, 350bf215546Sopenharmony_ci pinstr->src[1], pinstr->src[2], pinstr->src[0], 351bf215546Sopenharmony_ci pinstr->atom_opc); 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci if (bi_is_null(pinstr->dest[0])) 354bf215546Sopenharmony_ci atom_c->op = BI_OPCODE_ATOM_C_I32; 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci pinstr->op = BI_OPCODE_ATOM_CX; 357bf215546Sopenharmony_ci pinstr->src[3] = atom_c->src[2]; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci return atom_c; 360bf215546Sopenharmony_ci} 361bf215546Sopenharmony_ci 362bf215546Sopenharmony_cistatic bi_instr * 363bf215546Sopenharmony_cibi_lower_atom_c1(bi_context *ctx, struct bi_clause_state *clause, struct 364bf215546Sopenharmony_ci bi_tuple_state *tuple) 365bf215546Sopenharmony_ci{ 366bf215546Sopenharmony_ci bi_instr *pinstr = tuple->add; 367bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); 368bf215546Sopenharmony_ci bi_instr *atom_c = bi_atom_c1_return_i32(&b, 369bf215546Sopenharmony_ci pinstr->src[0], pinstr->src[1], pinstr->atom_opc); 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if (bi_is_null(pinstr->dest[0])) 372bf215546Sopenharmony_ci atom_c->op = BI_OPCODE_ATOM_C1_I32; 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci pinstr->op = BI_OPCODE_ATOM_CX; 375bf215546Sopenharmony_ci 
pinstr->src[2] = pinstr->src[1]; 376bf215546Sopenharmony_ci pinstr->src[1] = pinstr->src[0]; 377bf215546Sopenharmony_ci pinstr->src[3] = bi_dontcare(&b); 378bf215546Sopenharmony_ci pinstr->src[0] = bi_null(); 379bf215546Sopenharmony_ci 380bf215546Sopenharmony_ci return atom_c; 381bf215546Sopenharmony_ci} 382bf215546Sopenharmony_ci 383bf215546Sopenharmony_cistatic bi_instr * 384bf215546Sopenharmony_cibi_lower_seg_add(bi_context *ctx, 385bf215546Sopenharmony_ci struct bi_clause_state *clause, struct bi_tuple_state *tuple) 386bf215546Sopenharmony_ci{ 387bf215546Sopenharmony_ci bi_instr *pinstr = tuple->add; 388bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_before_instr(pinstr)); 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci bi_instr *fma = bi_seg_add_to(&b, pinstr->dest[0], pinstr->src[0], 391bf215546Sopenharmony_ci pinstr->preserve_null, pinstr->seg); 392bf215546Sopenharmony_ci 393bf215546Sopenharmony_ci pinstr->op = BI_OPCODE_SEG_ADD; 394bf215546Sopenharmony_ci pinstr->src[0] = pinstr->src[1]; 395bf215546Sopenharmony_ci pinstr->src[1] = bi_null(); 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci assert(pinstr->dest[0].type == BI_INDEX_REGISTER); 398bf215546Sopenharmony_ci pinstr->dest[0].value += 1; 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci return fma; 401bf215546Sopenharmony_ci} 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_cistatic bi_instr * 404bf215546Sopenharmony_cibi_lower_dtsel(bi_context *ctx, 405bf215546Sopenharmony_ci struct bi_clause_state *clause, struct bi_tuple_state *tuple) 406bf215546Sopenharmony_ci{ 407bf215546Sopenharmony_ci bi_instr *add = tuple->add; 408bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_before_instr(add)); 409bf215546Sopenharmony_ci 410bf215546Sopenharmony_ci bi_instr *dtsel = bi_dtsel_imm_to(&b, bi_temp(b.shader), 411bf215546Sopenharmony_ci add->src[0], add->table); 412bf215546Sopenharmony_ci add->src[0] = dtsel->dest[0]; 413bf215546Sopenharmony_ci 
414bf215546Sopenharmony_ci assert(bi_supports_dtsel(add)); 415bf215546Sopenharmony_ci return dtsel; 416bf215546Sopenharmony_ci} 417bf215546Sopenharmony_ci 418bf215546Sopenharmony_ci/* Flatten linked list to array for O(1) indexing */ 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_cistatic bi_instr ** 421bf215546Sopenharmony_cibi_flatten_block(bi_block *block, unsigned *len) 422bf215546Sopenharmony_ci{ 423bf215546Sopenharmony_ci if (list_is_empty(&block->instructions)) 424bf215546Sopenharmony_ci return NULL; 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci *len = list_length(&block->instructions); 427bf215546Sopenharmony_ci bi_instr **instructions = malloc(sizeof(bi_instr *) * (*len)); 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci unsigned i = 0; 430bf215546Sopenharmony_ci 431bf215546Sopenharmony_ci bi_foreach_instr_in_block(block, ins) 432bf215546Sopenharmony_ci instructions[i++] = ins; 433bf215546Sopenharmony_ci 434bf215546Sopenharmony_ci return instructions; 435bf215546Sopenharmony_ci} 436bf215546Sopenharmony_ci 437bf215546Sopenharmony_ci/* The worklist would track instructions without outstanding dependencies. For 438bf215546Sopenharmony_ci * debug, force in-order scheduling (no dependency graph is constructed). 
439bf215546Sopenharmony_ci */ 440bf215546Sopenharmony_ci 441bf215546Sopenharmony_cistatic struct bi_worklist 442bf215546Sopenharmony_cibi_initialize_worklist(bi_block *block, bool inorder, bool is_blend) 443bf215546Sopenharmony_ci{ 444bf215546Sopenharmony_ci struct bi_worklist st = { }; 445bf215546Sopenharmony_ci st.instructions = bi_flatten_block(block, &st.count); 446bf215546Sopenharmony_ci 447bf215546Sopenharmony_ci if (!st.count) 448bf215546Sopenharmony_ci return st; 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci st.dependents = calloc(st.count, sizeof(st.dependents[0])); 451bf215546Sopenharmony_ci st.dep_counts = calloc(st.count, sizeof(st.dep_counts[0])); 452bf215546Sopenharmony_ci 453bf215546Sopenharmony_ci bi_create_dependency_graph(st, inorder, is_blend); 454bf215546Sopenharmony_ci st.worklist = calloc(BITSET_WORDS(st.count), sizeof(BITSET_WORD)); 455bf215546Sopenharmony_ci 456bf215546Sopenharmony_ci for (unsigned i = 0; i < st.count; ++i) { 457bf215546Sopenharmony_ci if (st.dep_counts[i] == 0) 458bf215546Sopenharmony_ci BITSET_SET(st.worklist, i); 459bf215546Sopenharmony_ci } 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci return st; 462bf215546Sopenharmony_ci} 463bf215546Sopenharmony_ci 464bf215546Sopenharmony_cistatic void 465bf215546Sopenharmony_cibi_free_worklist(struct bi_worklist st) 466bf215546Sopenharmony_ci{ 467bf215546Sopenharmony_ci free(st.dep_counts); 468bf215546Sopenharmony_ci free(st.dependents); 469bf215546Sopenharmony_ci free(st.instructions); 470bf215546Sopenharmony_ci free(st.worklist); 471bf215546Sopenharmony_ci} 472bf215546Sopenharmony_ci 473bf215546Sopenharmony_cistatic void 474bf215546Sopenharmony_cibi_update_worklist(struct bi_worklist st, unsigned idx) 475bf215546Sopenharmony_ci{ 476bf215546Sopenharmony_ci assert(st.dep_counts[idx] == 0); 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ci if (!st.dependents[idx]) 479bf215546Sopenharmony_ci return; 480bf215546Sopenharmony_ci 481bf215546Sopenharmony_ci /* Iterate 
each dependent to remove one dependency (`done`), 482bf215546Sopenharmony_ci * adding dependents to the worklist where possible. */ 483bf215546Sopenharmony_ci 484bf215546Sopenharmony_ci unsigned i; 485bf215546Sopenharmony_ci BITSET_FOREACH_SET(i, st.dependents[idx], st.count) { 486bf215546Sopenharmony_ci assert(st.dep_counts[i] != 0); 487bf215546Sopenharmony_ci unsigned new_deps = --st.dep_counts[i]; 488bf215546Sopenharmony_ci 489bf215546Sopenharmony_ci if (new_deps == 0) 490bf215546Sopenharmony_ci BITSET_SET(st.worklist, i); 491bf215546Sopenharmony_ci } 492bf215546Sopenharmony_ci 493bf215546Sopenharmony_ci free(st.dependents[idx]); 494bf215546Sopenharmony_ci} 495bf215546Sopenharmony_ci 496bf215546Sopenharmony_ci/* Scheduler predicates */ 497bf215546Sopenharmony_ci 498bf215546Sopenharmony_ci/* IADDC.i32 can implement IADD.u32 if no saturation or swizzling is in use */ 499bf215546Sopenharmony_cistatic bool 500bf215546Sopenharmony_cibi_can_iaddc(bi_instr *ins) 501bf215546Sopenharmony_ci{ 502bf215546Sopenharmony_ci return (ins->op == BI_OPCODE_IADD_U32 && !ins->saturate && 503bf215546Sopenharmony_ci ins->src[0].swizzle == BI_SWIZZLE_H01 && 504bf215546Sopenharmony_ci ins->src[1].swizzle == BI_SWIZZLE_H01); 505bf215546Sopenharmony_ci} 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci/* 508bf215546Sopenharmony_ci * The encoding of *FADD.v2f16 only specifies a single abs flag. All abs 509bf215546Sopenharmony_ci * encodings are permitted by swapping operands; however, this scheme fails if 510bf215546Sopenharmony_ci * both operands are equal. Test for this case. 
511bf215546Sopenharmony_ci */ 512bf215546Sopenharmony_cistatic bool 513bf215546Sopenharmony_cibi_impacted_abs(bi_instr *I) 514bf215546Sopenharmony_ci{ 515bf215546Sopenharmony_ci return I->src[0].abs && I->src[1].abs && 516bf215546Sopenharmony_ci bi_is_word_equiv(I->src[0], I->src[1]); 517bf215546Sopenharmony_ci} 518bf215546Sopenharmony_ci 519bf215546Sopenharmony_cibool 520bf215546Sopenharmony_cibi_can_fma(bi_instr *ins) 521bf215546Sopenharmony_ci{ 522bf215546Sopenharmony_ci /* +IADD.i32 -> *IADDC.i32 */ 523bf215546Sopenharmony_ci if (bi_can_iaddc(ins)) 524bf215546Sopenharmony_ci return true; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci /* +MUX -> *CSEL */ 527bf215546Sopenharmony_ci if (bi_can_replace_with_csel(ins)) 528bf215546Sopenharmony_ci return true; 529bf215546Sopenharmony_ci 530bf215546Sopenharmony_ci /* *FADD.v2f16 has restricted abs modifiers, use +FADD.v2f16 instead */ 531bf215546Sopenharmony_ci if (ins->op == BI_OPCODE_FADD_V2F16 && bi_impacted_abs(ins)) 532bf215546Sopenharmony_ci return false; 533bf215546Sopenharmony_ci 534bf215546Sopenharmony_ci /* TODO: some additional fp16 constraints */ 535bf215546Sopenharmony_ci return bi_opcode_props[ins->op].fma; 536bf215546Sopenharmony_ci} 537bf215546Sopenharmony_ci 538bf215546Sopenharmony_cistatic bool 539bf215546Sopenharmony_cibi_impacted_fadd_widens(bi_instr *I) 540bf215546Sopenharmony_ci{ 541bf215546Sopenharmony_ci enum bi_swizzle swz0 = I->src[0].swizzle; 542bf215546Sopenharmony_ci enum bi_swizzle swz1 = I->src[1].swizzle; 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci return (swz0 == BI_SWIZZLE_H00 && swz1 == BI_SWIZZLE_H11) || 545bf215546Sopenharmony_ci (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H11) || 546bf215546Sopenharmony_ci (swz0 == BI_SWIZZLE_H11 && swz1 == BI_SWIZZLE_H00); 547bf215546Sopenharmony_ci} 548bf215546Sopenharmony_ci 549bf215546Sopenharmony_cibool 550bf215546Sopenharmony_cibi_can_add(bi_instr *ins) 551bf215546Sopenharmony_ci{ 552bf215546Sopenharmony_ci /* +FADD.v2f16 
lacks clamp modifier, use *FADD.v2f16 instead */ 553bf215546Sopenharmony_ci if (ins->op == BI_OPCODE_FADD_V2F16 && ins->clamp) 554bf215546Sopenharmony_ci return false; 555bf215546Sopenharmony_ci 556bf215546Sopenharmony_ci /* +FCMP.v2f16 lacks abs modifier, use *FCMP.v2f16 instead */ 557bf215546Sopenharmony_ci if (ins->op == BI_OPCODE_FCMP_V2F16 && (ins->src[0].abs || ins->src[1].abs)) 558bf215546Sopenharmony_ci return false; 559bf215546Sopenharmony_ci 560bf215546Sopenharmony_ci /* +FADD.f32 has restricted widens, use +FADD.f32 for the full set */ 561bf215546Sopenharmony_ci if (ins->op == BI_OPCODE_FADD_F32 && bi_impacted_fadd_widens(ins)) 562bf215546Sopenharmony_ci return false; 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci /* TODO: some additional fp16 constraints */ 565bf215546Sopenharmony_ci return bi_opcode_props[ins->op].add; 566bf215546Sopenharmony_ci} 567bf215546Sopenharmony_ci 568bf215546Sopenharmony_ci/* Architecturally, no single instruction has a "not last" constraint. However, 569bf215546Sopenharmony_ci * pseudoinstructions writing multiple destinations (expanding to multiple 570bf215546Sopenharmony_ci * paired instructions) can run afoul of the "no two writes on the last clause" 571bf215546Sopenharmony_ci * constraint, so we check for that here. 572bf215546Sopenharmony_ci * 573bf215546Sopenharmony_ci * Exception to the exception: TEXC, which writes to multiple sets of staging 574bf215546Sopenharmony_ci * registers. Staging registers bypass the usual register write mechanism so 575bf215546Sopenharmony_ci * this restriction does not apply. 
576bf215546Sopenharmony_ci */ 577bf215546Sopenharmony_ci 578bf215546Sopenharmony_cistatic bool 579bf215546Sopenharmony_cibi_must_not_last(bi_instr *ins) 580bf215546Sopenharmony_ci{ 581bf215546Sopenharmony_ci return !bi_is_null(ins->dest[0]) && !bi_is_null(ins->dest[1]) && 582bf215546Sopenharmony_ci (ins->op != BI_OPCODE_TEXC); 583bf215546Sopenharmony_ci} 584bf215546Sopenharmony_ci 585bf215546Sopenharmony_ci/* Check for a message-passing instruction. +DISCARD.f32 is special-cased; we 586bf215546Sopenharmony_ci * treat it as a message-passing instruction for the purpose of scheduling 587bf215546Sopenharmony_ci * despite no passing no logical message. Otherwise invalid encoding faults may 588bf215546Sopenharmony_ci * be raised for unknown reasons (possibly an errata). 589bf215546Sopenharmony_ci */ 590bf215546Sopenharmony_ci 591bf215546Sopenharmony_cibool 592bf215546Sopenharmony_cibi_must_message(bi_instr *ins) 593bf215546Sopenharmony_ci{ 594bf215546Sopenharmony_ci return (bi_opcode_props[ins->op].message != BIFROST_MESSAGE_NONE) || 595bf215546Sopenharmony_ci (ins->op == BI_OPCODE_DISCARD_F32); 596bf215546Sopenharmony_ci} 597bf215546Sopenharmony_ci 598bf215546Sopenharmony_cistatic bool 599bf215546Sopenharmony_cibi_fma_atomic(enum bi_opcode op) 600bf215546Sopenharmony_ci{ 601bf215546Sopenharmony_ci switch (op) { 602bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C_I32: 603bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C_I64: 604bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C1_I32: 605bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C1_I64: 606bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C1_RETURN_I32: 607bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C1_RETURN_I64: 608bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C_RETURN_I32: 609bf215546Sopenharmony_ci case BI_OPCODE_ATOM_C_RETURN_I64: 610bf215546Sopenharmony_ci case BI_OPCODE_ATOM_POST_I32: 611bf215546Sopenharmony_ci case BI_OPCODE_ATOM_POST_I64: 612bf215546Sopenharmony_ci case BI_OPCODE_ATOM_PRE_I64: 613bf215546Sopenharmony_ci return 
true; 614bf215546Sopenharmony_ci default: 615bf215546Sopenharmony_ci return false; 616bf215546Sopenharmony_ci } 617bf215546Sopenharmony_ci} 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_cibool 620bf215546Sopenharmony_cibi_reads_zero(bi_instr *ins) 621bf215546Sopenharmony_ci{ 622bf215546Sopenharmony_ci return !(bi_fma_atomic(ins->op) || ins->op == BI_OPCODE_IMULD); 623bf215546Sopenharmony_ci} 624bf215546Sopenharmony_ci 625bf215546Sopenharmony_cibool 626bf215546Sopenharmony_cibi_reads_temps(bi_instr *ins, unsigned src) 627bf215546Sopenharmony_ci{ 628bf215546Sopenharmony_ci switch (ins->op) { 629bf215546Sopenharmony_ci /* Cannot permute a temporary */ 630bf215546Sopenharmony_ci case BI_OPCODE_CLPER_I32: 631bf215546Sopenharmony_ci case BI_OPCODE_CLPER_OLD_I32: 632bf215546Sopenharmony_ci return src != 0; 633bf215546Sopenharmony_ci 634bf215546Sopenharmony_ci /* ATEST isn't supposed to be restricted, but in practice it always 635bf215546Sopenharmony_ci * wants to source its coverage mask input (source 0) from register 60, 636bf215546Sopenharmony_ci * which won't work properly if we put the input in a temp. This 637bf215546Sopenharmony_ci * requires workarounds in both RA and clause scheduling. 
638bf215546Sopenharmony_ci */ 639bf215546Sopenharmony_ci case BI_OPCODE_ATEST: 640bf215546Sopenharmony_ci return src != 0; 641bf215546Sopenharmony_ci 642bf215546Sopenharmony_ci case BI_OPCODE_IMULD: 643bf215546Sopenharmony_ci return false; 644bf215546Sopenharmony_ci default: 645bf215546Sopenharmony_ci return true; 646bf215546Sopenharmony_ci } 647bf215546Sopenharmony_ci} 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_cistatic bool 650bf215546Sopenharmony_cibi_impacted_t_modifiers(bi_instr *I, unsigned src) 651bf215546Sopenharmony_ci{ 652bf215546Sopenharmony_ci enum bi_swizzle swizzle = I->src[src].swizzle; 653bf215546Sopenharmony_ci 654bf215546Sopenharmony_ci switch (I->op) { 655bf215546Sopenharmony_ci case BI_OPCODE_F16_TO_F32: 656bf215546Sopenharmony_ci case BI_OPCODE_F16_TO_S32: 657bf215546Sopenharmony_ci case BI_OPCODE_F16_TO_U32: 658bf215546Sopenharmony_ci case BI_OPCODE_MKVEC_V2I16: 659bf215546Sopenharmony_ci case BI_OPCODE_S16_TO_F32: 660bf215546Sopenharmony_ci case BI_OPCODE_S16_TO_S32: 661bf215546Sopenharmony_ci case BI_OPCODE_U16_TO_F32: 662bf215546Sopenharmony_ci case BI_OPCODE_U16_TO_U32: 663bf215546Sopenharmony_ci return (swizzle != BI_SWIZZLE_H00); 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci case BI_OPCODE_BRANCH_F32: 666bf215546Sopenharmony_ci case BI_OPCODE_LOGB_F32: 667bf215546Sopenharmony_ci case BI_OPCODE_ILOGB_F32: 668bf215546Sopenharmony_ci case BI_OPCODE_FADD_F32: 669bf215546Sopenharmony_ci case BI_OPCODE_FCMP_F32: 670bf215546Sopenharmony_ci case BI_OPCODE_FREXPE_F32: 671bf215546Sopenharmony_ci case BI_OPCODE_FREXPM_F32: 672bf215546Sopenharmony_ci case BI_OPCODE_FROUND_F32: 673bf215546Sopenharmony_ci return (swizzle != BI_SWIZZLE_H01); 674bf215546Sopenharmony_ci 675bf215546Sopenharmony_ci case BI_OPCODE_IADD_S32: 676bf215546Sopenharmony_ci case BI_OPCODE_IADD_U32: 677bf215546Sopenharmony_ci case BI_OPCODE_ISUB_S32: 678bf215546Sopenharmony_ci case BI_OPCODE_ISUB_U32: 679bf215546Sopenharmony_ci case BI_OPCODE_IADD_V4S8: 
680bf215546Sopenharmony_ci case BI_OPCODE_IADD_V4U8: 681bf215546Sopenharmony_ci case BI_OPCODE_ISUB_V4S8: 682bf215546Sopenharmony_ci case BI_OPCODE_ISUB_V4U8: 683bf215546Sopenharmony_ci return (src == 1) && (swizzle != BI_SWIZZLE_H01); 684bf215546Sopenharmony_ci 685bf215546Sopenharmony_ci case BI_OPCODE_S8_TO_F32: 686bf215546Sopenharmony_ci case BI_OPCODE_S8_TO_S32: 687bf215546Sopenharmony_ci case BI_OPCODE_U8_TO_F32: 688bf215546Sopenharmony_ci case BI_OPCODE_U8_TO_U32: 689bf215546Sopenharmony_ci return (swizzle != BI_SWIZZLE_B0000); 690bf215546Sopenharmony_ci 691bf215546Sopenharmony_ci case BI_OPCODE_V2S8_TO_V2F16: 692bf215546Sopenharmony_ci case BI_OPCODE_V2S8_TO_V2S16: 693bf215546Sopenharmony_ci case BI_OPCODE_V2U8_TO_V2F16: 694bf215546Sopenharmony_ci case BI_OPCODE_V2U8_TO_V2U16: 695bf215546Sopenharmony_ci return (swizzle != BI_SWIZZLE_B0022); 696bf215546Sopenharmony_ci 697bf215546Sopenharmony_ci case BI_OPCODE_IADD_V2S16: 698bf215546Sopenharmony_ci case BI_OPCODE_IADD_V2U16: 699bf215546Sopenharmony_ci case BI_OPCODE_ISUB_V2S16: 700bf215546Sopenharmony_ci case BI_OPCODE_ISUB_V2U16: 701bf215546Sopenharmony_ci return (src == 1) && (swizzle >= BI_SWIZZLE_H11); 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci#if 0 704bf215546Sopenharmony_ci /* Restriction on IADD in 64-bit clauses on G72 */ 705bf215546Sopenharmony_ci case BI_OPCODE_IADD_S64: 706bf215546Sopenharmony_ci case BI_OPCODE_IADD_U64: 707bf215546Sopenharmony_ci return (src == 1) && (swizzle != BI_SWIZZLE_D0); 708bf215546Sopenharmony_ci#endif 709bf215546Sopenharmony_ci 710bf215546Sopenharmony_ci default: 711bf215546Sopenharmony_ci return false; 712bf215546Sopenharmony_ci } 713bf215546Sopenharmony_ci} 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_cibool 716bf215546Sopenharmony_cibi_reads_t(bi_instr *ins, unsigned src) 717bf215546Sopenharmony_ci{ 718bf215546Sopenharmony_ci /* Branch offset cannot come from passthrough */ 719bf215546Sopenharmony_ci if (bi_opcode_props[ins->op].branch) 
720bf215546Sopenharmony_ci return src != 2; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci /* Table can never read passthrough */ 723bf215546Sopenharmony_ci if (bi_opcode_props[ins->op].table) 724bf215546Sopenharmony_ci return false; 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci /* Staging register reads may happen before the succeeding register 727bf215546Sopenharmony_ci * block encodes a write, so effectively there is no passthrough */ 728bf215546Sopenharmony_ci if (bi_is_staging_src(ins, src)) 729bf215546Sopenharmony_ci return false; 730bf215546Sopenharmony_ci 731bf215546Sopenharmony_ci /* Bifrost cores newer than Mali G71 have restrictions on swizzles on 732bf215546Sopenharmony_ci * same-cycle temporaries. Check the list for these hazards. */ 733bf215546Sopenharmony_ci if (bi_impacted_t_modifiers(ins, src)) 734bf215546Sopenharmony_ci return false; 735bf215546Sopenharmony_ci 736bf215546Sopenharmony_ci /* Descriptor must not come from a passthrough */ 737bf215546Sopenharmony_ci switch (ins->op) { 738bf215546Sopenharmony_ci case BI_OPCODE_LD_CVT: 739bf215546Sopenharmony_ci case BI_OPCODE_LD_TILE: 740bf215546Sopenharmony_ci case BI_OPCODE_ST_CVT: 741bf215546Sopenharmony_ci case BI_OPCODE_ST_TILE: 742bf215546Sopenharmony_ci case BI_OPCODE_TEXC: 743bf215546Sopenharmony_ci return src != 2; 744bf215546Sopenharmony_ci case BI_OPCODE_BLEND: 745bf215546Sopenharmony_ci return src != 2 && src != 3; 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci /* +JUMP can't read the offset from T */ 748bf215546Sopenharmony_ci case BI_OPCODE_JUMP: 749bf215546Sopenharmony_ci return false; 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci /* Else, just check if we can read any temps */ 752bf215546Sopenharmony_ci default: 753bf215546Sopenharmony_ci return bi_reads_temps(ins, src); 754bf215546Sopenharmony_ci } 755bf215546Sopenharmony_ci} 756bf215546Sopenharmony_ci 757bf215546Sopenharmony_ci/* Counts the number of 64-bit constants required by a clause. 
TODO: We 758bf215546Sopenharmony_ci * might want to account for merging, right now we overestimate, but 759bf215546Sopenharmony_ci * that's probably fine most of the time */ 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_cistatic unsigned 762bf215546Sopenharmony_cibi_nconstants(struct bi_clause_state *clause) 763bf215546Sopenharmony_ci{ 764bf215546Sopenharmony_ci unsigned count_32 = 0; 765bf215546Sopenharmony_ci 766bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(clause->consts); ++i) 767bf215546Sopenharmony_ci count_32 += clause->consts[i].constant_count; 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_ci return DIV_ROUND_UP(count_32, 2); 770bf215546Sopenharmony_ci} 771bf215546Sopenharmony_ci 772bf215546Sopenharmony_ci/* Would there be space for constants if we added one tuple? */ 773bf215546Sopenharmony_ci 774bf215546Sopenharmony_cistatic bool 775bf215546Sopenharmony_cibi_space_for_more_constants(struct bi_clause_state *clause) 776bf215546Sopenharmony_ci{ 777bf215546Sopenharmony_ci return (bi_nconstants(clause) < 13 - (clause->tuple_count + 1)); 778bf215546Sopenharmony_ci} 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci/* Updates the FAU assignment for a tuple. 
A valid FAU assignment must be 781bf215546Sopenharmony_ci * possible (as a precondition), though not necessarily on the selected unit; 782bf215546Sopenharmony_ci * this is gauranteed per-instruction by bi_lower_fau and per-tuple by 783bf215546Sopenharmony_ci * bi_instr_schedulable */ 784bf215546Sopenharmony_ci 785bf215546Sopenharmony_cistatic bool 786bf215546Sopenharmony_cibi_update_fau(struct bi_clause_state *clause, 787bf215546Sopenharmony_ci struct bi_tuple_state *tuple, 788bf215546Sopenharmony_ci bi_instr *instr, bool fma, bool destructive) 789bf215546Sopenharmony_ci{ 790bf215546Sopenharmony_ci /* Maintain our own constants, for nondestructive mode */ 791bf215546Sopenharmony_ci uint32_t copied_constants[2], copied_count; 792bf215546Sopenharmony_ci unsigned *constant_count = &tuple->constant_count; 793bf215546Sopenharmony_ci uint32_t *constants = tuple->constants; 794bf215546Sopenharmony_ci enum bir_fau fau = tuple->fau; 795bf215546Sopenharmony_ci 796bf215546Sopenharmony_ci if (!destructive) { 797bf215546Sopenharmony_ci memcpy(copied_constants, tuple->constants, 798bf215546Sopenharmony_ci (*constant_count) * sizeof(constants[0])); 799bf215546Sopenharmony_ci copied_count = tuple->constant_count; 800bf215546Sopenharmony_ci 801bf215546Sopenharmony_ci constant_count = &copied_count; 802bf215546Sopenharmony_ci constants = copied_constants; 803bf215546Sopenharmony_ci } 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci bi_foreach_src(instr, s) { 806bf215546Sopenharmony_ci bi_index src = instr->src[s]; 807bf215546Sopenharmony_ci 808bf215546Sopenharmony_ci if (src.type == BI_INDEX_FAU) { 809bf215546Sopenharmony_ci bool no_constants = *constant_count == 0; 810bf215546Sopenharmony_ci bool no_other_fau = (fau == src.value) || !fau; 811bf215546Sopenharmony_ci bool mergable = no_constants && no_other_fau; 812bf215546Sopenharmony_ci 813bf215546Sopenharmony_ci if (destructive) { 814bf215546Sopenharmony_ci assert(mergable); 815bf215546Sopenharmony_ci tuple->fau = src.value; 
816bf215546Sopenharmony_ci } else if (!mergable) { 817bf215546Sopenharmony_ci return false; 818bf215546Sopenharmony_ci } 819bf215546Sopenharmony_ci 820bf215546Sopenharmony_ci fau = src.value; 821bf215546Sopenharmony_ci } else if (src.type == BI_INDEX_CONSTANT) { 822bf215546Sopenharmony_ci /* No need to reserve space if we have a fast 0 */ 823bf215546Sopenharmony_ci if (src.value == 0 && fma && bi_reads_zero(instr)) 824bf215546Sopenharmony_ci continue; 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_ci /* If there is a branch target, #0 by convention is the 827bf215546Sopenharmony_ci * PC-relative offset to the target */ 828bf215546Sopenharmony_ci bool pcrel = instr->branch_target && src.value == 0; 829bf215546Sopenharmony_ci bool found = false; 830bf215546Sopenharmony_ci 831bf215546Sopenharmony_ci for (unsigned i = 0; i < *constant_count; ++i) { 832bf215546Sopenharmony_ci found |= (constants[i] == src.value) && 833bf215546Sopenharmony_ci (i != tuple->pcrel_idx); 834bf215546Sopenharmony_ci } 835bf215546Sopenharmony_ci 836bf215546Sopenharmony_ci /* pcrel constants are unique, so don't match */ 837bf215546Sopenharmony_ci if (found && !pcrel) 838bf215546Sopenharmony_ci continue; 839bf215546Sopenharmony_ci 840bf215546Sopenharmony_ci bool no_fau = (*constant_count > 0) || !fau; 841bf215546Sopenharmony_ci bool mergable = no_fau && ((*constant_count) < 2); 842bf215546Sopenharmony_ci 843bf215546Sopenharmony_ci if (destructive) { 844bf215546Sopenharmony_ci assert(mergable); 845bf215546Sopenharmony_ci 846bf215546Sopenharmony_ci if (pcrel) 847bf215546Sopenharmony_ci tuple->pcrel_idx = *constant_count; 848bf215546Sopenharmony_ci } else if (!mergable) 849bf215546Sopenharmony_ci return false; 850bf215546Sopenharmony_ci 851bf215546Sopenharmony_ci constants[(*constant_count)++] = src.value; 852bf215546Sopenharmony_ci } 853bf215546Sopenharmony_ci } 854bf215546Sopenharmony_ci 855bf215546Sopenharmony_ci /* Constants per clause may be limited by tuple count */ 
856bf215546Sopenharmony_ci bool room_for_constants = (*constant_count == 0) || 857bf215546Sopenharmony_ci bi_space_for_more_constants(clause); 858bf215546Sopenharmony_ci 859bf215546Sopenharmony_ci if (destructive) 860bf215546Sopenharmony_ci assert(room_for_constants); 861bf215546Sopenharmony_ci else if (!room_for_constants) 862bf215546Sopenharmony_ci return false; 863bf215546Sopenharmony_ci 864bf215546Sopenharmony_ci return true; 865bf215546Sopenharmony_ci} 866bf215546Sopenharmony_ci 867bf215546Sopenharmony_ci/* Given an in-progress tuple, a candidate new instruction to add to the tuple, 868bf215546Sopenharmony_ci * and a source (index) from that candidate, determine whether this source is 869bf215546Sopenharmony_ci * "new", in the sense of requiring an additional read slot. That is, checks 870bf215546Sopenharmony_ci * whether the specified source reads from the register file via a read slot 871bf215546Sopenharmony_ci * (determined by its type and placement) and whether the source was already 872bf215546Sopenharmony_ci * specified by a prior read slot (to avoid double counting) */ 873bf215546Sopenharmony_ci 874bf215546Sopenharmony_cistatic bool 875bf215546Sopenharmony_cibi_tuple_is_new_src(bi_instr *instr, struct bi_reg_state *reg, unsigned src_idx) 876bf215546Sopenharmony_ci{ 877bf215546Sopenharmony_ci bi_index src = instr->src[src_idx]; 878bf215546Sopenharmony_ci 879bf215546Sopenharmony_ci /* Only consider sources which come from the register file */ 880bf215546Sopenharmony_ci if (!(src.type == BI_INDEX_NORMAL || src.type == BI_INDEX_REGISTER)) 881bf215546Sopenharmony_ci return false; 882bf215546Sopenharmony_ci 883bf215546Sopenharmony_ci /* Staging register reads bypass the usual register file mechanism */ 884bf215546Sopenharmony_ci if (bi_is_staging_src(instr, src_idx)) 885bf215546Sopenharmony_ci return false; 886bf215546Sopenharmony_ci 887bf215546Sopenharmony_ci /* If a source is already read in the tuple, it is already counted */ 888bf215546Sopenharmony_ci for 
(unsigned t = 0; t < reg->nr_reads; ++t) 889bf215546Sopenharmony_ci if (bi_is_word_equiv(src, reg->reads[t])) 890bf215546Sopenharmony_ci return false; 891bf215546Sopenharmony_ci 892bf215546Sopenharmony_ci /* If a source is read in _this instruction_, it is already counted */ 893bf215546Sopenharmony_ci for (unsigned t = 0; t < src_idx; ++t) 894bf215546Sopenharmony_ci if (bi_is_word_equiv(src, instr->src[t])) 895bf215546Sopenharmony_ci return false; 896bf215546Sopenharmony_ci 897bf215546Sopenharmony_ci return true; 898bf215546Sopenharmony_ci} 899bf215546Sopenharmony_ci 900bf215546Sopenharmony_ci/* Given two tuples in source order, count the number of register reads of the 901bf215546Sopenharmony_ci * successor, determined as the number of unique words accessed that aren't 902bf215546Sopenharmony_ci * written by the predecessor (since those are tempable). 903bf215546Sopenharmony_ci */ 904bf215546Sopenharmony_ci 905bf215546Sopenharmony_cistatic unsigned 906bf215546Sopenharmony_cibi_count_succ_reads(bi_index t0, bi_index t1, 907bf215546Sopenharmony_ci bi_index *succ_reads, unsigned nr_succ_reads) 908bf215546Sopenharmony_ci{ 909bf215546Sopenharmony_ci unsigned reads = 0; 910bf215546Sopenharmony_ci 911bf215546Sopenharmony_ci for (unsigned i = 0; i < nr_succ_reads; ++i) { 912bf215546Sopenharmony_ci bool unique = true; 913bf215546Sopenharmony_ci 914bf215546Sopenharmony_ci for (unsigned j = 0; j < i; ++j) 915bf215546Sopenharmony_ci if (bi_is_word_equiv(succ_reads[i], succ_reads[j])) 916bf215546Sopenharmony_ci unique = false; 917bf215546Sopenharmony_ci 918bf215546Sopenharmony_ci if (!unique) 919bf215546Sopenharmony_ci continue; 920bf215546Sopenharmony_ci 921bf215546Sopenharmony_ci if (bi_is_word_equiv(succ_reads[i], t0)) 922bf215546Sopenharmony_ci continue; 923bf215546Sopenharmony_ci 924bf215546Sopenharmony_ci if (bi_is_word_equiv(succ_reads[i], t1)) 925bf215546Sopenharmony_ci continue; 926bf215546Sopenharmony_ci 927bf215546Sopenharmony_ci reads++; 928bf215546Sopenharmony_ci 
} 929bf215546Sopenharmony_ci 930bf215546Sopenharmony_ci return reads; 931bf215546Sopenharmony_ci} 932bf215546Sopenharmony_ci 933bf215546Sopenharmony_ci/* Not all instructions can read from the staging passthrough (as determined by 934bf215546Sopenharmony_ci * reads_t), check if a given pair of instructions has such a restriction. Note 935bf215546Sopenharmony_ci * we also use this mechanism to prevent data races around staging register 936bf215546Sopenharmony_ci * reads, so we allow the input source to potentially be vector-valued */ 937bf215546Sopenharmony_ci 938bf215546Sopenharmony_cistatic bool 939bf215546Sopenharmony_cibi_has_staging_passthrough_hazard(bi_index fma, bi_instr *add) 940bf215546Sopenharmony_ci{ 941bf215546Sopenharmony_ci bi_foreach_src(add, s) { 942bf215546Sopenharmony_ci bi_index src = add->src[s]; 943bf215546Sopenharmony_ci 944bf215546Sopenharmony_ci if (src.type != BI_INDEX_REGISTER) 945bf215546Sopenharmony_ci continue; 946bf215546Sopenharmony_ci 947bf215546Sopenharmony_ci unsigned count = bi_count_read_registers(add, s); 948bf215546Sopenharmony_ci bool read = false; 949bf215546Sopenharmony_ci 950bf215546Sopenharmony_ci for (unsigned d = 0; d < count; ++d) 951bf215546Sopenharmony_ci read |= bi_is_equiv(fma, bi_register(src.value + d)); 952bf215546Sopenharmony_ci 953bf215546Sopenharmony_ci if (read && !bi_reads_t(add, s)) 954bf215546Sopenharmony_ci return true; 955bf215546Sopenharmony_ci } 956bf215546Sopenharmony_ci 957bf215546Sopenharmony_ci return false; 958bf215546Sopenharmony_ci} 959bf215546Sopenharmony_ci 960bf215546Sopenharmony_ci/* Likewise for cross-tuple passthrough (reads_temps) */ 961bf215546Sopenharmony_ci 962bf215546Sopenharmony_cistatic bool 963bf215546Sopenharmony_cibi_has_cross_passthrough_hazard(bi_tuple *succ, bi_instr *ins) 964bf215546Sopenharmony_ci{ 965bf215546Sopenharmony_ci bi_foreach_instr_in_tuple(succ, pins) { 966bf215546Sopenharmony_ci bi_foreach_src(pins, s) { 967bf215546Sopenharmony_ci if 
(bi_is_word_equiv(ins->dest[0], pins->src[s]) && 968bf215546Sopenharmony_ci !bi_reads_temps(pins, s)) 969bf215546Sopenharmony_ci return true; 970bf215546Sopenharmony_ci } 971bf215546Sopenharmony_ci } 972bf215546Sopenharmony_ci 973bf215546Sopenharmony_ci return false; 974bf215546Sopenharmony_ci} 975bf215546Sopenharmony_ci 976bf215546Sopenharmony_ci/* Is a register written other than the staging mechanism? ATEST is special, 977bf215546Sopenharmony_ci * writing to both a staging register and a regular register (fixed packing). 978bf215546Sopenharmony_ci * BLEND is special since it has to write r48 the normal way even if it never 979bf215546Sopenharmony_ci * gets read. This depends on liveness analysis, as a register is not needed 980bf215546Sopenharmony_ci * for a write that will be discarded after one tuple. */ 981bf215546Sopenharmony_ci 982bf215546Sopenharmony_cistatic unsigned 983bf215546Sopenharmony_cibi_write_count(bi_instr *instr, uint64_t live_after_temp) 984bf215546Sopenharmony_ci{ 985bf215546Sopenharmony_ci if (instr->op == BI_OPCODE_ATEST || instr->op == BI_OPCODE_BLEND) 986bf215546Sopenharmony_ci return 1; 987bf215546Sopenharmony_ci 988bf215546Sopenharmony_ci unsigned count = 0; 989bf215546Sopenharmony_ci 990bf215546Sopenharmony_ci bi_foreach_dest(instr, d) { 991bf215546Sopenharmony_ci if (d == 0 && bi_opcode_props[instr->op].sr_write) 992bf215546Sopenharmony_ci continue; 993bf215546Sopenharmony_ci 994bf215546Sopenharmony_ci if (bi_is_null(instr->dest[d])) 995bf215546Sopenharmony_ci continue; 996bf215546Sopenharmony_ci 997bf215546Sopenharmony_ci assert(instr->dest[0].type == BI_INDEX_REGISTER); 998bf215546Sopenharmony_ci if (live_after_temp & BITFIELD64_BIT(instr->dest[0].value)) 999bf215546Sopenharmony_ci count++; 1000bf215546Sopenharmony_ci } 1001bf215546Sopenharmony_ci 1002bf215546Sopenharmony_ci return count; 1003bf215546Sopenharmony_ci} 1004bf215546Sopenharmony_ci 1005bf215546Sopenharmony_ci/* 1006bf215546Sopenharmony_ci * Test if an instruction 
required flush-to-zero mode. Currently only supported 1007bf215546Sopenharmony_ci * for f16<-->f32 conversions to implement fquantize16 1008bf215546Sopenharmony_ci */ 1009bf215546Sopenharmony_cistatic bool 1010bf215546Sopenharmony_cibi_needs_ftz(bi_instr *I) 1011bf215546Sopenharmony_ci{ 1012bf215546Sopenharmony_ci return (I->op == BI_OPCODE_F16_TO_F32 || 1013bf215546Sopenharmony_ci I->op == BI_OPCODE_V2F32_TO_V2F16) && I->ftz; 1014bf215546Sopenharmony_ci} 1015bf215546Sopenharmony_ci 1016bf215546Sopenharmony_ci/* 1017bf215546Sopenharmony_ci * Test if an instruction would be numerically incompatible with the clause. At 1018bf215546Sopenharmony_ci * present we only consider flush-to-zero modes. 1019bf215546Sopenharmony_ci */ 1020bf215546Sopenharmony_cistatic bool 1021bf215546Sopenharmony_cibi_numerically_incompatible(struct bi_clause_state *clause, bi_instr *instr) 1022bf215546Sopenharmony_ci{ 1023bf215546Sopenharmony_ci return (clause->ftz != BI_FTZ_STATE_NONE) && 1024bf215546Sopenharmony_ci ((clause->ftz == BI_FTZ_STATE_ENABLE) != bi_needs_ftz(instr)); 1025bf215546Sopenharmony_ci} 1026bf215546Sopenharmony_ci 1027bf215546Sopenharmony_ci/* Instruction placement entails two questions: what subset of instructions in 1028bf215546Sopenharmony_ci * the block can legally be scheduled? and of those which is the best? That is, 1029bf215546Sopenharmony_ci * we seek to maximize a cost function on a subset of the worklist satisfying a 1030bf215546Sopenharmony_ci * particular predicate. The necessary predicate is determined entirely by 1031bf215546Sopenharmony_ci * Bifrost's architectural limitations and is described in the accompanying 1032bf215546Sopenharmony_ci * whitepaper. The cost function is a heuristic. 
*/ 1033bf215546Sopenharmony_ci 1034bf215546Sopenharmony_cistatic bool 1035bf215546Sopenharmony_cibi_instr_schedulable(bi_instr *instr, 1036bf215546Sopenharmony_ci struct bi_clause_state *clause, 1037bf215546Sopenharmony_ci struct bi_tuple_state *tuple, 1038bf215546Sopenharmony_ci uint64_t live_after_temp, 1039bf215546Sopenharmony_ci bool fma) 1040bf215546Sopenharmony_ci{ 1041bf215546Sopenharmony_ci /* The units must match */ 1042bf215546Sopenharmony_ci if ((fma && !bi_can_fma(instr)) || (!fma && !bi_can_add(instr))) 1043bf215546Sopenharmony_ci return false; 1044bf215546Sopenharmony_ci 1045bf215546Sopenharmony_ci /* There can only be one message-passing instruction per clause */ 1046bf215546Sopenharmony_ci if (bi_must_message(instr) && clause->message) 1047bf215546Sopenharmony_ci return false; 1048bf215546Sopenharmony_ci 1049bf215546Sopenharmony_ci /* Some instructions have placement requirements */ 1050bf215546Sopenharmony_ci if (bi_opcode_props[instr->op].last && !tuple->last) 1051bf215546Sopenharmony_ci return false; 1052bf215546Sopenharmony_ci 1053bf215546Sopenharmony_ci if (bi_must_not_last(instr) && tuple->last) 1054bf215546Sopenharmony_ci return false; 1055bf215546Sopenharmony_ci 1056bf215546Sopenharmony_ci /* Numerical properties must be compatible with the clause */ 1057bf215546Sopenharmony_ci if (bi_numerically_incompatible(clause, instr)) 1058bf215546Sopenharmony_ci return false; 1059bf215546Sopenharmony_ci 1060bf215546Sopenharmony_ci /* Message-passing instructions are not guaranteed write within the 1061bf215546Sopenharmony_ci * same clause (most likely they will not), so if a later instruction 1062bf215546Sopenharmony_ci * in the clause accesses the destination, the message-passing 1063bf215546Sopenharmony_ci * instruction can't be scheduled */ 1064bf215546Sopenharmony_ci if (bi_opcode_props[instr->op].sr_write) { 1065bf215546Sopenharmony_ci bi_foreach_dest(instr, d) { 1066bf215546Sopenharmony_ci if (bi_is_null(instr->dest[d])) 
1067bf215546Sopenharmony_ci continue; 1068bf215546Sopenharmony_ci 1069bf215546Sopenharmony_ci unsigned nr = bi_count_write_registers(instr, d); 1070bf215546Sopenharmony_ci assert(instr->dest[d].type == BI_INDEX_REGISTER); 1071bf215546Sopenharmony_ci unsigned reg = instr->dest[d].value; 1072bf215546Sopenharmony_ci 1073bf215546Sopenharmony_ci for (unsigned i = 0; i < clause->access_count; ++i) { 1074bf215546Sopenharmony_ci bi_index idx = clause->accesses[i]; 1075bf215546Sopenharmony_ci for (unsigned d = 0; d < nr; ++d) { 1076bf215546Sopenharmony_ci if (bi_is_equiv(bi_register(reg + d), idx)) 1077bf215546Sopenharmony_ci return false; 1078bf215546Sopenharmony_ci } 1079bf215546Sopenharmony_ci } 1080bf215546Sopenharmony_ci } 1081bf215546Sopenharmony_ci } 1082bf215546Sopenharmony_ci 1083bf215546Sopenharmony_ci if (bi_opcode_props[instr->op].sr_read && !bi_is_null(instr->src[0])) { 1084bf215546Sopenharmony_ci unsigned nr = bi_count_read_registers(instr, 0); 1085bf215546Sopenharmony_ci assert(instr->src[0].type == BI_INDEX_REGISTER); 1086bf215546Sopenharmony_ci unsigned reg = instr->src[0].value; 1087bf215546Sopenharmony_ci 1088bf215546Sopenharmony_ci for (unsigned i = 0; i < clause->access_count; ++i) { 1089bf215546Sopenharmony_ci bi_index idx = clause->accesses[i]; 1090bf215546Sopenharmony_ci for (unsigned d = 0; d < nr; ++d) { 1091bf215546Sopenharmony_ci if (bi_is_equiv(bi_register(reg + d), idx)) 1092bf215546Sopenharmony_ci return false; 1093bf215546Sopenharmony_ci } 1094bf215546Sopenharmony_ci } 1095bf215546Sopenharmony_ci } 1096bf215546Sopenharmony_ci 1097bf215546Sopenharmony_ci /* If FAU is already assigned, we may not disrupt that. 
Do a 1098bf215546Sopenharmony_ci * non-disruptive test update */ 1099bf215546Sopenharmony_ci if (!bi_update_fau(clause, tuple, instr, fma, false)) 1100bf215546Sopenharmony_ci return false; 1101bf215546Sopenharmony_ci 1102bf215546Sopenharmony_ci /* If this choice of FMA would force a staging passthrough, the ADD 1103bf215546Sopenharmony_ci * instruction must support such a passthrough */ 1104bf215546Sopenharmony_ci if (tuple->add && bi_has_staging_passthrough_hazard(instr->dest[0], tuple->add)) 1105bf215546Sopenharmony_ci return false; 1106bf215546Sopenharmony_ci 1107bf215546Sopenharmony_ci /* If this choice of destination would force a cross-tuple passthrough, the next tuple must support that */ 1108bf215546Sopenharmony_ci if (tuple->prev && bi_has_cross_passthrough_hazard(tuple->prev, instr)) 1109bf215546Sopenharmony_ci return false; 1110bf215546Sopenharmony_ci 1111bf215546Sopenharmony_ci /* Register file writes are limited */ 1112bf215546Sopenharmony_ci unsigned total_writes = tuple->reg.nr_writes; 1113bf215546Sopenharmony_ci total_writes += bi_write_count(instr, live_after_temp); 1114bf215546Sopenharmony_ci 1115bf215546Sopenharmony_ci /* Last tuple in a clause can only write a single value */ 1116bf215546Sopenharmony_ci if (tuple->last && total_writes > 1) 1117bf215546Sopenharmony_ci return false; 1118bf215546Sopenharmony_ci 1119bf215546Sopenharmony_ci /* Register file reads are limited, so count unique */ 1120bf215546Sopenharmony_ci 1121bf215546Sopenharmony_ci unsigned unique_new_srcs = 0; 1122bf215546Sopenharmony_ci 1123bf215546Sopenharmony_ci bi_foreach_src(instr, s) { 1124bf215546Sopenharmony_ci if (bi_tuple_is_new_src(instr, &tuple->reg, s)) 1125bf215546Sopenharmony_ci unique_new_srcs++; 1126bf215546Sopenharmony_ci } 1127bf215546Sopenharmony_ci 1128bf215546Sopenharmony_ci unsigned total_srcs = tuple->reg.nr_reads + unique_new_srcs; 1129bf215546Sopenharmony_ci 1130bf215546Sopenharmony_ci bool can_spill_to_moves = (!tuple->add); 1131bf215546Sopenharmony_ci 
can_spill_to_moves &= (bi_nconstants(clause) < 13 - (clause->tuple_count + 2)); 1132bf215546Sopenharmony_ci can_spill_to_moves &= (clause->tuple_count < 7); 1133bf215546Sopenharmony_ci 1134bf215546Sopenharmony_ci /* However, we can get an extra 1 or 2 sources by inserting moves */ 1135bf215546Sopenharmony_ci if (total_srcs > (can_spill_to_moves ? 4 : 3)) 1136bf215546Sopenharmony_ci return false; 1137bf215546Sopenharmony_ci 1138bf215546Sopenharmony_ci /* Count effective reads for the successor */ 1139bf215546Sopenharmony_ci unsigned succ_reads = bi_count_succ_reads(instr->dest[0], 1140bf215546Sopenharmony_ci tuple->add ? tuple->add->dest[0] : bi_null(), 1141bf215546Sopenharmony_ci tuple->prev_reads, tuple->nr_prev_reads); 1142bf215546Sopenharmony_ci 1143bf215546Sopenharmony_ci /* Successor must satisfy R+W <= 4, so we require W <= 4-R */ 1144bf215546Sopenharmony_ci if ((signed) total_writes > (4 - (signed) succ_reads)) 1145bf215546Sopenharmony_ci return false; 1146bf215546Sopenharmony_ci 1147bf215546Sopenharmony_ci return true; 1148bf215546Sopenharmony_ci} 1149bf215546Sopenharmony_ci 1150bf215546Sopenharmony_cistatic signed 1151bf215546Sopenharmony_cibi_instr_cost(bi_instr *instr, struct bi_tuple_state *tuple) 1152bf215546Sopenharmony_ci{ 1153bf215546Sopenharmony_ci signed cost = 0; 1154bf215546Sopenharmony_ci 1155bf215546Sopenharmony_ci /* Instructions that can schedule to either FMA or to ADD should be 1156bf215546Sopenharmony_ci * deprioritized since they're easier to reschedule elsewhere */ 1157bf215546Sopenharmony_ci if (bi_can_fma(instr) && bi_can_add(instr)) 1158bf215546Sopenharmony_ci cost++; 1159bf215546Sopenharmony_ci 1160bf215546Sopenharmony_ci /* Message-passing instructions impose constraints on the registers 1161bf215546Sopenharmony_ci * later in the clause, so schedule them as late within a clause as 1162bf215546Sopenharmony_ci * possible (<==> prioritize them since we're backwards <==> decrease 1163bf215546Sopenharmony_ci * cost) */ 
1164bf215546Sopenharmony_ci if (bi_must_message(instr)) 1165bf215546Sopenharmony_ci cost--; 1166bf215546Sopenharmony_ci 1167bf215546Sopenharmony_ci /* Last instructions are big constraints (XXX: no effect on shader-db) */ 1168bf215546Sopenharmony_ci if (bi_opcode_props[instr->op].last) 1169bf215546Sopenharmony_ci cost -= 2; 1170bf215546Sopenharmony_ci 1171bf215546Sopenharmony_ci return cost; 1172bf215546Sopenharmony_ci} 1173bf215546Sopenharmony_ci /* Picks the worklist entry to schedule next. Walks every set bit of st.worklist below st.count, skips instructions rejected by bi_instr_schedulable() for this clause, tuple and unit, and keeps the one with the lowest bi_instr_cost(). Because the comparison is <=, equal costs resolve to the highest index. Returns that index, or ~0 if nothing is schedulable (the caller tests idx >= st.count). */ 1174bf215546Sopenharmony_cistatic unsigned 1175bf215546Sopenharmony_cibi_choose_index(struct bi_worklist st, 1176bf215546Sopenharmony_ci struct bi_clause_state *clause, 1177bf215546Sopenharmony_ci struct bi_tuple_state *tuple, 1178bf215546Sopenharmony_ci uint64_t live_after_temp, 1179bf215546Sopenharmony_ci bool fma) 1180bf215546Sopenharmony_ci{ 1181bf215546Sopenharmony_ci unsigned i, best_idx = ~0; 1182bf215546Sopenharmony_ci signed best_cost = INT_MAX; 1183bf215546Sopenharmony_ci 1184bf215546Sopenharmony_ci BITSET_FOREACH_SET(i, st.worklist, st.count) { 1185bf215546Sopenharmony_ci bi_instr *instr = st.instructions[i]; 1186bf215546Sopenharmony_ci 1187bf215546Sopenharmony_ci if (!bi_instr_schedulable(instr, clause, tuple, live_after_temp, fma)) 1188bf215546Sopenharmony_ci continue; 1189bf215546Sopenharmony_ci 1190bf215546Sopenharmony_ci signed cost = bi_instr_cost(instr, tuple); 1191bf215546Sopenharmony_ci 1192bf215546Sopenharmony_ci /* Tie break in favour of later instructions, under the 1193bf215546Sopenharmony_ci * assumption this promotes temporary usage (reducing pressure 1194bf215546Sopenharmony_ci * on the register file). This is a side effect of a prepass 1195bf215546Sopenharmony_ci * scheduling for pressure. 
*/ 1196bf215546Sopenharmony_ci 1197bf215546Sopenharmony_ci if (cost <= best_cost) { 1198bf215546Sopenharmony_ci best_idx = i; 1199bf215546Sopenharmony_ci best_cost = cost; 1200bf215546Sopenharmony_ci } 1201bf215546Sopenharmony_ci } 1202bf215546Sopenharmony_ci 1203bf215546Sopenharmony_ci return best_idx; 1204bf215546Sopenharmony_ci} 1205bf215546Sopenharmony_ci /* Records the effect of committing instr to the tuple under construction: updates FAU state through bi_update_fau(), appends all of instr->src and instr->dest to clause->accesses (always BI_MAX_SRCS + BI_MAX_DESTS entries), adds bi_write_count() to the tuple register write count, appends every source that bi_tuple_is_new_src() reports as new to tuple->reg.reads, and sets clause->ftz from bi_needs_ftz(). It does not touch the worklist. */ 1206bf215546Sopenharmony_cistatic void 1207bf215546Sopenharmony_cibi_pop_instr(struct bi_clause_state *clause, struct bi_tuple_state *tuple, 1208bf215546Sopenharmony_ci bi_instr *instr, uint64_t live_after_temp, bool fma) 1209bf215546Sopenharmony_ci{ 1210bf215546Sopenharmony_ci bi_update_fau(clause, tuple, instr, fma, true); 1211bf215546Sopenharmony_ci 1212bf215546Sopenharmony_ci /* TODO: maybe opt a bit? or maybe doesn't matter */ 1213bf215546Sopenharmony_ci assert(clause->access_count + BI_MAX_SRCS + BI_MAX_DESTS <= ARRAY_SIZE(clause->accesses)); 1214bf215546Sopenharmony_ci memcpy(clause->accesses + clause->access_count, instr->src, sizeof(instr->src)); 1215bf215546Sopenharmony_ci clause->access_count += BI_MAX_SRCS; 1216bf215546Sopenharmony_ci memcpy(clause->accesses + clause->access_count, instr->dest, sizeof(instr->dest)); 1217bf215546Sopenharmony_ci clause->access_count += BI_MAX_DESTS; 1218bf215546Sopenharmony_ci tuple->reg.nr_writes += bi_write_count(instr, live_after_temp); 1219bf215546Sopenharmony_ci 1220bf215546Sopenharmony_ci bi_foreach_src(instr, s) { 1221bf215546Sopenharmony_ci if (bi_tuple_is_new_src(instr, &tuple->reg, s)) 1222bf215546Sopenharmony_ci tuple->reg.reads[tuple->reg.nr_reads++] = instr->src[s]; 1223bf215546Sopenharmony_ci } 1224bf215546Sopenharmony_ci 1225bf215546Sopenharmony_ci /* This could be optimized to allow pairing integer instructions with 1226bf215546Sopenharmony_ci * special flush-to-zero instructions, but punting on this until we have 1227bf215546Sopenharmony_ci * a workload that cares. 
1228bf215546Sopenharmony_ci */ 1229bf215546Sopenharmony_ci clause->ftz = bi_needs_ftz(instr) ? BI_FTZ_STATE_ENABLE : 1230bf215546Sopenharmony_ci BI_FTZ_STATE_DISABLE; 1231bf215546Sopenharmony_ci} 1232bf215546Sopenharmony_ci 1233bf215546Sopenharmony_ci/* Choose the best instruction and pop it off the worklist. Returns NULL if no 1234bf215546Sopenharmony_ci * instruction is available. This function is destructive. */ 1235bf215546Sopenharmony_ci /* Order of attempts: (1) if tuple->add is already set and is CUBEFACE, ATOM_RETURN_I32, ATOM1_RETURN_I32, SEG_ADD_I64, or has its table field set, the result of the matching bi_lower_* helper is returned and the worklist is not consulted; (2) when filling the ADD slot (fma false) with more than 3 previous reads, a MOV of prev_reads[0] onto itself is built before the previous tuple, popped, and returned; (3) otherwise the index from bi_choose_index() is cleared from the worklist, popped, and returned after opcode fixups. */ 1236bf215546Sopenharmony_cistatic bi_instr * 1237bf215546Sopenharmony_cibi_take_instr(bi_context *ctx, struct bi_worklist st, 1238bf215546Sopenharmony_ci struct bi_clause_state *clause, 1239bf215546Sopenharmony_ci struct bi_tuple_state *tuple, 1240bf215546Sopenharmony_ci uint64_t live_after_temp, 1241bf215546Sopenharmony_ci bool fma) 1242bf215546Sopenharmony_ci{ 1243bf215546Sopenharmony_ci if (tuple->add && tuple->add->op == BI_OPCODE_CUBEFACE) 1244bf215546Sopenharmony_ci return bi_lower_cubeface(ctx, clause, tuple); 1245bf215546Sopenharmony_ci else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM_RETURN_I32) 1246bf215546Sopenharmony_ci return bi_lower_atom_c(ctx, clause, tuple); 1247bf215546Sopenharmony_ci else if (tuple->add && tuple->add->op == BI_OPCODE_ATOM1_RETURN_I32) 1248bf215546Sopenharmony_ci return bi_lower_atom_c1(ctx, clause, tuple); 1249bf215546Sopenharmony_ci else if (tuple->add && tuple->add->op == BI_OPCODE_SEG_ADD_I64) 1250bf215546Sopenharmony_ci return bi_lower_seg_add(ctx, clause, tuple); 1251bf215546Sopenharmony_ci else if (tuple->add && tuple->add->table) 1252bf215546Sopenharmony_ci return bi_lower_dtsel(ctx, clause, tuple); 1253bf215546Sopenharmony_ci 1254bf215546Sopenharmony_ci /* TODO: Optimize these moves */ 1255bf215546Sopenharmony_ci if (!fma && tuple->nr_prev_reads > 3) { 1256bf215546Sopenharmony_ci /* Only spill by one source for now */ 1257bf215546Sopenharmony_ci assert(tuple->nr_prev_reads == 4); 1258bf215546Sopenharmony_ci 1259bf215546Sopenharmony_ci /* Pick a source to spill */ 
1260bf215546Sopenharmony_ci bi_index src = tuple->prev_reads[0]; 1261bf215546Sopenharmony_ci 1262bf215546Sopenharmony_ci /* Schedule the spill */ 1263bf215546Sopenharmony_ci bi_builder b = bi_init_builder(ctx, bi_before_tuple(tuple->prev)); 1264bf215546Sopenharmony_ci bi_instr *mov = bi_mov_i32_to(&b, src, src); 1265bf215546Sopenharmony_ci bi_pop_instr(clause, tuple, mov, live_after_temp, fma); 1266bf215546Sopenharmony_ci return mov; 1267bf215546Sopenharmony_ci } 1268bf215546Sopenharmony_ci 1269bf215546Sopenharmony_ci#ifndef NDEBUG 1270bf215546Sopenharmony_ci /* Don't pair instructions if debugging */ 1271bf215546Sopenharmony_ci if ((bifrost_debug & BIFROST_DBG_NOSCHED) && tuple->add) 1272bf215546Sopenharmony_ci return NULL; 1273bf215546Sopenharmony_ci#endif 1274bf215546Sopenharmony_ci 1275bf215546Sopenharmony_ci unsigned idx = bi_choose_index(st, clause, tuple, live_after_temp, fma); 1276bf215546Sopenharmony_ci /* bi_choose_index() returns ~0 when nothing is schedulable, which fails this bound */ 1277bf215546Sopenharmony_ci if (idx >= st.count) 1278bf215546Sopenharmony_ci return NULL; 1279bf215546Sopenharmony_ci 1280bf215546Sopenharmony_ci /* Update state to reflect taking the instruction */ 1281bf215546Sopenharmony_ci bi_instr *instr = st.instructions[idx]; 1282bf215546Sopenharmony_ci 1283bf215546Sopenharmony_ci BITSET_CLEAR(st.worklist, idx); 1284bf215546Sopenharmony_ci bi_update_worklist(st, idx); 1285bf215546Sopenharmony_ci bi_pop_instr(clause, tuple, instr, live_after_temp, fma); 1286bf215546Sopenharmony_ci 1287bf215546Sopenharmony_ci /* Fixups: an IADD_U32 placed on FMA becomes IADDC_I32 with a zero third source; otherwise, on FMA, an instruction accepted by bi_can_replace_with_csel() is rewritten by bi_replace_mux_with_csel() */ 1288bf215546Sopenharmony_ci if (instr->op == BI_OPCODE_IADD_U32 && fma) { 1289bf215546Sopenharmony_ci assert(bi_can_iaddc(instr)); 1290bf215546Sopenharmony_ci instr->op = BI_OPCODE_IADDC_I32; 1291bf215546Sopenharmony_ci instr->src[2] = bi_zero(); 1292bf215546Sopenharmony_ci } else if (fma && bi_can_replace_with_csel(instr)) { 1293bf215546Sopenharmony_ci bi_replace_mux_with_csel(instr, false); 1294bf215546Sopenharmony_ci } 1295bf215546Sopenharmony_ci 1296bf215546Sopenharmony_ci return instr; 
1297bf215546Sopenharmony_ci} 1298bf215546Sopenharmony_ci 1299bf215546Sopenharmony_ci/* Variant of bi_rewrite_index_src_single that uses word-equivalence, rewriting 1300bf215546Sopenharmony_ci * to a passthrough register. If except_sr is true, the staging sources are 1301bf215546Sopenharmony_ci * skipped, so staging register reads are not accidentally encoded as 1302bf215546Sopenharmony_ci * passthrough (which is impossible) */ 1303bf215546Sopenharmony_ci 1304bf215546Sopenharmony_cistatic void 1305bf215546Sopenharmony_cibi_use_passthrough(bi_instr *ins, bi_index old, 1306bf215546Sopenharmony_ci enum bifrost_packed_src new, 1307bf215546Sopenharmony_ci bool except_sr) 1308bf215546Sopenharmony_ci{ 1309bf215546Sopenharmony_ci /* Optional for convenience: a NULL instruction or a null index makes this a no-op */ 1310bf215546Sopenharmony_ci if (!ins || bi_is_null(old)) 1311bf215546Sopenharmony_ci return; 1312bf215546Sopenharmony_ci 1313bf215546Sopenharmony_ci bi_foreach_src(ins, i) { /* Source slots 0 and 4 are the ones skipped as staging sources when except_sr is set */ 1314bf215546Sopenharmony_ci if ((i == 0 || i == 4) && except_sr) 1315bf215546Sopenharmony_ci continue; 1316bf215546Sopenharmony_ci 1317bf215546Sopenharmony_ci if (bi_is_word_equiv(ins->src[i], old)) { 1318bf215546Sopenharmony_ci ins->src[i].type = BI_INDEX_PASS; 1319bf215546Sopenharmony_ci ins->src[i].value = new; 1320bf215546Sopenharmony_ci ins->src[i].reg = false; 1321bf215546Sopenharmony_ci ins->src[i].offset = 0; 1322bf215546Sopenharmony_ci } 1323bf215546Sopenharmony_ci } 1324bf215546Sopenharmony_ci} 1325bf215546Sopenharmony_ci 1326bf215546Sopenharmony_ci/* Rewrites an adjacent pair of tuples _prec_eding and _succ_eding to use 1327bf215546Sopenharmony_ci * intertuple passthroughs where necessary. Passthroughs are allowed as a 1328bf215546Sopenharmony_ci * post-condition of scheduling. Note we rewrite ADD first, FMA second -- 1329bf215546Sopenharmony_ci * opposite the order of execution. 
This is deliberate -- if both FMA and ADD 1330bf215546Sopenharmony_ci * write to the same logical register, the next executed tuple will get the 1331bf215546Sopenharmony_ci * latter result. There's no interference issue under the assumption of correct 1332bf215546Sopenharmony_ci * register allocation. */ 1333bf215546Sopenharmony_ci 1334bf215546Sopenharmony_cistatic void 1335bf215546Sopenharmony_cibi_rewrite_passthrough(bi_tuple prec, bi_tuple succ) 1336bf215546Sopenharmony_ci{ /* The staging-source exemption is only passed for the successor ADD; the successor FMA is always rewritten with except_sr false */ 1337bf215546Sopenharmony_ci bool sr_read = succ.add ? bi_opcode_props[succ.add->op].sr_read : false; 1338bf215546Sopenharmony_ci 1339bf215546Sopenharmony_ci if (prec.add) { 1340bf215546Sopenharmony_ci bi_use_passthrough(succ.fma, prec.add->dest[0], BIFROST_SRC_PASS_ADD, false); 1341bf215546Sopenharmony_ci bi_use_passthrough(succ.add, prec.add->dest[0], BIFROST_SRC_PASS_ADD, sr_read); 1342bf215546Sopenharmony_ci } 1343bf215546Sopenharmony_ci 1344bf215546Sopenharmony_ci if (prec.fma) { 1345bf215546Sopenharmony_ci bi_use_passthrough(succ.fma, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, false); 1346bf215546Sopenharmony_ci bi_use_passthrough(succ.add, prec.fma->dest[0], BIFROST_SRC_PASS_FMA, sr_read); 1347bf215546Sopenharmony_ci } 1348bf215546Sopenharmony_ci} 1349bf215546Sopenharmony_ci /* Replaces every FAU-typed source in the tuple with a passthrough index built by bi_replace_index(): BIFROST_SRC_FAU_HI when the source offset is nonzero, BIFROST_SRC_FAU_LO otherwise. Non-FAU sources are left alone. */ 1350bf215546Sopenharmony_cistatic void 1351bf215546Sopenharmony_cibi_rewrite_fau_to_pass(bi_tuple *tuple) 1352bf215546Sopenharmony_ci{ 1353bf215546Sopenharmony_ci bi_foreach_instr_and_src_in_tuple(tuple, ins, s) { 1354bf215546Sopenharmony_ci if (ins->src[s].type != BI_INDEX_FAU) continue; 1355bf215546Sopenharmony_ci 1356bf215546Sopenharmony_ci bi_index pass = bi_passthrough(ins->src[s].offset ? 
1357bf215546Sopenharmony_ci BIFROST_SRC_FAU_HI : BIFROST_SRC_FAU_LO); 1358bf215546Sopenharmony_ci 1359bf215546Sopenharmony_ci ins->src[s] = bi_replace_index(ins->src[s], pass); 1360bf215546Sopenharmony_ci } 1361bf215546Sopenharmony_ci} 1362bf215546Sopenharmony_ci /* Replaces each source of ins that is the constant 0 with a passthrough index: BIFROST_SRC_STAGE when fma is true, BIFROST_SRC_FAU_LO otherwise. Other sources are untouched. */ 1363bf215546Sopenharmony_cistatic void 1364bf215546Sopenharmony_cibi_rewrite_zero(bi_instr *ins, bool fma) 1365bf215546Sopenharmony_ci{ 1366bf215546Sopenharmony_ci bi_index zero = bi_passthrough(fma ? BIFROST_SRC_STAGE : BIFROST_SRC_FAU_LO); 1367bf215546Sopenharmony_ci 1368bf215546Sopenharmony_ci bi_foreach_src(ins, s) { 1369bf215546Sopenharmony_ci bi_index src = ins->src[s]; 1370bf215546Sopenharmony_ci 1371bf215546Sopenharmony_ci if (src.type == BI_INDEX_CONSTANT && src.value == 0) 1372bf215546Sopenharmony_ci ins->src[s] = bi_replace_index(src, zero); 1373bf215546Sopenharmony_ci } 1374bf215546Sopenharmony_ci} 1375bf215546Sopenharmony_ci 1376bf215546Sopenharmony_ci/* Rewrites each constant source in the tuple to a FAU_LO or FAU_HI passthrough according to which 32-bit half of the 64-bit word `constant` it equals. Only `hi` drives the choice, so a value matching both halves becomes FAU_HI unless pcrel clears it. Assumes #0 to {T, FAU} rewrite has already occurred */ 1377bf215546Sopenharmony_ci 1378bf215546Sopenharmony_cistatic void 1379bf215546Sopenharmony_cibi_rewrite_constants_to_pass(bi_tuple *tuple, uint64_t constant, bool pcrel) 1380bf215546Sopenharmony_ci{ 1381bf215546Sopenharmony_ci bi_foreach_instr_and_src_in_tuple(tuple, ins, s) { 1382bf215546Sopenharmony_ci if (ins->src[s].type != BI_INDEX_CONSTANT) continue; 1383bf215546Sopenharmony_ci 1384bf215546Sopenharmony_ci uint32_t cons = ins->src[s].value; 1385bf215546Sopenharmony_ci /* lo is only consumed by the assert below, hence ASSERTED */ 1386bf215546Sopenharmony_ci ASSERTED bool lo = (cons == (constant & 0xffffffff)); 1387bf215546Sopenharmony_ci bool hi = (cons == (constant >> 32ull)); 1388bf215546Sopenharmony_ci 1389bf215546Sopenharmony_ci /* PC offsets always live in the upper half, set to zero by 1390bf215546Sopenharmony_ci * convention before pack time. (This is safe, since if you 1391bf215546Sopenharmony_ci * wanted to compare against zero, you would use a BRANCHZ 1392bf215546Sopenharmony_ci * instruction instead.) 
*/ 1393bf215546Sopenharmony_ci if (cons == 0 && ins->branch_target != NULL) { 1394bf215546Sopenharmony_ci assert(pcrel); 1395bf215546Sopenharmony_ci hi = true; 1396bf215546Sopenharmony_ci lo = false; 1397bf215546Sopenharmony_ci } else if (pcrel) { /* In the pcrel word every other constant is forced to the low half */ 1398bf215546Sopenharmony_ci hi = false; 1399bf215546Sopenharmony_ci } 1400bf215546Sopenharmony_ci 1401bf215546Sopenharmony_ci assert(lo || hi); 1402bf215546Sopenharmony_ci 1403bf215546Sopenharmony_ci ins->src[s] = bi_replace_index(ins->src[s], 1404bf215546Sopenharmony_ci bi_passthrough(hi ? BIFROST_SRC_FAU_HI : 1405bf215546Sopenharmony_ci BIFROST_SRC_FAU_LO)); 1406bf215546Sopenharmony_ci } 1407bf215546Sopenharmony_ci} 1408bf215546Sopenharmony_ci 1409bf215546Sopenharmony_ci/* Constructs a constant state given a tuple state. This has the 1410bf215546Sopenharmony_ci * postcondition that pcrel applies to the first constant by convention, 1411bf215546Sopenharmony_ci * and PC-relative constants will be #0 by convention here, so swap to 1412bf215546Sopenharmony_ci * match if needed */ 1413bf215546Sopenharmony_ci 1414bf215546Sopenharmony_cistatic struct bi_const_state 1415bf215546Sopenharmony_cibi_get_const_state(struct bi_tuple_state *tuple) 1416bf215546Sopenharmony_ci{ /* pcrel is derived from the ADD instruction having a branch target */ 1417bf215546Sopenharmony_ci struct bi_const_state consts = { 1418bf215546Sopenharmony_ci .constant_count = tuple->constant_count, 1419bf215546Sopenharmony_ci .constants[0] = tuple->constants[0], 1420bf215546Sopenharmony_ci .constants[1] = tuple->constants[1], 1421bf215546Sopenharmony_ci .pcrel = tuple->add && tuple->add->branch_target, 1422bf215546Sopenharmony_ci }; 1423bf215546Sopenharmony_ci 1424bf215546Sopenharmony_ci /* pcrel applies to the first constant by convention, and 1425bf215546Sopenharmony_ci * PC-relative constants will be #0 by convention here, so swap 1426bf215546Sopenharmony_ci * to match if needed */ 1427bf215546Sopenharmony_ci if (consts.pcrel && consts.constants[0]) { 1428bf215546Sopenharmony_ci assert(consts.constant_count == 2); 
1429bf215546Sopenharmony_ci assert(consts.constants[1] == 0); 1430bf215546Sopenharmony_ci 1431bf215546Sopenharmony_ci consts.constants[1] = consts.constants[0]; 1432bf215546Sopenharmony_ci consts.constants[0] = 0; 1433bf215546Sopenharmony_ci } 1434bf215546Sopenharmony_ci 1435bf215546Sopenharmony_ci return consts; 1436bf215546Sopenharmony_ci} 1437bf215546Sopenharmony_ci 1438bf215546Sopenharmony_ci/* Merges constants in a clause, satisfying the following rules, assuming no 1439bf215546Sopenharmony_ci * more than one tuple has pcrel: 1440bf215546Sopenharmony_ci * 1441bf215546Sopenharmony_ci * 1. If a tuple has two constants, they must be packed together. If one is 1442bf215546Sopenharmony_ci * pcrel, it must be the high constant to use the M1=4 modification [sx64(E0) + 1443bf215546Sopenharmony_ci * (PC << 32)]. Otherwise choose an arbitrary order. 1444bf215546Sopenharmony_ci * 1445bf215546Sopenharmony_ci * 2. If a tuple has one constant, it may be shared with an existing 1446bf215546Sopenharmony_ci * pair that already contains that constant, or it may be combined with another 1447bf215546Sopenharmony_ci * (distinct) tuple of a single constant. 1448bf215546Sopenharmony_ci * 1449bf215546Sopenharmony_ci * This guarantees a packing is possible. The next routine handles modification 1450bf215546Sopenharmony_ci * related swapping, to satisfy format 12 and the lack of modification for 1451bf215546Sopenharmony_ci * tuple count 5/8 in EC0. 
1452bf215546Sopenharmony_ci */ 1453bf215546Sopenharmony_ci /* Packs two 32-bit constants into one 64-bit word. Without pcrel the larger value goes in the high half and the other in the low half. With pcrel the high half is zero and the low half is c1 if c0 is zero, else c0. */ 1454bf215546Sopenharmony_cistatic uint64_t 1455bf215546Sopenharmony_cibi_merge_u32(uint32_t c0, uint32_t c1, bool pcrel) 1456bf215546Sopenharmony_ci{ 1457bf215546Sopenharmony_ci /* At this point in the constant merge algorithm, pcrel constants are 1458bf215546Sopenharmony_ci * treated as zero, so pcrel implies at least one constant is zero */ 1459bf215546Sopenharmony_ci assert(!pcrel || (c0 == 0 || c1 == 0)); 1460bf215546Sopenharmony_ci 1461bf215546Sopenharmony_ci /* Order: pcrel, maximum non-pcrel, minimum non-pcrel */ 1462bf215546Sopenharmony_ci uint32_t hi = pcrel ? 0 : MAX2(c0, c1); 1463bf215546Sopenharmony_ci uint32_t lo = (c0 == hi) ? c1 : c0; 1464bf215546Sopenharmony_ci 1465bf215546Sopenharmony_ci /* Merge in the selected order */ 1466bf215546Sopenharmony_ci return lo | (((uint64_t) hi) << 32ull); 1467bf215546Sopenharmony_ci} 1468bf215546Sopenharmony_ci /* First merge pass. For each of the first tuple_count const states holding exactly two constants, merges them with bi_merge_u32() and either reuses an identical entry already in merged[] (never the entry recorded in *pcrel_pair) or appends a new one. The chosen slot is stored in consts[t].word_idx; a pcrel tuple that appends a new entry also records it in *pcrel_pair. Returns the number of entries written to merged[]. */ 1469bf215546Sopenharmony_cistatic unsigned 1470bf215546Sopenharmony_cibi_merge_pairs(struct bi_const_state *consts, unsigned tuple_count, 1471bf215546Sopenharmony_ci uint64_t *merged, unsigned *pcrel_pair) 1472bf215546Sopenharmony_ci{ 1473bf215546Sopenharmony_ci unsigned merge_count = 0; 1474bf215546Sopenharmony_ci 1475bf215546Sopenharmony_ci for (unsigned t = 0; t < tuple_count; ++t) { 1476bf215546Sopenharmony_ci if (consts[t].constant_count != 2) continue; 1477bf215546Sopenharmony_ci 1478bf215546Sopenharmony_ci unsigned idx = ~0; 1479bf215546Sopenharmony_ci uint64_t val = bi_merge_u32(consts[t].constants[0], 1480bf215546Sopenharmony_ci consts[t].constants[1], consts[t].pcrel); 1481bf215546Sopenharmony_ci 1482bf215546Sopenharmony_ci /* Skip the pcrel pair if assigned, because if one is assigned, 1483bf215546Sopenharmony_ci * this one is not pcrel by uniqueness so it's a mismatch */ 1484bf215546Sopenharmony_ci for (unsigned s = 0; s < merge_count; ++s) { 1485bf215546Sopenharmony_ci if (merged[s] == val && (*pcrel_pair) != s) { 
1486bf215546Sopenharmony_ci idx = s; 1487bf215546Sopenharmony_ci break; 1488bf215546Sopenharmony_ci } 1489bf215546Sopenharmony_ci } 1490bf215546Sopenharmony_ci /* No reusable entry was found: append a new word */ 1491bf215546Sopenharmony_ci if (idx == ~0) { 1492bf215546Sopenharmony_ci idx = merge_count++; 1493bf215546Sopenharmony_ci merged[idx] = val; 1494bf215546Sopenharmony_ci 1495bf215546Sopenharmony_ci if (consts[t].pcrel) 1496bf215546Sopenharmony_ci (*pcrel_pair) = idx; 1497bf215546Sopenharmony_ci } 1498bf215546Sopenharmony_ci 1499bf215546Sopenharmony_ci consts[t].word_idx = idx; 1500bf215546Sopenharmony_ci } 1501bf215546Sopenharmony_ci 1502bf215546Sopenharmony_ci return merge_count; 1503bf215546Sopenharmony_ci} 1504bf215546Sopenharmony_ci /* Second merge pass, for const states holding exactly one constant. A non-pcrel single first tries to reuse either half of an existing word other than the one recorded in *pcrel_pair. Unmatched singles are combined two at a time: the first is held as pending, with its word_idx already set to the slot the pair will occupy, and the next unmatched single with a different value completes the pair through bi_merge_u32(). A single still pending at the end is stored alone in the high half of a new word. *pcrel_pair is updated when a pcrel single is placed. Returns the updated word count. */ 1505bf215546Sopenharmony_cistatic unsigned 1506bf215546Sopenharmony_cibi_merge_singles(struct bi_const_state *consts, unsigned tuple_count, 1507bf215546Sopenharmony_ci uint64_t *pairs, unsigned pair_count, unsigned *pcrel_pair) 1508bf215546Sopenharmony_ci{ 1509bf215546Sopenharmony_ci bool pending = false, pending_pcrel = false; 1510bf215546Sopenharmony_ci uint32_t pending_single = 0; 1511bf215546Sopenharmony_ci 1512bf215546Sopenharmony_ci for (unsigned t = 0; t < tuple_count; ++t) { 1513bf215546Sopenharmony_ci if (consts[t].constant_count != 1) continue; 1514bf215546Sopenharmony_ci 1515bf215546Sopenharmony_ci uint32_t val = consts[t].constants[0]; 1516bf215546Sopenharmony_ci unsigned idx = ~0; 1517bf215546Sopenharmony_ci 1518bf215546Sopenharmony_ci /* Try to match, but don't match pcrel with non-pcrel, even 1519bf215546Sopenharmony_ci * though we can merge a pcrel with a non-pcrel single */ 1520bf215546Sopenharmony_ci for (unsigned i = 0; i < pair_count; ++i) { 1521bf215546Sopenharmony_ci bool lo = ((pairs[i] & 0xffffffff) == val); 1522bf215546Sopenharmony_ci bool hi = ((pairs[i] >> 32) == val); 1523bf215546Sopenharmony_ci bool match = (lo || hi); 1524bf215546Sopenharmony_ci match &= ((*pcrel_pair) != i); 1525bf215546Sopenharmony_ci if (match && !consts[t].pcrel) { 
1526bf215546Sopenharmony_ci idx = i; 1527bf215546Sopenharmony_ci break; 1528bf215546Sopenharmony_ci } 1529bf215546Sopenharmony_ci } 1530bf215546Sopenharmony_ci 1531bf215546Sopenharmony_ci if (idx == ~0) { /* Unmatched: this single lands in the next free word, either completing the pending single or becoming the pending one */ 1532bf215546Sopenharmony_ci idx = pair_count; 1533bf215546Sopenharmony_ci 1534bf215546Sopenharmony_ci if (pending && pending_single != val) { 1535bf215546Sopenharmony_ci assert(!(pending_pcrel && consts[t].pcrel)); 1536bf215546Sopenharmony_ci bool pcrel = pending_pcrel || consts[t].pcrel; 1537bf215546Sopenharmony_ci 1538bf215546Sopenharmony_ci if (pcrel) 1539bf215546Sopenharmony_ci *pcrel_pair = idx; 1540bf215546Sopenharmony_ci 1541bf215546Sopenharmony_ci pairs[pair_count++] = bi_merge_u32(pending_single, val, pcrel); 1542bf215546Sopenharmony_ci 1543bf215546Sopenharmony_ci pending = pending_pcrel = false; 1544bf215546Sopenharmony_ci } else { 1545bf215546Sopenharmony_ci pending = true; 1546bf215546Sopenharmony_ci pending_pcrel = consts[t].pcrel; 1547bf215546Sopenharmony_ci pending_single = val; 1548bf215546Sopenharmony_ci } 1549bf215546Sopenharmony_ci } 1550bf215546Sopenharmony_ci 1551bf215546Sopenharmony_ci consts[t].word_idx = idx; 1552bf215546Sopenharmony_ci } 1553bf215546Sopenharmony_ci 1554bf215546Sopenharmony_ci /* A leftover single is written alone into the high half of a new word. Shift so it works whether pending_pcrel is set or not */ 1555bf215546Sopenharmony_ci if (pending) { 1556bf215546Sopenharmony_ci if (pending_pcrel) 1557bf215546Sopenharmony_ci *pcrel_pair = pair_count; 1558bf215546Sopenharmony_ci 1559bf215546Sopenharmony_ci pairs[pair_count++] = ((uint64_t) pending_single) << 32ull; 1560bf215546Sopenharmony_ci } 1561bf215546Sopenharmony_ci 1562bf215546Sopenharmony_ci return pair_count; 1563bf215546Sopenharmony_ci} 1564bf215546Sopenharmony_ci /* Runs both merge passes over a fixed 8 const-state slots: two-constant tuples first (bi_merge_pairs), then single-constant tuples (bi_merge_singles). Returns the total number of 64-bit words written to pairs[]; *pcrel_idx receives the index of the PC-relative word if one is placed. */ 1565bf215546Sopenharmony_cistatic unsigned 1566bf215546Sopenharmony_cibi_merge_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned *pcrel_idx) 1567bf215546Sopenharmony_ci{ 1568bf215546Sopenharmony_ci unsigned pair_count = bi_merge_pairs(consts, 8, pairs, 
pcrel_idx); 1569bf215546Sopenharmony_ci return bi_merge_singles(consts, 8, pairs, pair_count, pcrel_idx); 1570bf215546Sopenharmony_ci} 1571bf215546Sopenharmony_ci 1572bf215546Sopenharmony_ci/* Swap two constants at word i and i+1 by swapping their actual positions and 1573bf215546Sopenharmony_ci * swapping all references so the meaning of the clause is preserved */ 1574bf215546Sopenharmony_ci 1575bf215546Sopenharmony_cistatic void 1576bf215546Sopenharmony_cibi_swap_constants(struct bi_const_state *consts, uint64_t *pairs, unsigned i) 1577bf215546Sopenharmony_ci{ 1578bf215546Sopenharmony_ci uint64_t tmp_pair = pairs[i + 0]; 1579bf215546Sopenharmony_ci pairs[i + 0] = pairs[i + 1]; 1580bf215546Sopenharmony_ci pairs[i + 1] = tmp_pair; 1581bf215546Sopenharmony_ci /* Retarget every const state that referenced either word; all 8 slots are scanned */ 1582bf215546Sopenharmony_ci for (unsigned t = 0; t < 8; ++t) { 1583bf215546Sopenharmony_ci if (consts[t].word_idx == i) 1584bf215546Sopenharmony_ci consts[t].word_idx = (i + 1); 1585bf215546Sopenharmony_ci else if (consts[t].word_idx == (i + 1)) 1586bf215546Sopenharmony_ci consts[t].word_idx = i; 1587bf215546Sopenharmony_ci } 1588bf215546Sopenharmony_ci} 1589bf215546Sopenharmony_ci 1590bf215546Sopenharmony_ci/* Given merged constants, one of which might be PC-relative, fix up the M 1591bf215546Sopenharmony_ci * values so the PC-relative constant (if it exists) has the M1=4 modification 1592bf215546Sopenharmony_ci * and other constants are used as-is (which might require swapping) */ 1593bf215546Sopenharmony_ci /* Returns constant_count, raised to at least 2 when the PC-relative word had to be moved out of slot 0 for tuple counts 5 and 8. *pcrel_idx is updated to the final position of the PC-relative word, and the top 4 bits of entries in pairs[] may be modified. The pairwise loop starts at word 1 when bi_ec0_packed(tuple_count) is true, else at word 0. */ 1594bf215546Sopenharmony_cistatic unsigned 1595bf215546Sopenharmony_cibi_apply_constant_modifiers(struct bi_const_state *consts, 1596bf215546Sopenharmony_ci uint64_t *pairs, unsigned *pcrel_idx, 1597bf215546Sopenharmony_ci unsigned tuple_count, unsigned constant_count) 1598bf215546Sopenharmony_ci{ 1599bf215546Sopenharmony_ci unsigned start = bi_ec0_packed(tuple_count) ? 
1 : 0; 1600bf215546Sopenharmony_ci 1601bf215546Sopenharmony_ci /* Clauses with these tuple counts lack an M field for the packed EC0, 1602bf215546Sopenharmony_ci * so EC0 cannot be PC-relative, which might require swapping (and 1603bf215546Sopenharmony_ci * possibly adding an unused constant) to fit */ 1604bf215546Sopenharmony_ci 1605bf215546Sopenharmony_ci if (*pcrel_idx == 0 && (tuple_count == 5 || tuple_count == 8)) { 1606bf215546Sopenharmony_ci constant_count = MAX2(constant_count, 2); 1607bf215546Sopenharmony_ci *pcrel_idx = 1; 1608bf215546Sopenharmony_ci bi_swap_constants(consts, pairs, 0); 1609bf215546Sopenharmony_ci } 1610bf215546Sopenharmony_ci 1611bf215546Sopenharmony_ci /* EC0 might be packed free, after that constants are packed in pairs 1612bf215546Sopenharmony_ci * (with clause format 12), with M1 values computed from the pair */ 1613bf215546Sopenharmony_ci 1614bf215546Sopenharmony_ci for (unsigned i = start; i < constant_count; i += 2) { 1615bf215546Sopenharmony_ci bool swap = false; 1616bf215546Sopenharmony_ci bool last = (i + 1) == constant_count; 1617bf215546Sopenharmony_ci /* A1 and B1 are the top 4 bits of the two words of this pair. NOTE(review): when last is true, pairs[i + 1] lies just past the counted constants, so this relies on the caller array having a spare slot there -- confirm constant_count can never equal the array length */ 1618bf215546Sopenharmony_ci unsigned A1 = (pairs[i] >> 60); 1619bf215546Sopenharmony_ci unsigned B1 = (pairs[i + 1] >> 60); 1620bf215546Sopenharmony_ci 1621bf215546Sopenharmony_ci if (*pcrel_idx == i || *pcrel_idx == (i + 1)) { 1622bf215546Sopenharmony_ci /* PC-relative constant must be E0, not E1 */ 1623bf215546Sopenharmony_ci swap = (*pcrel_idx == (i + 1)); 1624bf215546Sopenharmony_ci 1625bf215546Sopenharmony_ci /* Set M1 = 4 by noting (A - B) mod 16 = 4 is 1626bf215546Sopenharmony_ci * equivalent to A = (B + 4) mod 16 and that we can 1627bf215546Sopenharmony_ci * control A */ 1628bf215546Sopenharmony_ci unsigned B = swap ? 
A1 : B1; 1629bf215546Sopenharmony_ci unsigned A = (B + 4) & 0xF; 1630bf215546Sopenharmony_ci pairs[*pcrel_idx] |= ((uint64_t) A) << 60; 1631bf215546Sopenharmony_ci 1632bf215546Sopenharmony_ci /* Swapped if swap set, identity if swap not set */ 1633bf215546Sopenharmony_ci *pcrel_idx = i; 1634bf215546Sopenharmony_ci } else { 1635bf215546Sopenharmony_ci /* Compute M1 value if we don't swap */ 1636bf215546Sopenharmony_ci unsigned M1 = (16 + A1 - B1) & 0xF; 1637bf215546Sopenharmony_ci 1638bf215546Sopenharmony_ci /* For M1 = 0 or M1 >= 8, the constants are unchanged, 1639bf215546Sopenharmony_ci * we have 0 < (A1 - B1) % 16 < 8, which implies (B1 - 1640bf215546Sopenharmony_ci * A1) % 16 >= 8, so swapping will let them be used 1641bf215546Sopenharmony_ci * unchanged */ 1642bf215546Sopenharmony_ci swap = (M1 != 0) && (M1 < 8); 1643bf215546Sopenharmony_ci 1644bf215546Sopenharmony_ci /* However, we can't swap the last constant, so we 1645bf215546Sopenharmony_ci * force M1 = 0 instead for this case */ 1646bf215546Sopenharmony_ci if (last && swap) { /* OR the top 4 bits of pairs[i] into the following word */ 1647bf215546Sopenharmony_ci pairs[i + 1] |= pairs[i] & (0xfull << 60); 1648bf215546Sopenharmony_ci swap = false; 1649bf215546Sopenharmony_ci } 1650bf215546Sopenharmony_ci } 1651bf215546Sopenharmony_ci 1652bf215546Sopenharmony_ci if (swap) { 1653bf215546Sopenharmony_ci assert(!last); 1654bf215546Sopenharmony_ci bi_swap_constants(consts, pairs, i); 1655bf215546Sopenharmony_ci } 1656bf215546Sopenharmony_ci } 1657bf215546Sopenharmony_ci 1658bf215546Sopenharmony_ci return constant_count; 1659bf215546Sopenharmony_ci} 1660bf215546Sopenharmony_ci 1661bf215546Sopenharmony_ci/* Schedule a single clause. If no instructions remain, return NULL. 
*/ 1662bf215546Sopenharmony_ci 1663bf215546Sopenharmony_cistatic bi_clause * 1664bf215546Sopenharmony_cibi_schedule_clause(bi_context *ctx, bi_block *block, struct bi_worklist st, uint64_t *live) 1665bf215546Sopenharmony_ci{ 1666bf215546Sopenharmony_ci struct bi_clause_state clause_state = { 0 }; 1667bf215546Sopenharmony_ci bi_clause *clause = rzalloc(ctx, bi_clause); 1668bf215546Sopenharmony_ci bi_tuple *tuple = NULL; 1669bf215546Sopenharmony_ci 1670bf215546Sopenharmony_ci const unsigned max_tuples = ARRAY_SIZE(clause->tuples); 1671bf215546Sopenharmony_ci 1672bf215546Sopenharmony_ci /* TODO: Decide flow control better */ 1673bf215546Sopenharmony_ci clause->flow_control = BIFROST_FLOW_NBTB; 1674bf215546Sopenharmony_ci 1675bf215546Sopenharmony_ci /* The last clause can only write one instruction, so initialize that */ 1676bf215546Sopenharmony_ci struct bi_reg_state reg_state = {}; 1677bf215546Sopenharmony_ci bi_index prev_reads[5] = { bi_null() }; 1678bf215546Sopenharmony_ci unsigned nr_prev_reads = 0; 1679bf215546Sopenharmony_ci 1680bf215546Sopenharmony_ci /* We need to track future liveness. The main *live set tracks what is 1681bf215546Sopenharmony_ci * live at the current point int he program we are scheduling, but to 1682bf215546Sopenharmony_ci * determine temp eligibility, we instead want what will be live after 1683bf215546Sopenharmony_ci * the next tuple in the program. If you scheduled forwards, you'd need 1684bf215546Sopenharmony_ci * a crystall ball for this. Luckily we schedule backwards, so we just 1685bf215546Sopenharmony_ci * delay updates to the live_after_temp by an extra tuple. 
*/ 1686bf215546Sopenharmony_ci uint64_t live_after_temp = *live; 1687bf215546Sopenharmony_ci uint64_t live_next_tuple = live_after_temp; 1688bf215546Sopenharmony_ci 1689bf215546Sopenharmony_ci do { 1690bf215546Sopenharmony_ci struct bi_tuple_state tuple_state = { 1691bf215546Sopenharmony_ci .last = (clause->tuple_count == 0), 1692bf215546Sopenharmony_ci .reg = reg_state, 1693bf215546Sopenharmony_ci .nr_prev_reads = nr_prev_reads, 1694bf215546Sopenharmony_ci .prev = tuple, 1695bf215546Sopenharmony_ci .pcrel_idx = ~0, 1696bf215546Sopenharmony_ci }; 1697bf215546Sopenharmony_ci 1698bf215546Sopenharmony_ci assert(nr_prev_reads < ARRAY_SIZE(prev_reads)); 1699bf215546Sopenharmony_ci memcpy(tuple_state.prev_reads, prev_reads, sizeof(prev_reads)); 1700bf215546Sopenharmony_ci 1701bf215546Sopenharmony_ci unsigned idx = max_tuples - clause->tuple_count - 1; 1702bf215546Sopenharmony_ci 1703bf215546Sopenharmony_ci tuple = &clause->tuples[idx]; 1704bf215546Sopenharmony_ci 1705bf215546Sopenharmony_ci if (clause->message && bi_opcode_props[clause->message->op].sr_read && !bi_is_null(clause->message->src[0])) { 1706bf215546Sopenharmony_ci unsigned nr = bi_count_read_registers(clause->message, 0); 1707bf215546Sopenharmony_ci live_after_temp |= (BITFIELD64_MASK(nr) << clause->message->src[0].value); 1708bf215546Sopenharmony_ci } 1709bf215546Sopenharmony_ci 1710bf215546Sopenharmony_ci /* Since we schedule backwards, we schedule ADD first */ 1711bf215546Sopenharmony_ci tuple_state.add = bi_take_instr(ctx, st, &clause_state, &tuple_state, live_after_temp, false); 1712bf215546Sopenharmony_ci tuple->fma = bi_take_instr(ctx, st, &clause_state, &tuple_state, live_after_temp, true); 1713bf215546Sopenharmony_ci tuple->add = tuple_state.add; 1714bf215546Sopenharmony_ci 1715bf215546Sopenharmony_ci /* Update liveness from the new instructions */ 1716bf215546Sopenharmony_ci if (tuple->add) 1717bf215546Sopenharmony_ci *live = bi_postra_liveness_ins(*live, tuple->add); 1718bf215546Sopenharmony_ci 
1719bf215546Sopenharmony_ci if (tuple->fma) 1720bf215546Sopenharmony_ci *live = bi_postra_liveness_ins(*live, tuple->fma); 1721bf215546Sopenharmony_ci 1722bf215546Sopenharmony_ci /* Rotate in the new per-tuple liveness */ 1723bf215546Sopenharmony_ci live_after_temp = live_next_tuple; 1724bf215546Sopenharmony_ci live_next_tuple = *live; 1725bf215546Sopenharmony_ci 1726bf215546Sopenharmony_ci /* We may have a message, but only one per clause */ 1727bf215546Sopenharmony_ci if (tuple->add && bi_must_message(tuple->add)) { 1728bf215546Sopenharmony_ci assert(!clause_state.message); 1729bf215546Sopenharmony_ci clause_state.message = true; 1730bf215546Sopenharmony_ci 1731bf215546Sopenharmony_ci clause->message_type = 1732bf215546Sopenharmony_ci bi_message_type_for_instr(tuple->add); 1733bf215546Sopenharmony_ci clause->message = tuple->add; 1734bf215546Sopenharmony_ci 1735bf215546Sopenharmony_ci /* We don't need to set dependencies for blend shaders 1736bf215546Sopenharmony_ci * because the BLEND instruction in the fragment 1737bf215546Sopenharmony_ci * shader should have already done the wait */ 1738bf215546Sopenharmony_ci if (!ctx->inputs->is_blend) { 1739bf215546Sopenharmony_ci switch (tuple->add->op) { 1740bf215546Sopenharmony_ci case BI_OPCODE_ATEST: 1741bf215546Sopenharmony_ci clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH); 1742bf215546Sopenharmony_ci break; 1743bf215546Sopenharmony_ci case BI_OPCODE_LD_TILE: 1744bf215546Sopenharmony_ci case BI_OPCODE_ST_TILE: 1745bf215546Sopenharmony_ci clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR); 1746bf215546Sopenharmony_ci break; 1747bf215546Sopenharmony_ci case BI_OPCODE_BLEND: 1748bf215546Sopenharmony_ci clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_DEPTH); 1749bf215546Sopenharmony_ci clause->dependencies |= (1 << BIFROST_SLOT_ELDEST_COLOUR); 1750bf215546Sopenharmony_ci break; 1751bf215546Sopenharmony_ci default: 1752bf215546Sopenharmony_ci break; 1753bf215546Sopenharmony_ci } 
1754bf215546Sopenharmony_ci } 1755bf215546Sopenharmony_ci } 1756bf215546Sopenharmony_ci 1757bf215546Sopenharmony_ci clause_state.consts[idx] = bi_get_const_state(&tuple_state); 1758bf215546Sopenharmony_ci 1759bf215546Sopenharmony_ci /* Before merging constants, eliminate zeroes, otherwise the 1760bf215546Sopenharmony_ci * merging will fight over the #0 that never gets read (and is 1761bf215546Sopenharmony_ci * never marked as read by update_fau) */ 1762bf215546Sopenharmony_ci if (tuple->fma && bi_reads_zero(tuple->fma)) 1763bf215546Sopenharmony_ci bi_rewrite_zero(tuple->fma, true); 1764bf215546Sopenharmony_ci 1765bf215546Sopenharmony_ci /* Rewrite away FAU, constant write is deferred */ 1766bf215546Sopenharmony_ci if (!tuple_state.constant_count) { 1767bf215546Sopenharmony_ci tuple->fau_idx = tuple_state.fau; 1768bf215546Sopenharmony_ci bi_rewrite_fau_to_pass(tuple); 1769bf215546Sopenharmony_ci } 1770bf215546Sopenharmony_ci 1771bf215546Sopenharmony_ci /* Use passthrough register for cross-stage accesses. 
Since 1772bf215546Sopenharmony_ci * there are just FMA and ADD stages, that means we rewrite to 1773bf215546Sopenharmony_ci * passthrough the sources of the ADD that read from the 1774bf215546Sopenharmony_ci * destination of the FMA */ 1775bf215546Sopenharmony_ci 1776bf215546Sopenharmony_ci if (tuple->fma) { 1777bf215546Sopenharmony_ci bi_use_passthrough(tuple->add, tuple->fma->dest[0], 1778bf215546Sopenharmony_ci BIFROST_SRC_STAGE, false); 1779bf215546Sopenharmony_ci } 1780bf215546Sopenharmony_ci 1781bf215546Sopenharmony_ci /* Don't add an empty tuple, unless the worklist has nothing 1782bf215546Sopenharmony_ci * but a (pseudo)instruction failing to schedule due to a "not 1783bf215546Sopenharmony_ci * last instruction" constraint */ 1784bf215546Sopenharmony_ci 1785bf215546Sopenharmony_ci int some_instruction = __bitset_ffs(st.worklist, BITSET_WORDS(st.count)); 1786bf215546Sopenharmony_ci bool not_last = (some_instruction > 0) && 1787bf215546Sopenharmony_ci bi_must_not_last(st.instructions[some_instruction - 1]); 1788bf215546Sopenharmony_ci 1789bf215546Sopenharmony_ci bool insert_empty = tuple_state.last && not_last; 1790bf215546Sopenharmony_ci 1791bf215546Sopenharmony_ci if (!(tuple->fma || tuple->add || insert_empty)) 1792bf215546Sopenharmony_ci break; 1793bf215546Sopenharmony_ci 1794bf215546Sopenharmony_ci clause->tuple_count++; 1795bf215546Sopenharmony_ci 1796bf215546Sopenharmony_ci /* Adding enough tuple might overflow constants */ 1797bf215546Sopenharmony_ci if (!bi_space_for_more_constants(&clause_state)) 1798bf215546Sopenharmony_ci break; 1799bf215546Sopenharmony_ci 1800bf215546Sopenharmony_ci#ifndef NDEBUG 1801bf215546Sopenharmony_ci /* Don't schedule more than 1 tuple if debugging */ 1802bf215546Sopenharmony_ci if ((bifrost_debug & BIFROST_DBG_NOSCHED) && !insert_empty) 1803bf215546Sopenharmony_ci break; 1804bf215546Sopenharmony_ci#endif 1805bf215546Sopenharmony_ci 1806bf215546Sopenharmony_ci /* Link through the register state */ 
1807bf215546Sopenharmony_ci STATIC_ASSERT(sizeof(prev_reads) == sizeof(tuple_state.reg.reads)); 1808bf215546Sopenharmony_ci memcpy(prev_reads, tuple_state.reg.reads, sizeof(prev_reads)); 1809bf215546Sopenharmony_ci nr_prev_reads = tuple_state.reg.nr_reads; 1810bf215546Sopenharmony_ci clause_state.tuple_count++; 1811bf215546Sopenharmony_ci } while(clause->tuple_count < 8); 1812bf215546Sopenharmony_ci 1813bf215546Sopenharmony_ci /* Don't schedule an empty clause */ 1814bf215546Sopenharmony_ci if (!clause->tuple_count) 1815bf215546Sopenharmony_ci return NULL; 1816bf215546Sopenharmony_ci 1817bf215546Sopenharmony_ci /* Before merging, rewrite away any tuples that read only zero */ 1818bf215546Sopenharmony_ci for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) { 1819bf215546Sopenharmony_ci bi_tuple *tuple = &clause->tuples[i]; 1820bf215546Sopenharmony_ci struct bi_const_state *st = &clause_state.consts[i]; 1821bf215546Sopenharmony_ci 1822bf215546Sopenharmony_ci if (st->constant_count == 0 || st->constants[0] || st->constants[1] || st->pcrel) 1823bf215546Sopenharmony_ci continue; 1824bf215546Sopenharmony_ci 1825bf215546Sopenharmony_ci bi_foreach_instr_in_tuple(tuple, ins) 1826bf215546Sopenharmony_ci bi_rewrite_zero(ins, false); 1827bf215546Sopenharmony_ci 1828bf215546Sopenharmony_ci /* Constant has been demoted to FAU, so don't pack it separately */ 1829bf215546Sopenharmony_ci st->constant_count = 0; 1830bf215546Sopenharmony_ci 1831bf215546Sopenharmony_ci /* Default */ 1832bf215546Sopenharmony_ci assert(tuple->fau_idx == BIR_FAU_ZERO); 1833bf215546Sopenharmony_ci } 1834bf215546Sopenharmony_ci 1835bf215546Sopenharmony_ci uint64_t constant_pairs[8] = { 0 }; 1836bf215546Sopenharmony_ci unsigned pcrel_idx = ~0; 1837bf215546Sopenharmony_ci unsigned constant_words = 1838bf215546Sopenharmony_ci bi_merge_constants(clause_state.consts, constant_pairs, &pcrel_idx); 1839bf215546Sopenharmony_ci 1840bf215546Sopenharmony_ci constant_words = 
bi_apply_constant_modifiers(clause_state.consts, 1841bf215546Sopenharmony_ci constant_pairs, &pcrel_idx, clause->tuple_count, 1842bf215546Sopenharmony_ci constant_words); 1843bf215546Sopenharmony_ci 1844bf215546Sopenharmony_ci clause->pcrel_idx = pcrel_idx; 1845bf215546Sopenharmony_ci 1846bf215546Sopenharmony_ci for (unsigned i = max_tuples - clause->tuple_count; i < max_tuples; ++i) { 1847bf215546Sopenharmony_ci bi_tuple *tuple = &clause->tuples[i]; 1848bf215546Sopenharmony_ci 1849bf215546Sopenharmony_ci /* If no constants, leave FAU as it is, possibly defaulting to 0 */ 1850bf215546Sopenharmony_ci if (clause_state.consts[i].constant_count == 0) 1851bf215546Sopenharmony_ci continue; 1852bf215546Sopenharmony_ci 1853bf215546Sopenharmony_ci /* FAU is already handled */ 1854bf215546Sopenharmony_ci assert(!tuple->fau_idx); 1855bf215546Sopenharmony_ci 1856bf215546Sopenharmony_ci unsigned word_idx = clause_state.consts[i].word_idx; 1857bf215546Sopenharmony_ci assert(word_idx <= 8); 1858bf215546Sopenharmony_ci 1859bf215546Sopenharmony_ci /* We could try to merge regardless of bottom bits as well, but 1860bf215546Sopenharmony_ci * that's probably diminishing returns */ 1861bf215546Sopenharmony_ci uint64_t pair = constant_pairs[word_idx]; 1862bf215546Sopenharmony_ci unsigned lo = pair & 0xF; 1863bf215546Sopenharmony_ci 1864bf215546Sopenharmony_ci tuple->fau_idx = bi_constant_field(word_idx) | lo; 1865bf215546Sopenharmony_ci bi_rewrite_constants_to_pass(tuple, pair, word_idx == pcrel_idx); 1866bf215546Sopenharmony_ci } 1867bf215546Sopenharmony_ci 1868bf215546Sopenharmony_ci clause->constant_count = constant_words; 1869bf215546Sopenharmony_ci memcpy(clause->constants, constant_pairs, sizeof(constant_pairs)); 1870bf215546Sopenharmony_ci 1871bf215546Sopenharmony_ci /* Branches must be last, so this can be factored out */ 1872bf215546Sopenharmony_ci bi_instr *last = clause->tuples[max_tuples - 1].add; 1873bf215546Sopenharmony_ci clause->next_clause_prefetch = !last || (last->op 
!= BI_OPCODE_JUMP); 1874bf215546Sopenharmony_ci clause->block = block; 1875bf215546Sopenharmony_ci 1876bf215546Sopenharmony_ci clause->ftz = (clause_state.ftz == BI_FTZ_STATE_ENABLE); 1877bf215546Sopenharmony_ci 1878bf215546Sopenharmony_ci /* We emit in reverse and emitted to the back of the tuples array, so 1879bf215546Sopenharmony_ci * move it up front for easy indexing */ 1880bf215546Sopenharmony_ci memmove(clause->tuples, 1881bf215546Sopenharmony_ci clause->tuples + (max_tuples - clause->tuple_count), 1882bf215546Sopenharmony_ci clause->tuple_count * sizeof(clause->tuples[0])); 1883bf215546Sopenharmony_ci 1884bf215546Sopenharmony_ci /* Use passthrough register for cross-tuple accesses. Note this is 1885bf215546Sopenharmony_ci * after the memmove, so this is forwards. Skip the first tuple since 1886bf215546Sopenharmony_ci * there is nothing before it to passthrough */ 1887bf215546Sopenharmony_ci 1888bf215546Sopenharmony_ci for (unsigned t = 1; t < clause->tuple_count; ++t) 1889bf215546Sopenharmony_ci bi_rewrite_passthrough(clause->tuples[t - 1], clause->tuples[t]); 1890bf215546Sopenharmony_ci 1891bf215546Sopenharmony_ci return clause; 1892bf215546Sopenharmony_ci} 1893bf215546Sopenharmony_ci 1894bf215546Sopenharmony_cistatic void 1895bf215546Sopenharmony_cibi_schedule_block(bi_context *ctx, bi_block *block) 1896bf215546Sopenharmony_ci{ 1897bf215546Sopenharmony_ci list_inithead(&block->clauses); 1898bf215546Sopenharmony_ci 1899bf215546Sopenharmony_ci /* Copy list to dynamic array */ 1900bf215546Sopenharmony_ci struct bi_worklist st = bi_initialize_worklist(block, 1901bf215546Sopenharmony_ci bifrost_debug & BIFROST_DBG_INORDER, 1902bf215546Sopenharmony_ci ctx->inputs->is_blend); 1903bf215546Sopenharmony_ci 1904bf215546Sopenharmony_ci if (!st.count) { 1905bf215546Sopenharmony_ci bi_free_worklist(st); 1906bf215546Sopenharmony_ci return; 1907bf215546Sopenharmony_ci } 1908bf215546Sopenharmony_ci 1909bf215546Sopenharmony_ci /* We need to track liveness during scheduling 
in order to determine whether we can use temporary (passthrough) registers */ 1910bf215546Sopenharmony_ci uint64_t live = block->reg_live_out; 1911bf215546Sopenharmony_ci 1912bf215546Sopenharmony_ci /* Schedule as many clauses as needed to fill the block */ 1913bf215546Sopenharmony_ci bi_clause *u = NULL; 1914bf215546Sopenharmony_ci while((u = bi_schedule_clause(ctx, block, st, &live))) 1915bf215546Sopenharmony_ci list_add(&u->link, &block->clauses); 1916bf215546Sopenharmony_ci 1917bf215546Sopenharmony_ci /* Back-to-back bit affects only the last clause of a block, 1918bf215546Sopenharmony_ci * the rest are implicitly true */ 1919bf215546Sopenharmony_ci if (!list_is_empty(&block->clauses)) { 1920bf215546Sopenharmony_ci bi_clause *last_clause = list_last_entry(&block->clauses, bi_clause, link); 1921bf215546Sopenharmony_ci if (bi_reconverge_branches(block)) 1922bf215546Sopenharmony_ci last_clause->flow_control = BIFROST_FLOW_NBTB_UNCONDITIONAL; 1923bf215546Sopenharmony_ci } 1924bf215546Sopenharmony_ci 1925bf215546Sopenharmony_ci /* Reorder instructions to match the new schedule. 
First remove 1926bf215546Sopenharmony_ci * existing instructions and then recreate the list */ 1927bf215546Sopenharmony_ci 1928bf215546Sopenharmony_ci bi_foreach_instr_in_block_safe(block, ins) { 1929bf215546Sopenharmony_ci list_del(&ins->link); 1930bf215546Sopenharmony_ci } 1931bf215546Sopenharmony_ci 1932bf215546Sopenharmony_ci bi_foreach_clause_in_block(block, clause) { 1933bf215546Sopenharmony_ci for (unsigned i = 0; i < clause->tuple_count; ++i) { 1934bf215546Sopenharmony_ci bi_foreach_instr_in_tuple(&clause->tuples[i], ins) { 1935bf215546Sopenharmony_ci list_addtail(&ins->link, &block->instructions); 1936bf215546Sopenharmony_ci } 1937bf215546Sopenharmony_ci } 1938bf215546Sopenharmony_ci } 1939bf215546Sopenharmony_ci 1940bf215546Sopenharmony_ci block->scheduled = true; 1941bf215546Sopenharmony_ci 1942bf215546Sopenharmony_ci#ifndef NDEBUG 1943bf215546Sopenharmony_ci unsigned i; 1944bf215546Sopenharmony_ci bool incomplete = false; 1945bf215546Sopenharmony_ci 1946bf215546Sopenharmony_ci BITSET_FOREACH_SET(i, st.worklist, st.count) { 1947bf215546Sopenharmony_ci bi_print_instr(st.instructions[i], stderr); 1948bf215546Sopenharmony_ci incomplete = true; 1949bf215546Sopenharmony_ci } 1950bf215546Sopenharmony_ci 1951bf215546Sopenharmony_ci if (incomplete) 1952bf215546Sopenharmony_ci unreachable("The above instructions failed to schedule."); 1953bf215546Sopenharmony_ci#endif 1954bf215546Sopenharmony_ci 1955bf215546Sopenharmony_ci bi_free_worklist(st); 1956bf215546Sopenharmony_ci} 1957bf215546Sopenharmony_ci 1958bf215546Sopenharmony_cistatic bool 1959bf215546Sopenharmony_cibi_check_fau_src(bi_instr *ins, unsigned s, uint32_t *constants, unsigned *cwords, bi_index *fau) 1960bf215546Sopenharmony_ci{ 1961bf215546Sopenharmony_ci bi_index src = ins->src[s]; 1962bf215546Sopenharmony_ci 1963bf215546Sopenharmony_ci /* Staging registers can't have FAU accesses */ 1964bf215546Sopenharmony_ci if (bi_is_staging_src(ins, s)) 1965bf215546Sopenharmony_ci return (src.type != 
BI_INDEX_CONSTANT) && (src.type != BI_INDEX_FAU); 1966bf215546Sopenharmony_ci 1967bf215546Sopenharmony_ci if (src.type == BI_INDEX_CONSTANT) { 1968bf215546Sopenharmony_ci /* Allow fast zero */ 1969bf215546Sopenharmony_ci if (src.value == 0 && bi_opcode_props[ins->op].fma && bi_reads_zero(ins)) 1970bf215546Sopenharmony_ci return true; 1971bf215546Sopenharmony_ci 1972bf215546Sopenharmony_ci if (!bi_is_null(*fau)) 1973bf215546Sopenharmony_ci return false; 1974bf215546Sopenharmony_ci 1975bf215546Sopenharmony_ci /* Else, try to inline a constant */ 1976bf215546Sopenharmony_ci for (unsigned i = 0; i < *cwords; ++i) { 1977bf215546Sopenharmony_ci if (src.value == constants[i]) 1978bf215546Sopenharmony_ci return true; 1979bf215546Sopenharmony_ci } 1980bf215546Sopenharmony_ci 1981bf215546Sopenharmony_ci if (*cwords >= 2) 1982bf215546Sopenharmony_ci return false; 1983bf215546Sopenharmony_ci 1984bf215546Sopenharmony_ci constants[(*cwords)++] = src.value; 1985bf215546Sopenharmony_ci } else if (src.type == BI_INDEX_FAU) { 1986bf215546Sopenharmony_ci if (*cwords != 0) 1987bf215546Sopenharmony_ci return false; 1988bf215546Sopenharmony_ci 1989bf215546Sopenharmony_ci /* Can only read from one pair of FAU words */ 1990bf215546Sopenharmony_ci if (!bi_is_null(*fau) && (src.value != fau->value)) 1991bf215546Sopenharmony_ci return false; 1992bf215546Sopenharmony_ci 1993bf215546Sopenharmony_ci /* If there is a target, we'll need a PC-relative constant */ 1994bf215546Sopenharmony_ci if (ins->branch_target) 1995bf215546Sopenharmony_ci return false; 1996bf215546Sopenharmony_ci 1997bf215546Sopenharmony_ci *fau = src; 1998bf215546Sopenharmony_ci } 1999bf215546Sopenharmony_ci 2000bf215546Sopenharmony_ci return true; 2001bf215546Sopenharmony_ci} 2002bf215546Sopenharmony_ci 2003bf215546Sopenharmony_civoid 2004bf215546Sopenharmony_cibi_lower_fau(bi_context *ctx) 2005bf215546Sopenharmony_ci{ 2006bf215546Sopenharmony_ci bi_foreach_instr_global_safe(ctx, ins) { 2007bf215546Sopenharmony_ci bi_builder 
b = bi_init_builder(ctx, bi_before_instr(ins)); 2008bf215546Sopenharmony_ci 2009bf215546Sopenharmony_ci uint32_t constants[2]; 2010bf215546Sopenharmony_ci unsigned cwords = 0; 2011bf215546Sopenharmony_ci bi_index fau = bi_null(); 2012bf215546Sopenharmony_ci 2013bf215546Sopenharmony_ci /* ATEST must have the ATEST datum encoded, not any other 2014bf215546Sopenharmony_ci * uniform. See to it this is the case. */ 2015bf215546Sopenharmony_ci if (ins->op == BI_OPCODE_ATEST) 2016bf215546Sopenharmony_ci fau = ins->src[2]; 2017bf215546Sopenharmony_ci 2018bf215546Sopenharmony_ci /* Dual texturing requires the texture operation descriptor 2019bf215546Sopenharmony_ci * encoded as an immediate so we can fix up. 2020bf215546Sopenharmony_ci */ 2021bf215546Sopenharmony_ci if (ins->op == BI_OPCODE_TEXC) { 2022bf215546Sopenharmony_ci assert(ins->src[3].type == BI_INDEX_CONSTANT); 2023bf215546Sopenharmony_ci constants[cwords++] = ins->src[3].value; 2024bf215546Sopenharmony_ci } 2025bf215546Sopenharmony_ci 2026bf215546Sopenharmony_ci bi_foreach_src(ins, s) { 2027bf215546Sopenharmony_ci if (bi_check_fau_src(ins, s, constants, &cwords, &fau)) continue; 2028bf215546Sopenharmony_ci 2029bf215546Sopenharmony_ci bi_index copy = bi_mov_i32(&b, ins->src[s]); 2030bf215546Sopenharmony_ci ins->src[s] = bi_replace_index(ins->src[s], copy); 2031bf215546Sopenharmony_ci } 2032bf215546Sopenharmony_ci } 2033bf215546Sopenharmony_ci} 2034bf215546Sopenharmony_ci 2035bf215546Sopenharmony_ci/* Only v7 allows specifying a dependency on the tilebuffer for the first 2036bf215546Sopenharmony_ci * clause of a shader. v6 requires adding a NOP clause with the depedency. 
*/ 2037bf215546Sopenharmony_ci 2038bf215546Sopenharmony_cistatic void 2039bf215546Sopenharmony_cibi_add_nop_for_atest(bi_context *ctx) 2040bf215546Sopenharmony_ci{ 2041bf215546Sopenharmony_ci /* Only needed on v6 */ 2042bf215546Sopenharmony_ci if (ctx->arch >= 7) 2043bf215546Sopenharmony_ci return; 2044bf215546Sopenharmony_ci 2045bf215546Sopenharmony_ci if (list_is_empty(&ctx->blocks)) 2046bf215546Sopenharmony_ci return; 2047bf215546Sopenharmony_ci 2048bf215546Sopenharmony_ci /* Fetch the first clause of the shader */ 2049bf215546Sopenharmony_ci bi_block *block = list_first_entry(&ctx->blocks, bi_block, link); 2050bf215546Sopenharmony_ci bi_clause *clause = bi_next_clause(ctx, block, NULL); 2051bf215546Sopenharmony_ci 2052bf215546Sopenharmony_ci if (!clause || !(clause->dependencies & ((1 << BIFROST_SLOT_ELDEST_DEPTH) | 2053bf215546Sopenharmony_ci (1 << BIFROST_SLOT_ELDEST_COLOUR)))) 2054bf215546Sopenharmony_ci return; 2055bf215546Sopenharmony_ci 2056bf215546Sopenharmony_ci /* Add a NOP so we can wait for the dependencies required by the first 2057bf215546Sopenharmony_ci * clause */ 2058bf215546Sopenharmony_ci 2059bf215546Sopenharmony_ci bi_instr *I = rzalloc(ctx, bi_instr); 2060bf215546Sopenharmony_ci I->op = BI_OPCODE_NOP; 2061bf215546Sopenharmony_ci I->dest[0] = bi_null(); 2062bf215546Sopenharmony_ci 2063bf215546Sopenharmony_ci bi_clause *new_clause = ralloc(ctx, bi_clause); 2064bf215546Sopenharmony_ci *new_clause = (bi_clause) { 2065bf215546Sopenharmony_ci .flow_control = BIFROST_FLOW_NBTB, 2066bf215546Sopenharmony_ci .next_clause_prefetch = true, 2067bf215546Sopenharmony_ci .block = clause->block, 2068bf215546Sopenharmony_ci 2069bf215546Sopenharmony_ci .tuple_count = 1, 2070bf215546Sopenharmony_ci .tuples[0] = { .fma = I, }, 2071bf215546Sopenharmony_ci }; 2072bf215546Sopenharmony_ci 2073bf215546Sopenharmony_ci list_add(&new_clause->link, &clause->block->clauses); 2074bf215546Sopenharmony_ci} 2075bf215546Sopenharmony_ci 2076bf215546Sopenharmony_civoid 
2077bf215546Sopenharmony_cibi_schedule(bi_context *ctx) 2078bf215546Sopenharmony_ci{ 2079bf215546Sopenharmony_ci /* Fed into both scheduling and DCE */ 2080bf215546Sopenharmony_ci bi_postra_liveness(ctx); 2081bf215546Sopenharmony_ci 2082bf215546Sopenharmony_ci bi_foreach_block(ctx, block) { 2083bf215546Sopenharmony_ci bi_schedule_block(ctx, block); 2084bf215546Sopenharmony_ci } 2085bf215546Sopenharmony_ci 2086bf215546Sopenharmony_ci bi_opt_dce_post_ra(ctx); 2087bf215546Sopenharmony_ci bi_add_nop_for_atest(ctx); 2088bf215546Sopenharmony_ci} 2089