/*
 * Copyright (C) 2020 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "compiler.h"
#include "bi_quirks.h"

/* This file contains the final passes of the compiler.
Running after 28bf215546Sopenharmony_ci * scheduling and RA, the IR is now finalized, so we need to emit it to actual 29bf215546Sopenharmony_ci * bits on the wire (as well as fixup branches) */ 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cistatic uint64_t 32bf215546Sopenharmony_cibi_pack_header(bi_clause *clause, bi_clause *next_1, bi_clause *next_2) 33bf215546Sopenharmony_ci{ 34bf215546Sopenharmony_ci /* next_dependencies are the union of the dependencies of successors' 35bf215546Sopenharmony_ci * dependencies */ 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci unsigned dependency_wait = next_1 ? next_1->dependencies : 0; 38bf215546Sopenharmony_ci dependency_wait |= next_2 ? next_2->dependencies : 0; 39bf215546Sopenharmony_ci 40bf215546Sopenharmony_ci /* Signal barriers (slot #7) immediately. This is not optimal but good 41bf215546Sopenharmony_ci * enough. Doing better requires extending the IR and scheduler. 42bf215546Sopenharmony_ci */ 43bf215546Sopenharmony_ci if (clause->message_type == BIFROST_MESSAGE_BARRIER) 44bf215546Sopenharmony_ci dependency_wait |= BITFIELD_BIT(7); 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci bool staging_barrier = next_1 ? next_1->staging_barrier : false; 47bf215546Sopenharmony_ci staging_barrier |= next_2 ? next_2->staging_barrier : 0; 48bf215546Sopenharmony_ci 49bf215546Sopenharmony_ci struct bifrost_header header = { 50bf215546Sopenharmony_ci .flow_control = 51bf215546Sopenharmony_ci (next_1 == NULL && next_2 == NULL) ? 
52bf215546Sopenharmony_ci BIFROST_FLOW_END : clause->flow_control, 53bf215546Sopenharmony_ci .terminate_discarded_threads = clause->td, 54bf215546Sopenharmony_ci .next_clause_prefetch = clause->next_clause_prefetch && next_1, 55bf215546Sopenharmony_ci .staging_barrier = staging_barrier, 56bf215546Sopenharmony_ci .staging_register = clause->staging_register, 57bf215546Sopenharmony_ci .dependency_wait = dependency_wait, 58bf215546Sopenharmony_ci .dependency_slot = clause->scoreboard_id, 59bf215546Sopenharmony_ci .message_type = clause->message_type, 60bf215546Sopenharmony_ci .next_message_type = next_1 ? next_1->message_type : 0, 61bf215546Sopenharmony_ci .flush_to_zero = clause->ftz ? BIFROST_FTZ_ALWAYS : BIFROST_FTZ_DISABLE 62bf215546Sopenharmony_ci }; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_ci uint64_t u = 0; 65bf215546Sopenharmony_ci memcpy(&u, &header, sizeof(header)); 66bf215546Sopenharmony_ci return u; 67bf215546Sopenharmony_ci} 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci/* Assigns a slot for reading, before anything is written */ 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_cistatic void 72bf215546Sopenharmony_cibi_assign_slot_read(bi_registers *regs, bi_index src) 73bf215546Sopenharmony_ci{ 74bf215546Sopenharmony_ci /* We only assign for registers */ 75bf215546Sopenharmony_ci if (src.type != BI_INDEX_REGISTER) 76bf215546Sopenharmony_ci return; 77bf215546Sopenharmony_ci 78bf215546Sopenharmony_ci /* Check if we already assigned the slot */ 79bf215546Sopenharmony_ci for (unsigned i = 0; i <= 1; ++i) { 80bf215546Sopenharmony_ci if (regs->slot[i] == src.value && regs->enabled[i]) 81bf215546Sopenharmony_ci return; 82bf215546Sopenharmony_ci } 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci if (regs->slot[2] == src.value && regs->slot23.slot2 == BIFROST_OP_READ) 85bf215546Sopenharmony_ci return; 86bf215546Sopenharmony_ci 87bf215546Sopenharmony_ci /* Assign it now */ 88bf215546Sopenharmony_ci 89bf215546Sopenharmony_ci for (unsigned i = 0; i 
<= 1; ++i) { 90bf215546Sopenharmony_ci if (!regs->enabled[i]) { 91bf215546Sopenharmony_ci regs->slot[i] = src.value; 92bf215546Sopenharmony_ci regs->enabled[i] = true; 93bf215546Sopenharmony_ci return; 94bf215546Sopenharmony_ci } 95bf215546Sopenharmony_ci } 96bf215546Sopenharmony_ci 97bf215546Sopenharmony_ci if (!regs->slot23.slot3) { 98bf215546Sopenharmony_ci regs->slot[2] = src.value; 99bf215546Sopenharmony_ci regs->slot23.slot2 = BIFROST_OP_READ; 100bf215546Sopenharmony_ci return; 101bf215546Sopenharmony_ci } 102bf215546Sopenharmony_ci 103bf215546Sopenharmony_ci bi_print_slots(regs, stderr); 104bf215546Sopenharmony_ci unreachable("Failed to find a free slot for src"); 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic bi_registers 108bf215546Sopenharmony_cibi_assign_slots(bi_tuple *now, bi_tuple *prev) 109bf215546Sopenharmony_ci{ 110bf215546Sopenharmony_ci /* We assign slots for the main register mechanism. Special ops 111bf215546Sopenharmony_ci * use the data registers, which has its own mechanism entirely 112bf215546Sopenharmony_ci * and thus gets skipped over here. */ 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci bool read_dreg = now->add && bi_opcode_props[now->add->op].sr_read; 115bf215546Sopenharmony_ci bool write_dreg = prev->add && bi_opcode_props[prev->add->op].sr_write; 116bf215546Sopenharmony_ci 117bf215546Sopenharmony_ci /* First, assign reads */ 118bf215546Sopenharmony_ci 119bf215546Sopenharmony_ci if (now->fma) 120bf215546Sopenharmony_ci bi_foreach_src(now->fma, src) 121bf215546Sopenharmony_ci bi_assign_slot_read(&now->regs, (now->fma)->src[src]); 122bf215546Sopenharmony_ci 123bf215546Sopenharmony_ci if (now->add) { 124bf215546Sopenharmony_ci bi_foreach_src(now->add, src) { 125bf215546Sopenharmony_ci /* This is not a real source, we shouldn't assign a 126bf215546Sopenharmony_ci * slot for it. 
127bf215546Sopenharmony_ci */ 128bf215546Sopenharmony_ci if (now->add->op == BI_OPCODE_BLEND && src == 4) 129bf215546Sopenharmony_ci continue; 130bf215546Sopenharmony_ci 131bf215546Sopenharmony_ci if (!(src == 0 && read_dreg)) 132bf215546Sopenharmony_ci bi_assign_slot_read(&now->regs, (now->add)->src[src]); 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci } 135bf215546Sopenharmony_ci 136bf215546Sopenharmony_ci /* Next, assign writes. Staging writes are assigned separately, but 137bf215546Sopenharmony_ci * +ATEST wants its destination written to both a staging register 138bf215546Sopenharmony_ci * _and_ a regular write, because it may not generate a message */ 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci if (prev->add && (!write_dreg || prev->add->op == BI_OPCODE_ATEST)) { 141bf215546Sopenharmony_ci bi_index idx = prev->add->dest[0]; 142bf215546Sopenharmony_ci 143bf215546Sopenharmony_ci if (idx.type == BI_INDEX_REGISTER) { 144bf215546Sopenharmony_ci now->regs.slot[3] = idx.value; 145bf215546Sopenharmony_ci now->regs.slot23.slot3 = BIFROST_OP_WRITE; 146bf215546Sopenharmony_ci } 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci if (prev->fma) { 150bf215546Sopenharmony_ci bi_index idx = (prev->fma)->dest[0]; 151bf215546Sopenharmony_ci 152bf215546Sopenharmony_ci if (idx.type == BI_INDEX_REGISTER) { 153bf215546Sopenharmony_ci if (now->regs.slot23.slot3) { 154bf215546Sopenharmony_ci /* Scheduler constraint: cannot read 3 and write 2 */ 155bf215546Sopenharmony_ci assert(!now->regs.slot23.slot2); 156bf215546Sopenharmony_ci now->regs.slot[2] = idx.value; 157bf215546Sopenharmony_ci now->regs.slot23.slot2 = BIFROST_OP_WRITE; 158bf215546Sopenharmony_ci } else { 159bf215546Sopenharmony_ci now->regs.slot[3] = idx.value; 160bf215546Sopenharmony_ci now->regs.slot23.slot3 = BIFROST_OP_WRITE; 161bf215546Sopenharmony_ci now->regs.slot23.slot3_fma = true; 162bf215546Sopenharmony_ci } 163bf215546Sopenharmony_ci } 
164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci return now->regs; 167bf215546Sopenharmony_ci} 168bf215546Sopenharmony_ci 169bf215546Sopenharmony_cistatic enum bifrost_reg_mode 170bf215546Sopenharmony_cibi_pack_register_mode(bi_registers r) 171bf215546Sopenharmony_ci{ 172bf215546Sopenharmony_ci /* Handle idle as a special case */ 173bf215546Sopenharmony_ci if (!(r.slot23.slot2 | r.slot23.slot3)) 174bf215546Sopenharmony_ci return r.first_instruction ? BIFROST_IDLE_1 : BIFROST_IDLE; 175bf215546Sopenharmony_ci 176bf215546Sopenharmony_ci /* Otherwise, use the LUT */ 177bf215546Sopenharmony_ci for (unsigned i = 0; i < ARRAY_SIZE(bifrost_reg_ctrl_lut); ++i) { 178bf215546Sopenharmony_ci if (memcmp(bifrost_reg_ctrl_lut + i, &r.slot23, sizeof(r.slot23)) == 0) 179bf215546Sopenharmony_ci return i; 180bf215546Sopenharmony_ci } 181bf215546Sopenharmony_ci 182bf215546Sopenharmony_ci bi_print_slots(&r, stderr); 183bf215546Sopenharmony_ci unreachable("Invalid slot assignment"); 184bf215546Sopenharmony_ci} 185bf215546Sopenharmony_ci 186bf215546Sopenharmony_cistatic uint64_t 187bf215546Sopenharmony_cibi_pack_registers(bi_registers regs) 188bf215546Sopenharmony_ci{ 189bf215546Sopenharmony_ci enum bifrost_reg_mode mode = bi_pack_register_mode(regs); 190bf215546Sopenharmony_ci struct bifrost_regs s = { 0 }; 191bf215546Sopenharmony_ci uint64_t packed = 0; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci /* Need to pack 5-bit mode as a 4-bit field. The decoder moves bit 3 to bit 4 for 194bf215546Sopenharmony_ci * first instruction and adds 16 when reg 2 == reg 3 */ 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci unsigned ctrl; 197bf215546Sopenharmony_ci bool r2_equals_r3 = false; 198bf215546Sopenharmony_ci 199bf215546Sopenharmony_ci if (regs.first_instruction) { 200bf215546Sopenharmony_ci /* Bit 3 implicitly must be clear for first instructions. 
201bf215546Sopenharmony_ci * The affected patterns all write both ADD/FMA, but that 202bf215546Sopenharmony_ci * is forbidden for the last instruction (whose writes are 203bf215546Sopenharmony_ci * encoded by the first), so this does not add additional 204bf215546Sopenharmony_ci * encoding constraints */ 205bf215546Sopenharmony_ci assert(!(mode & 0x8)); 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci /* Move bit 4 to bit 3, since bit 3 is clear */ 208bf215546Sopenharmony_ci ctrl = (mode & 0x7) | ((mode & 0x10) >> 1); 209bf215546Sopenharmony_ci 210bf215546Sopenharmony_ci /* If we can let r2 equal r3, we have to or the hardware raises 211bf215546Sopenharmony_ci * INSTR_INVALID_ENC (it's unclear why). */ 212bf215546Sopenharmony_ci if (!(regs.slot23.slot2 && regs.slot23.slot3)) 213bf215546Sopenharmony_ci r2_equals_r3 = true; 214bf215546Sopenharmony_ci } else { 215bf215546Sopenharmony_ci /* We force r2=r3 or not for the upper bit */ 216bf215546Sopenharmony_ci ctrl = (mode & 0xF); 217bf215546Sopenharmony_ci r2_equals_r3 = (mode & 0x10); 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci if (regs.enabled[1]) { 221bf215546Sopenharmony_ci /* Gotta save that bit!~ Required by the 63-x trick */ 222bf215546Sopenharmony_ci assert(regs.slot[1] > regs.slot[0]); 223bf215546Sopenharmony_ci assert(regs.enabled[0]); 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci /* Do the 63-x trick, see docs/disasm */ 226bf215546Sopenharmony_ci if (regs.slot[0] > 31) { 227bf215546Sopenharmony_ci regs.slot[0] = 63 - regs.slot[0]; 228bf215546Sopenharmony_ci regs.slot[1] = 63 - regs.slot[1]; 229bf215546Sopenharmony_ci } 230bf215546Sopenharmony_ci 231bf215546Sopenharmony_ci assert(regs.slot[0] <= 31); 232bf215546Sopenharmony_ci assert(regs.slot[1] <= 63); 233bf215546Sopenharmony_ci 234bf215546Sopenharmony_ci s.ctrl = ctrl; 235bf215546Sopenharmony_ci s.reg1 = regs.slot[1]; 236bf215546Sopenharmony_ci s.reg0 = regs.slot[0]; 237bf215546Sopenharmony_ci } else { 
238bf215546Sopenharmony_ci /* slot 1 disabled, so set to zero and use slot 1 for ctrl */ 239bf215546Sopenharmony_ci s.ctrl = 0; 240bf215546Sopenharmony_ci s.reg1 = ctrl << 2; 241bf215546Sopenharmony_ci 242bf215546Sopenharmony_ci if (regs.enabled[0]) { 243bf215546Sopenharmony_ci /* Bit 0 upper bit of slot 0 */ 244bf215546Sopenharmony_ci s.reg1 |= (regs.slot[0] >> 5); 245bf215546Sopenharmony_ci 246bf215546Sopenharmony_ci /* Rest of slot 0 in usual spot */ 247bf215546Sopenharmony_ci s.reg0 = (regs.slot[0] & 0b11111); 248bf215546Sopenharmony_ci } else { 249bf215546Sopenharmony_ci /* Bit 1 set if slot 0 also disabled */ 250bf215546Sopenharmony_ci s.reg1 |= (1 << 1); 251bf215546Sopenharmony_ci } 252bf215546Sopenharmony_ci } 253bf215546Sopenharmony_ci 254bf215546Sopenharmony_ci /* Force r2 =/!= r3 as needed */ 255bf215546Sopenharmony_ci if (r2_equals_r3) { 256bf215546Sopenharmony_ci assert(regs.slot[3] == regs.slot[2] || !(regs.slot23.slot2 && regs.slot23.slot3)); 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_ci if (regs.slot23.slot2) 259bf215546Sopenharmony_ci regs.slot[3] = regs.slot[2]; 260bf215546Sopenharmony_ci else 261bf215546Sopenharmony_ci regs.slot[2] = regs.slot[3]; 262bf215546Sopenharmony_ci } else if (!regs.first_instruction) { 263bf215546Sopenharmony_ci /* Enforced by the encoding anyway */ 264bf215546Sopenharmony_ci assert(regs.slot[2] != regs.slot[3]); 265bf215546Sopenharmony_ci } 266bf215546Sopenharmony_ci 267bf215546Sopenharmony_ci s.reg2 = regs.slot[2]; 268bf215546Sopenharmony_ci s.reg3 = regs.slot[3]; 269bf215546Sopenharmony_ci s.fau_idx = regs.fau_idx; 270bf215546Sopenharmony_ci 271bf215546Sopenharmony_ci memcpy(&packed, &s, sizeof(s)); 272bf215546Sopenharmony_ci return packed; 273bf215546Sopenharmony_ci} 274bf215546Sopenharmony_ci 275bf215546Sopenharmony_ci/* We must ensure slot 1 > slot 0 for the 63-x trick to function, so we fix 276bf215546Sopenharmony_ci * this up at pack time. (Scheduling doesn't care.) 
*/ 277bf215546Sopenharmony_ci 278bf215546Sopenharmony_cistatic void 279bf215546Sopenharmony_cibi_flip_slots(bi_registers *regs) 280bf215546Sopenharmony_ci{ 281bf215546Sopenharmony_ci if (regs->enabled[0] && regs->enabled[1] && regs->slot[1] < regs->slot[0]) { 282bf215546Sopenharmony_ci unsigned temp = regs->slot[0]; 283bf215546Sopenharmony_ci regs->slot[0] = regs->slot[1]; 284bf215546Sopenharmony_ci regs->slot[1] = temp; 285bf215546Sopenharmony_ci } 286bf215546Sopenharmony_ci 287bf215546Sopenharmony_ci} 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_cistatic inline enum bifrost_packed_src 290bf215546Sopenharmony_cibi_get_src_slot(bi_registers *regs, unsigned reg) 291bf215546Sopenharmony_ci{ 292bf215546Sopenharmony_ci if (regs->slot[0] == reg && regs->enabled[0]) 293bf215546Sopenharmony_ci return BIFROST_SRC_PORT0; 294bf215546Sopenharmony_ci else if (regs->slot[1] == reg && regs->enabled[1]) 295bf215546Sopenharmony_ci return BIFROST_SRC_PORT1; 296bf215546Sopenharmony_ci else if (regs->slot[2] == reg && regs->slot23.slot2 == BIFROST_OP_READ) 297bf215546Sopenharmony_ci return BIFROST_SRC_PORT2; 298bf215546Sopenharmony_ci else 299bf215546Sopenharmony_ci unreachable("Tried to access register with no port"); 300bf215546Sopenharmony_ci} 301bf215546Sopenharmony_ci 302bf215546Sopenharmony_cistatic inline enum bifrost_packed_src 303bf215546Sopenharmony_cibi_get_src_new(bi_instr *ins, bi_registers *regs, unsigned s) 304bf215546Sopenharmony_ci{ 305bf215546Sopenharmony_ci if (!ins) 306bf215546Sopenharmony_ci return 0; 307bf215546Sopenharmony_ci 308bf215546Sopenharmony_ci bi_index src = ins->src[s]; 309bf215546Sopenharmony_ci 310bf215546Sopenharmony_ci if (src.type == BI_INDEX_REGISTER) 311bf215546Sopenharmony_ci return bi_get_src_slot(regs, src.value); 312bf215546Sopenharmony_ci else if (src.type == BI_INDEX_PASS) 313bf215546Sopenharmony_ci return src.value; 314bf215546Sopenharmony_ci else { 315bf215546Sopenharmony_ci /* TODO make safer */ 316bf215546Sopenharmony_ci return 
BIFROST_SRC_STAGE; 317bf215546Sopenharmony_ci } 318bf215546Sopenharmony_ci} 319bf215546Sopenharmony_ci 320bf215546Sopenharmony_cistatic struct bi_packed_tuple 321bf215546Sopenharmony_cibi_pack_tuple(bi_clause *clause, bi_tuple *tuple, bi_tuple *prev, bool first_tuple, gl_shader_stage stage) 322bf215546Sopenharmony_ci{ 323bf215546Sopenharmony_ci bi_assign_slots(tuple, prev); 324bf215546Sopenharmony_ci tuple->regs.fau_idx = tuple->fau_idx; 325bf215546Sopenharmony_ci tuple->regs.first_instruction = first_tuple; 326bf215546Sopenharmony_ci 327bf215546Sopenharmony_ci bi_flip_slots(&tuple->regs); 328bf215546Sopenharmony_ci 329bf215546Sopenharmony_ci bool sr_read = tuple->add && 330bf215546Sopenharmony_ci bi_opcode_props[(tuple->add)->op].sr_read; 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci uint64_t reg = bi_pack_registers(tuple->regs); 333bf215546Sopenharmony_ci uint64_t fma = bi_pack_fma(tuple->fma, 334bf215546Sopenharmony_ci bi_get_src_new(tuple->fma, &tuple->regs, 0), 335bf215546Sopenharmony_ci bi_get_src_new(tuple->fma, &tuple->regs, 1), 336bf215546Sopenharmony_ci bi_get_src_new(tuple->fma, &tuple->regs, 2), 337bf215546Sopenharmony_ci bi_get_src_new(tuple->fma, &tuple->regs, 3)); 338bf215546Sopenharmony_ci 339bf215546Sopenharmony_ci uint64_t add = bi_pack_add(tuple->add, 340bf215546Sopenharmony_ci bi_get_src_new(tuple->add, &tuple->regs, sr_read + 0), 341bf215546Sopenharmony_ci bi_get_src_new(tuple->add, &tuple->regs, sr_read + 1), 342bf215546Sopenharmony_ci bi_get_src_new(tuple->add, &tuple->regs, sr_read + 2), 343bf215546Sopenharmony_ci 0); 344bf215546Sopenharmony_ci 345bf215546Sopenharmony_ci if (tuple->add) { 346bf215546Sopenharmony_ci bi_instr *add = tuple->add; 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci bool sr_write = bi_opcode_props[add->op].sr_write && 349bf215546Sopenharmony_ci !bi_is_null(add->dest[0]); 350bf215546Sopenharmony_ci 351bf215546Sopenharmony_ci if (sr_read && !bi_is_null(add->src[0])) { 352bf215546Sopenharmony_ci 
assert(add->src[0].type == BI_INDEX_REGISTER); 353bf215546Sopenharmony_ci clause->staging_register = add->src[0].value; 354bf215546Sopenharmony_ci 355bf215546Sopenharmony_ci if (sr_write) 356bf215546Sopenharmony_ci assert(bi_is_equiv(add->src[0], add->dest[0])); 357bf215546Sopenharmony_ci } else if (sr_write) { 358bf215546Sopenharmony_ci assert(add->dest[0].type == BI_INDEX_REGISTER); 359bf215546Sopenharmony_ci clause->staging_register = add->dest[0].value; 360bf215546Sopenharmony_ci } 361bf215546Sopenharmony_ci } 362bf215546Sopenharmony_ci 363bf215546Sopenharmony_ci struct bi_packed_tuple packed = { 364bf215546Sopenharmony_ci .lo = reg | (fma << 35) | ((add & 0b111111) << 58), 365bf215546Sopenharmony_ci .hi = add >> 6 366bf215546Sopenharmony_ci }; 367bf215546Sopenharmony_ci 368bf215546Sopenharmony_ci return packed; 369bf215546Sopenharmony_ci} 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci/* A block contains at most one PC-relative constant, from a terminal branch. 372bf215546Sopenharmony_ci * Find the last instruction and if it is a relative branch, fix up the 373bf215546Sopenharmony_ci * PC-relative constant to contain the absolute offset. This occurs at pack 374bf215546Sopenharmony_ci * time instead of schedule time because the number of quadwords between each 375bf215546Sopenharmony_ci * block is not known until after all other passes have finished. 
376bf215546Sopenharmony_ci */ 377bf215546Sopenharmony_ci 378bf215546Sopenharmony_cistatic void 379bf215546Sopenharmony_cibi_assign_branch_offset(bi_context *ctx, bi_block *block) 380bf215546Sopenharmony_ci{ 381bf215546Sopenharmony_ci if (list_is_empty(&block->clauses)) 382bf215546Sopenharmony_ci return; 383bf215546Sopenharmony_ci 384bf215546Sopenharmony_ci bi_clause *clause = list_last_entry(&block->clauses, bi_clause, link); 385bf215546Sopenharmony_ci bi_instr *br = bi_last_instr_in_clause(clause); 386bf215546Sopenharmony_ci 387bf215546Sopenharmony_ci if (!br->branch_target) 388bf215546Sopenharmony_ci return; 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci /* Put it in the high place */ 391bf215546Sopenharmony_ci int32_t qwords = bi_block_offset(ctx, clause, br->branch_target); 392bf215546Sopenharmony_ci int32_t bytes = qwords * 16; 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci /* Copy so we can toy with the sign without undefined behaviour */ 395bf215546Sopenharmony_ci uint32_t raw = 0; 396bf215546Sopenharmony_ci memcpy(&raw, &bytes, sizeof(raw)); 397bf215546Sopenharmony_ci 398bf215546Sopenharmony_ci /* Clear off top bits for A1/B1 bits */ 399bf215546Sopenharmony_ci raw &= ~0xF0000000; 400bf215546Sopenharmony_ci 401bf215546Sopenharmony_ci /* Put in top 32-bits */ 402bf215546Sopenharmony_ci assert(clause->pcrel_idx < 8); 403bf215546Sopenharmony_ci clause->constants[clause->pcrel_idx] |= ((uint64_t) raw) << 32ull; 404bf215546Sopenharmony_ci} 405bf215546Sopenharmony_ci 406bf215546Sopenharmony_cistatic void 407bf215546Sopenharmony_cibi_pack_constants(unsigned tuple_count, uint64_t *constants, 408bf215546Sopenharmony_ci unsigned word_idx, unsigned constant_words, bool ec0_packed, 409bf215546Sopenharmony_ci struct util_dynarray *emission) 410bf215546Sopenharmony_ci{ 411bf215546Sopenharmony_ci unsigned index = (word_idx << 1) + ec0_packed; 412bf215546Sopenharmony_ci 413bf215546Sopenharmony_ci /* Do more constants follow */ 414bf215546Sopenharmony_ci bool 
more = (word_idx + 1) < constant_words; 415bf215546Sopenharmony_ci 416bf215546Sopenharmony_ci /* Indexed first by tuple count and second by constant word number, 417bf215546Sopenharmony_ci * indicates the position in the clause */ 418bf215546Sopenharmony_ci unsigned pos_lookup[8][3] = { 419bf215546Sopenharmony_ci { 0 }, 420bf215546Sopenharmony_ci { 1 }, 421bf215546Sopenharmony_ci { 3 }, 422bf215546Sopenharmony_ci { 2, 5 }, 423bf215546Sopenharmony_ci { 4, 8 }, 424bf215546Sopenharmony_ci { 7, 11, 14 }, 425bf215546Sopenharmony_ci { 6, 10, 13 }, 426bf215546Sopenharmony_ci { 9, 12 } 427bf215546Sopenharmony_ci }; 428bf215546Sopenharmony_ci 429bf215546Sopenharmony_ci /* Compute the pos, and check everything is reasonable */ 430bf215546Sopenharmony_ci assert((tuple_count - 1) < 8); 431bf215546Sopenharmony_ci assert(word_idx < 3); 432bf215546Sopenharmony_ci unsigned pos = pos_lookup[tuple_count - 1][word_idx]; 433bf215546Sopenharmony_ci assert(pos != 0 || (tuple_count == 1 && word_idx == 0)); 434bf215546Sopenharmony_ci 435bf215546Sopenharmony_ci struct bifrost_fmt_constant quad = { 436bf215546Sopenharmony_ci .pos = pos, 437bf215546Sopenharmony_ci .tag = more ? 
BIFROST_FMTC_CONSTANTS : BIFROST_FMTC_FINAL, 438bf215546Sopenharmony_ci .imm_1 = constants[index + 0] >> 4, 439bf215546Sopenharmony_ci .imm_2 = constants[index + 1] >> 4, 440bf215546Sopenharmony_ci }; 441bf215546Sopenharmony_ci 442bf215546Sopenharmony_ci util_dynarray_append(emission, struct bifrost_fmt_constant, quad); 443bf215546Sopenharmony_ci} 444bf215546Sopenharmony_ci 445bf215546Sopenharmony_ciuint8_t 446bf215546Sopenharmony_cibi_pack_literal(enum bi_clause_subword literal) 447bf215546Sopenharmony_ci{ 448bf215546Sopenharmony_ci assert(literal >= BI_CLAUSE_SUBWORD_LITERAL_0); 449bf215546Sopenharmony_ci assert(literal <= BI_CLAUSE_SUBWORD_LITERAL_7); 450bf215546Sopenharmony_ci 451bf215546Sopenharmony_ci return (literal - BI_CLAUSE_SUBWORD_LITERAL_0); 452bf215546Sopenharmony_ci} 453bf215546Sopenharmony_ci 454bf215546Sopenharmony_cistatic inline uint8_t 455bf215546Sopenharmony_cibi_clause_upper(unsigned val, 456bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 457bf215546Sopenharmony_ci ASSERTED unsigned tuple_count) 458bf215546Sopenharmony_ci{ 459bf215546Sopenharmony_ci assert(val < tuple_count); 460bf215546Sopenharmony_ci 461bf215546Sopenharmony_ci /* top 3-bits of 78-bits is tuple >> 75 == (tuple >> 64) >> 11 */ 462bf215546Sopenharmony_ci struct bi_packed_tuple tuple = tuples[val]; 463bf215546Sopenharmony_ci return (tuple.hi >> 11); 464bf215546Sopenharmony_ci} 465bf215546Sopenharmony_ci 466bf215546Sopenharmony_ciuint8_t 467bf215546Sopenharmony_cibi_pack_upper(enum bi_clause_subword upper, 468bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 469bf215546Sopenharmony_ci ASSERTED unsigned tuple_count) 470bf215546Sopenharmony_ci{ 471bf215546Sopenharmony_ci assert(upper >= BI_CLAUSE_SUBWORD_UPPER_0); 472bf215546Sopenharmony_ci assert(upper <= BI_CLAUSE_SUBWORD_UPPER_7); 473bf215546Sopenharmony_ci 474bf215546Sopenharmony_ci return bi_clause_upper(upper - BI_CLAUSE_SUBWORD_UPPER_0, tuples, 475bf215546Sopenharmony_ci tuple_count); 
476bf215546Sopenharmony_ci} 477bf215546Sopenharmony_ci 478bf215546Sopenharmony_ciuint64_t 479bf215546Sopenharmony_cibi_pack_tuple_bits(enum bi_clause_subword idx, 480bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 481bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 482bf215546Sopenharmony_ci unsigned offset, unsigned nbits) 483bf215546Sopenharmony_ci{ 484bf215546Sopenharmony_ci assert(idx >= BI_CLAUSE_SUBWORD_TUPLE_0); 485bf215546Sopenharmony_ci assert(idx <= BI_CLAUSE_SUBWORD_TUPLE_7); 486bf215546Sopenharmony_ci 487bf215546Sopenharmony_ci unsigned val = (idx - BI_CLAUSE_SUBWORD_TUPLE_0); 488bf215546Sopenharmony_ci assert(val < tuple_count); 489bf215546Sopenharmony_ci 490bf215546Sopenharmony_ci struct bi_packed_tuple tuple = tuples[val]; 491bf215546Sopenharmony_ci 492bf215546Sopenharmony_ci assert(offset + nbits < 78); 493bf215546Sopenharmony_ci assert(nbits <= 64); 494bf215546Sopenharmony_ci 495bf215546Sopenharmony_ci /* (X >> start) & m 496bf215546Sopenharmony_ci * = (((hi << 64) | lo) >> start) & m 497bf215546Sopenharmony_ci * = (((hi << 64) >> start) | (lo >> start)) & m 498bf215546Sopenharmony_ci * = { ((hi << (64 - start)) | (lo >> start)) & m if start <= 64 499bf215546Sopenharmony_ci * { ((hi >> (start - 64)) | (lo >> start)) & m if start >= 64 500bf215546Sopenharmony_ci * = { ((hi << (64 - start)) & m) | ((lo >> start) & m) if start <= 64 501bf215546Sopenharmony_ci * { ((hi >> (start - 64)) & m) | ((lo >> start) & m) if start >= 64 502bf215546Sopenharmony_ci * 503bf215546Sopenharmony_ci * By setting m = 2^64 - 1, we justify doing the respective shifts as 504bf215546Sopenharmony_ci * 64-bit integers. Zero special cased to avoid undefined behaviour. 505bf215546Sopenharmony_ci */ 506bf215546Sopenharmony_ci 507bf215546Sopenharmony_ci uint64_t lo = (tuple.lo >> offset); 508bf215546Sopenharmony_ci uint64_t hi = (offset == 0) ? 0 509bf215546Sopenharmony_ci : (offset > 64) ? 
(tuple.hi >> (offset - 64)) 510bf215546Sopenharmony_ci : (tuple.hi << (64 - offset)); 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_ci return (lo | hi) & ((1ULL << nbits) - 1); 513bf215546Sopenharmony_ci} 514bf215546Sopenharmony_ci 515bf215546Sopenharmony_cistatic inline uint16_t 516bf215546Sopenharmony_cibi_pack_lu(enum bi_clause_subword word, 517bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 518bf215546Sopenharmony_ci ASSERTED unsigned tuple_count) 519bf215546Sopenharmony_ci{ 520bf215546Sopenharmony_ci return (word >= BI_CLAUSE_SUBWORD_UPPER_0) ? 521bf215546Sopenharmony_ci bi_pack_upper(word, tuples, tuple_count) : 522bf215546Sopenharmony_ci bi_pack_literal(word); 523bf215546Sopenharmony_ci} 524bf215546Sopenharmony_ci 525bf215546Sopenharmony_ciuint8_t 526bf215546Sopenharmony_cibi_pack_sync(enum bi_clause_subword t1, 527bf215546Sopenharmony_ci enum bi_clause_subword t2, 528bf215546Sopenharmony_ci enum bi_clause_subword t3, 529bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 530bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 531bf215546Sopenharmony_ci bool z) 532bf215546Sopenharmony_ci{ 533bf215546Sopenharmony_ci uint8_t sync = 534bf215546Sopenharmony_ci (bi_pack_lu(t3, tuples, tuple_count) << 0) | 535bf215546Sopenharmony_ci (bi_pack_lu(t2, tuples, tuple_count) << 3); 536bf215546Sopenharmony_ci 537bf215546Sopenharmony_ci if (t1 == BI_CLAUSE_SUBWORD_Z) 538bf215546Sopenharmony_ci sync |= z << 6; 539bf215546Sopenharmony_ci else 540bf215546Sopenharmony_ci sync |= bi_pack_literal(t1) << 6; 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci return sync; 543bf215546Sopenharmony_ci} 544bf215546Sopenharmony_ci 545bf215546Sopenharmony_cistatic inline uint64_t 546bf215546Sopenharmony_cibi_pack_t_ec(enum bi_clause_subword word, 547bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 548bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 549bf215546Sopenharmony_ci uint64_t ec0) 550bf215546Sopenharmony_ci{ 551bf215546Sopenharmony_ci if (word 
== BI_CLAUSE_SUBWORD_CONSTANT) 552bf215546Sopenharmony_ci return ec0; 553bf215546Sopenharmony_ci else 554bf215546Sopenharmony_ci return bi_pack_tuple_bits(word, tuples, tuple_count, 0, 60); 555bf215546Sopenharmony_ci} 556bf215546Sopenharmony_ci 557bf215546Sopenharmony_cistatic uint32_t 558bf215546Sopenharmony_cibi_pack_subwords_56(enum bi_clause_subword t, 559bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 560bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 561bf215546Sopenharmony_ci uint64_t header, uint64_t ec0, 562bf215546Sopenharmony_ci unsigned tuple_subword) 563bf215546Sopenharmony_ci{ 564bf215546Sopenharmony_ci switch (t) { 565bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_HEADER: 566bf215546Sopenharmony_ci return (header & ((1 << 30) - 1)); 567bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_RESERVED: 568bf215546Sopenharmony_ci return 0; 569bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_CONSTANT: 570bf215546Sopenharmony_ci return (ec0 >> 15) & ((1 << 30) - 1); 571bf215546Sopenharmony_ci default: 572bf215546Sopenharmony_ci return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 30); 573bf215546Sopenharmony_ci } 574bf215546Sopenharmony_ci} 575bf215546Sopenharmony_ci 576bf215546Sopenharmony_cistatic uint16_t 577bf215546Sopenharmony_cibi_pack_subword(enum bi_clause_subword t, unsigned format, 578bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 579bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 580bf215546Sopenharmony_ci uint64_t header, uint64_t ec0, unsigned m0, 581bf215546Sopenharmony_ci unsigned tuple_subword) 582bf215546Sopenharmony_ci{ 583bf215546Sopenharmony_ci switch (t) { 584bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_HEADER: 585bf215546Sopenharmony_ci return header >> 30; 586bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_M: 587bf215546Sopenharmony_ci return m0; 588bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_CONSTANT: 589bf215546Sopenharmony_ci return (format == 5 || format == 10) ? 
590bf215546Sopenharmony_ci (ec0 & ((1 << 15) - 1)) : 591bf215546Sopenharmony_ci (ec0 >> (15 + 30)); 592bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_UPPER_23: 593bf215546Sopenharmony_ci return (bi_clause_upper(2, tuples, tuple_count) << 12) | 594bf215546Sopenharmony_ci (bi_clause_upper(3, tuples, tuple_count) << 9); 595bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_UPPER_56: 596bf215546Sopenharmony_ci return (bi_clause_upper(5, tuples, tuple_count) << 12) | 597bf215546Sopenharmony_ci (bi_clause_upper(6, tuples, tuple_count) << 9); 598bf215546Sopenharmony_ci case BI_CLAUSE_SUBWORD_UPPER_0 ... BI_CLAUSE_SUBWORD_UPPER_7: 599bf215546Sopenharmony_ci return bi_pack_upper(t, tuples, tuple_count) << 12; 600bf215546Sopenharmony_ci default: 601bf215546Sopenharmony_ci return bi_pack_tuple_bits(t, tuples, tuple_count, tuple_subword * 15, 15); 602bf215546Sopenharmony_ci } 603bf215546Sopenharmony_ci} 604bf215546Sopenharmony_ci 605bf215546Sopenharmony_ci/* EC0 is 60-bits (bottom 4 already shifted off) */ 606bf215546Sopenharmony_civoid 607bf215546Sopenharmony_cibi_pack_format(struct util_dynarray *emission, 608bf215546Sopenharmony_ci unsigned index, 609bf215546Sopenharmony_ci struct bi_packed_tuple *tuples, 610bf215546Sopenharmony_ci ASSERTED unsigned tuple_count, 611bf215546Sopenharmony_ci uint64_t header, uint64_t ec0, 612bf215546Sopenharmony_ci unsigned m0, bool z) 613bf215546Sopenharmony_ci{ 614bf215546Sopenharmony_ci struct bi_clause_format format = bi_clause_formats[index]; 615bf215546Sopenharmony_ci 616bf215546Sopenharmony_ci uint8_t sync = bi_pack_sync(format.tag_1, format.tag_2, format.tag_3, 617bf215546Sopenharmony_ci tuples, tuple_count, z); 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_ci uint64_t s0_s3 = bi_pack_t_ec(format.s0_s3, tuples, tuple_count, ec0); 620bf215546Sopenharmony_ci 621bf215546Sopenharmony_ci uint16_t s4 = bi_pack_subword(format.s4, format.format, tuples, tuple_count, header, ec0, m0, 4); 622bf215546Sopenharmony_ci 623bf215546Sopenharmony_ci 
uint32_t s5_s6 = bi_pack_subwords_56(format.s5_s6, 624bf215546Sopenharmony_ci tuples, tuple_count, header, ec0, 625bf215546Sopenharmony_ci (format.format == 2 || format.format == 7) ? 0 : 3); 626bf215546Sopenharmony_ci 627bf215546Sopenharmony_ci uint64_t s7 = bi_pack_subword(format.s7, format.format, tuples, tuple_count, header, ec0, m0, 2); 628bf215546Sopenharmony_ci 629bf215546Sopenharmony_ci /* Now that subwords are packed, split into 64-bit halves and emit */ 630bf215546Sopenharmony_ci uint64_t lo = sync | ((s0_s3 & ((1ull << 56) - 1)) << 8); 631bf215546Sopenharmony_ci uint64_t hi = (s0_s3 >> 56) | ((uint64_t) s4 << 4) | ((uint64_t) s5_s6 << 19) | ((uint64_t) s7 << 49); 632bf215546Sopenharmony_ci 633bf215546Sopenharmony_ci util_dynarray_append(emission, uint64_t, lo); 634bf215546Sopenharmony_ci util_dynarray_append(emission, uint64_t, hi); 635bf215546Sopenharmony_ci} 636bf215546Sopenharmony_ci 637bf215546Sopenharmony_cistatic void 638bf215546Sopenharmony_cibi_pack_clause(bi_context *ctx, bi_clause *clause, 639bf215546Sopenharmony_ci bi_clause *next_1, bi_clause *next_2, 640bf215546Sopenharmony_ci struct util_dynarray *emission, gl_shader_stage stage) 641bf215546Sopenharmony_ci{ 642bf215546Sopenharmony_ci struct bi_packed_tuple ins[8] = { 0 }; 643bf215546Sopenharmony_ci 644bf215546Sopenharmony_ci for (unsigned i = 0; i < clause->tuple_count; ++i) { 645bf215546Sopenharmony_ci unsigned prev = ((i == 0) ? clause->tuple_count : i) - 1; 646bf215546Sopenharmony_ci ins[i] = bi_pack_tuple(clause, &clause->tuples[i], 647bf215546Sopenharmony_ci &clause->tuples[prev], i == 0, stage); 648bf215546Sopenharmony_ci 649bf215546Sopenharmony_ci bi_instr *add = clause->tuples[i].add; 650bf215546Sopenharmony_ci 651bf215546Sopenharmony_ci /* Different GPUs support different forms of the CLPER.i32 652bf215546Sopenharmony_ci * instruction. Check we use the right one for the target. 
653bf215546Sopenharmony_ci */ 654bf215546Sopenharmony_ci if (add && add->op == BI_OPCODE_CLPER_OLD_I32) 655bf215546Sopenharmony_ci assert(ctx->quirks & BIFROST_LIMITED_CLPER); 656bf215546Sopenharmony_ci else if (add && add->op == BI_OPCODE_CLPER_I32) 657bf215546Sopenharmony_ci assert(!(ctx->quirks & BIFROST_LIMITED_CLPER)); 658bf215546Sopenharmony_ci } 659bf215546Sopenharmony_ci 660bf215546Sopenharmony_ci bool ec0_packed = bi_ec0_packed(clause->tuple_count); 661bf215546Sopenharmony_ci 662bf215546Sopenharmony_ci if (ec0_packed) 663bf215546Sopenharmony_ci clause->constant_count = MAX2(clause->constant_count, 1); 664bf215546Sopenharmony_ci 665bf215546Sopenharmony_ci unsigned constant_quads = 666bf215546Sopenharmony_ci DIV_ROUND_UP(clause->constant_count - (ec0_packed ? 1 : 0), 2); 667bf215546Sopenharmony_ci 668bf215546Sopenharmony_ci uint64_t header = bi_pack_header(clause, next_1, next_2); 669bf215546Sopenharmony_ci uint64_t ec0 = (clause->constants[0] >> 4); 670bf215546Sopenharmony_ci unsigned m0 = (clause->pcrel_idx == 0) ? 
4 : 0; 671bf215546Sopenharmony_ci 672bf215546Sopenharmony_ci unsigned counts[8] = { 673bf215546Sopenharmony_ci 1, 2, 3, 3, 4, 5, 5, 6 674bf215546Sopenharmony_ci }; 675bf215546Sopenharmony_ci 676bf215546Sopenharmony_ci unsigned indices[8][6] = { 677bf215546Sopenharmony_ci { 1 }, 678bf215546Sopenharmony_ci { 0, 2 }, 679bf215546Sopenharmony_ci { 0, 3, 4 }, 680bf215546Sopenharmony_ci { 0, 3, 6 }, 681bf215546Sopenharmony_ci { 0, 3, 7, 8 }, 682bf215546Sopenharmony_ci { 0, 3, 5, 9, 10 }, 683bf215546Sopenharmony_ci { 0, 3, 5, 9, 11 }, 684bf215546Sopenharmony_ci { 0, 3, 5, 9, 12, 13 }, 685bf215546Sopenharmony_ci }; 686bf215546Sopenharmony_ci 687bf215546Sopenharmony_ci unsigned count = counts[clause->tuple_count - 1]; 688bf215546Sopenharmony_ci 689bf215546Sopenharmony_ci for (unsigned pos = 0; pos < count; ++pos) { 690bf215546Sopenharmony_ci ASSERTED unsigned idx = indices[clause->tuple_count - 1][pos]; 691bf215546Sopenharmony_ci assert(bi_clause_formats[idx].pos == pos); 692bf215546Sopenharmony_ci assert((bi_clause_formats[idx].tag_1 == BI_CLAUSE_SUBWORD_Z) == 693bf215546Sopenharmony_ci (pos == count - 1)); 694bf215546Sopenharmony_ci 695bf215546Sopenharmony_ci /* Whether to end the clause immediately after the last tuple */ 696bf215546Sopenharmony_ci bool z = (constant_quads == 0); 697bf215546Sopenharmony_ci 698bf215546Sopenharmony_ci bi_pack_format(emission, indices[clause->tuple_count - 1][pos], 699bf215546Sopenharmony_ci ins, clause->tuple_count, header, ec0, m0, 700bf215546Sopenharmony_ci z); 701bf215546Sopenharmony_ci } 702bf215546Sopenharmony_ci 703bf215546Sopenharmony_ci /* Pack the remaining constants */ 704bf215546Sopenharmony_ci 705bf215546Sopenharmony_ci for (unsigned pos = 0; pos < constant_quads; ++pos) { 706bf215546Sopenharmony_ci bi_pack_constants(clause->tuple_count, clause->constants, 707bf215546Sopenharmony_ci pos, constant_quads, ec0_packed, emission); 708bf215546Sopenharmony_ci } 709bf215546Sopenharmony_ci} 710bf215546Sopenharmony_ci 
711bf215546Sopenharmony_cistatic void 712bf215546Sopenharmony_cibi_collect_blend_ret_addr(bi_context *ctx, struct util_dynarray *emission, 713bf215546Sopenharmony_ci const bi_clause *clause) 714bf215546Sopenharmony_ci{ 715bf215546Sopenharmony_ci /* No need to collect return addresses when we're in a blend shader. */ 716bf215546Sopenharmony_ci if (ctx->inputs->is_blend) 717bf215546Sopenharmony_ci return; 718bf215546Sopenharmony_ci 719bf215546Sopenharmony_ci const bi_tuple *tuple = &clause->tuples[clause->tuple_count - 1]; 720bf215546Sopenharmony_ci const bi_instr *ins = tuple->add; 721bf215546Sopenharmony_ci 722bf215546Sopenharmony_ci if (!ins || ins->op != BI_OPCODE_BLEND) 723bf215546Sopenharmony_ci return; 724bf215546Sopenharmony_ci 725bf215546Sopenharmony_ci 726bf215546Sopenharmony_ci unsigned loc = tuple->regs.fau_idx - BIR_FAU_BLEND_0; 727bf215546Sopenharmony_ci assert(loc < ARRAY_SIZE(ctx->info.bifrost->blend)); 728bf215546Sopenharmony_ci assert(!ctx->info.bifrost->blend[loc].return_offset); 729bf215546Sopenharmony_ci ctx->info.bifrost->blend[loc].return_offset = 730bf215546Sopenharmony_ci util_dynarray_num_elements(emission, uint8_t); 731bf215546Sopenharmony_ci assert(!(ctx->info.bifrost->blend[loc].return_offset & 0x7)); 732bf215546Sopenharmony_ci} 733bf215546Sopenharmony_ci 734bf215546Sopenharmony_ciunsigned 735bf215546Sopenharmony_cibi_pack(bi_context *ctx, struct util_dynarray *emission) 736bf215546Sopenharmony_ci{ 737bf215546Sopenharmony_ci unsigned previous_size = emission->size; 738bf215546Sopenharmony_ci 739bf215546Sopenharmony_ci bi_foreach_block(ctx, block) { 740bf215546Sopenharmony_ci bi_assign_branch_offset(ctx, block); 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci bi_foreach_clause_in_block(block, clause) { 743bf215546Sopenharmony_ci bool is_last = (clause->link.next == &block->clauses); 744bf215546Sopenharmony_ci 745bf215546Sopenharmony_ci /* Get the succeeding clauses, either two successors of 746bf215546Sopenharmony_ci * the block for 
the last clause in the block or just 747bf215546Sopenharmony_ci * the next clause within the block */ 748bf215546Sopenharmony_ci 749bf215546Sopenharmony_ci bi_clause *next = NULL, *next_2 = NULL; 750bf215546Sopenharmony_ci 751bf215546Sopenharmony_ci if (is_last) { 752bf215546Sopenharmony_ci next = bi_next_clause(ctx, block->successors[0], NULL); 753bf215546Sopenharmony_ci next_2 = bi_next_clause(ctx, block->successors[1], NULL); 754bf215546Sopenharmony_ci } else { 755bf215546Sopenharmony_ci next = bi_next_clause(ctx, block, clause); 756bf215546Sopenharmony_ci } 757bf215546Sopenharmony_ci 758bf215546Sopenharmony_ci 759bf215546Sopenharmony_ci previous_size = emission->size; 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci bi_pack_clause(ctx, clause, next, next_2, emission, ctx->stage); 762bf215546Sopenharmony_ci 763bf215546Sopenharmony_ci if (!is_last) 764bf215546Sopenharmony_ci bi_collect_blend_ret_addr(ctx, emission, clause); 765bf215546Sopenharmony_ci } 766bf215546Sopenharmony_ci } 767bf215546Sopenharmony_ci 768bf215546Sopenharmony_ci return emission->size - previous_size; 769bf215546Sopenharmony_ci} 770