1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "aco_ir.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include "common/sid.h" 28bf215546Sopenharmony_ci 29bf215546Sopenharmony_ci#include <map> 30bf215546Sopenharmony_ci#include <stack> 31bf215546Sopenharmony_ci#include <vector> 32bf215546Sopenharmony_ci 33bf215546Sopenharmony_cinamespace aco { 34bf215546Sopenharmony_ci 35bf215546Sopenharmony_cinamespace { 36bf215546Sopenharmony_ci 37bf215546Sopenharmony_ci/** 38bf215546Sopenharmony_ci * The general idea of this pass is: 39bf215546Sopenharmony_ci * The CFG is traversed in reverse postorder (forward) and loops are processed 40bf215546Sopenharmony_ci * several times until no progress is made. 41bf215546Sopenharmony_ci * Per BB two wait_ctx is maintained: an in-context and out-context. 42bf215546Sopenharmony_ci * The in-context is the joined out-contexts of the predecessors. 43bf215546Sopenharmony_ci * The context contains a map: gpr -> wait_entry 44bf215546Sopenharmony_ci * consisting of the information about the cnt values to be waited for. 45bf215546Sopenharmony_ci * Note: After merge-nodes, it might occur that for the same register 46bf215546Sopenharmony_ci * multiple cnt values are to be waited for. 47bf215546Sopenharmony_ci * 48bf215546Sopenharmony_ci * The values are updated according to the encountered instructions: 49bf215546Sopenharmony_ci * - additional events increment the counter of waits of the same type 50bf215546Sopenharmony_ci * - or erase gprs with counters higher than to be waited for. 51bf215546Sopenharmony_ci */ 52bf215546Sopenharmony_ci 53bf215546Sopenharmony_ci// TODO: do a more clever insertion of wait_cnt (lgkm_cnt) 54bf215546Sopenharmony_ci// when there is a load followed by a use of a previous load 55bf215546Sopenharmony_ci 56bf215546Sopenharmony_ci/* Instructions of the same event will finish in-order except for smem 57bf215546Sopenharmony_ci * and maybe flat. Instructions of different events may not finish in-order. */ 58bf215546Sopenharmony_cienum wait_event : uint16_t { 59bf215546Sopenharmony_ci event_smem = 1 << 0, 60bf215546Sopenharmony_ci event_lds = 1 << 1, 61bf215546Sopenharmony_ci event_gds = 1 << 2, 62bf215546Sopenharmony_ci event_vmem = 1 << 3, 63bf215546Sopenharmony_ci event_vmem_store = 1 << 4, /* GFX10+ */ 64bf215546Sopenharmony_ci event_flat = 1 << 5, 65bf215546Sopenharmony_ci event_exp_pos = 1 << 6, 66bf215546Sopenharmony_ci event_exp_param = 1 << 7, 67bf215546Sopenharmony_ci event_exp_mrt_null = 1 << 8, 68bf215546Sopenharmony_ci event_gds_gpr_lock = 1 << 9, 69bf215546Sopenharmony_ci event_vmem_gpr_lock = 1 << 10, 70bf215546Sopenharmony_ci event_sendmsg = 1 << 11, 71bf215546Sopenharmony_ci num_events = 12, 72bf215546Sopenharmony_ci}; 73bf215546Sopenharmony_ci 74bf215546Sopenharmony_cienum counter_type : uint8_t { 75bf215546Sopenharmony_ci counter_exp = 1 << 0, 76bf215546Sopenharmony_ci counter_lgkm = 1 << 1, 77bf215546Sopenharmony_ci counter_vm = 1 << 2, 78bf215546Sopenharmony_ci counter_vs = 1 << 3, 79bf215546Sopenharmony_ci num_counters = 4, 80bf215546Sopenharmony_ci}; 81bf215546Sopenharmony_ci 82bf215546Sopenharmony_cienum vmem_type : uint8_t { 83bf215546Sopenharmony_ci vmem_nosampler = 1 << 0, 84bf215546Sopenharmony_ci vmem_sampler = 1 << 1, 85bf215546Sopenharmony_ci vmem_bvh = 1 << 2, 86bf215546Sopenharmony_ci}; 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistatic const uint16_t exp_events = 89bf215546Sopenharmony_ci event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | event_vmem_gpr_lock; 90bf215546Sopenharmony_cistatic const uint16_t lgkm_events = event_smem | event_lds | event_gds | event_flat | event_sendmsg; 91bf215546Sopenharmony_cistatic const uint16_t vm_events = event_vmem | event_flat; 92bf215546Sopenharmony_cistatic const uint16_t vs_events = event_vmem_store; 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_ciuint8_t 95bf215546Sopenharmony_ciget_counters_for_event(wait_event ev) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci switch (ev) { 98bf215546Sopenharmony_ci case event_smem: 99bf215546Sopenharmony_ci case event_lds: 100bf215546Sopenharmony_ci case event_gds: 101bf215546Sopenharmony_ci case event_sendmsg: return counter_lgkm; 102bf215546Sopenharmony_ci case event_vmem: return counter_vm; 103bf215546Sopenharmony_ci case event_vmem_store: return counter_vs; 104bf215546Sopenharmony_ci case event_flat: return counter_vm | counter_lgkm; 105bf215546Sopenharmony_ci case event_exp_pos: 106bf215546Sopenharmony_ci case event_exp_param: 107bf215546Sopenharmony_ci case event_exp_mrt_null: 108bf215546Sopenharmony_ci case event_gds_gpr_lock: 109bf215546Sopenharmony_ci case event_vmem_gpr_lock: return counter_exp; 110bf215546Sopenharmony_ci default: return 0; 111bf215546Sopenharmony_ci } 112bf215546Sopenharmony_ci} 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_cistruct wait_entry { 115bf215546Sopenharmony_ci wait_imm imm; 116bf215546Sopenharmony_ci uint16_t events; /* use wait_event notion */ 117bf215546Sopenharmony_ci uint8_t counters; /* use counter_type notion */ 118bf215546Sopenharmony_ci bool wait_on_read : 1; 119bf215546Sopenharmony_ci bool logical : 1; 120bf215546Sopenharmony_ci uint8_t vmem_types : 4; 121bf215546Sopenharmony_ci 122bf215546Sopenharmony_ci wait_entry(wait_event event_, wait_imm imm_, bool logical_, bool wait_on_read_) 123bf215546Sopenharmony_ci : imm(imm_), events(event_), counters(get_counters_for_event(event_)), 124bf215546Sopenharmony_ci wait_on_read(wait_on_read_), logical(logical_), vmem_types(0) 125bf215546Sopenharmony_ci {} 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_ci bool join(const wait_entry& other) 128bf215546Sopenharmony_ci { 129bf215546Sopenharmony_ci bool changed = (other.events & ~events) || (other.counters & ~counters) || 130bf215546Sopenharmony_ci (other.wait_on_read && !wait_on_read) || (other.vmem_types & !vmem_types); 131bf215546Sopenharmony_ci events |= other.events; 132bf215546Sopenharmony_ci counters |= other.counters; 133bf215546Sopenharmony_ci changed |= imm.combine(other.imm); 134bf215546Sopenharmony_ci wait_on_read |= other.wait_on_read; 135bf215546Sopenharmony_ci vmem_types |= other.vmem_types; 136bf215546Sopenharmony_ci assert(logical == other.logical); 137bf215546Sopenharmony_ci return changed; 138bf215546Sopenharmony_ci } 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_ci void remove_counter(counter_type counter) 141bf215546Sopenharmony_ci { 142bf215546Sopenharmony_ci counters &= ~counter; 143bf215546Sopenharmony_ci 144bf215546Sopenharmony_ci if (counter == counter_lgkm) { 145bf215546Sopenharmony_ci imm.lgkm = wait_imm::unset_counter; 146bf215546Sopenharmony_ci events &= ~(event_smem | event_lds | event_gds | event_sendmsg); 147bf215546Sopenharmony_ci } 148bf215546Sopenharmony_ci 149bf215546Sopenharmony_ci if (counter == counter_vm) { 150bf215546Sopenharmony_ci imm.vm = wait_imm::unset_counter; 151bf215546Sopenharmony_ci events &= ~event_vmem; 152bf215546Sopenharmony_ci vmem_types = 0; 153bf215546Sopenharmony_ci } 154bf215546Sopenharmony_ci 155bf215546Sopenharmony_ci if (counter == counter_exp) { 156bf215546Sopenharmony_ci imm.exp = wait_imm::unset_counter; 157bf215546Sopenharmony_ci events &= ~(event_exp_pos | event_exp_param | event_exp_mrt_null | event_gds_gpr_lock | 158bf215546Sopenharmony_ci event_vmem_gpr_lock); 159bf215546Sopenharmony_ci } 160bf215546Sopenharmony_ci 161bf215546Sopenharmony_ci if (counter == counter_vs) { 162bf215546Sopenharmony_ci imm.vs = wait_imm::unset_counter; 163bf215546Sopenharmony_ci events &= ~event_vmem_store; 164bf215546Sopenharmony_ci } 165bf215546Sopenharmony_ci 166bf215546Sopenharmony_ci if (!(counters & counter_lgkm) && !(counters & counter_vm)) 167bf215546Sopenharmony_ci events &= ~event_flat; 168bf215546Sopenharmony_ci } 169bf215546Sopenharmony_ci}; 170bf215546Sopenharmony_ci 171bf215546Sopenharmony_cistruct wait_ctx { 172bf215546Sopenharmony_ci Program* program; 173bf215546Sopenharmony_ci enum amd_gfx_level gfx_level; 174bf215546Sopenharmony_ci uint16_t max_vm_cnt; 175bf215546Sopenharmony_ci uint16_t max_exp_cnt; 176bf215546Sopenharmony_ci uint16_t max_lgkm_cnt; 177bf215546Sopenharmony_ci uint16_t max_vs_cnt; 178bf215546Sopenharmony_ci uint16_t unordered_events = event_smem | event_flat; 179bf215546Sopenharmony_ci 180bf215546Sopenharmony_ci uint8_t vm_cnt = 0; 181bf215546Sopenharmony_ci uint8_t exp_cnt = 0; 182bf215546Sopenharmony_ci uint8_t lgkm_cnt = 0; 183bf215546Sopenharmony_ci uint8_t vs_cnt = 0; 184bf215546Sopenharmony_ci bool pending_flat_lgkm = false; 185bf215546Sopenharmony_ci bool pending_flat_vm = false; 186bf215546Sopenharmony_ci bool pending_s_buffer_store = false; /* GFX10 workaround */ 187bf215546Sopenharmony_ci 188bf215546Sopenharmony_ci wait_imm barrier_imm[storage_count]; 189bf215546Sopenharmony_ci uint16_t barrier_events[storage_count] = {}; /* use wait_event notion */ 190bf215546Sopenharmony_ci 191bf215546Sopenharmony_ci std::map<PhysReg, wait_entry> gpr_map; 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci wait_ctx() {} 194bf215546Sopenharmony_ci wait_ctx(Program* program_) 195bf215546Sopenharmony_ci : program(program_), gfx_level(program_->gfx_level), 196bf215546Sopenharmony_ci max_vm_cnt(program_->gfx_level >= GFX9 ? 62 : 14), max_exp_cnt(6), 197bf215546Sopenharmony_ci max_lgkm_cnt(program_->gfx_level >= GFX10 ? 62 : 14), 198bf215546Sopenharmony_ci max_vs_cnt(program_->gfx_level >= GFX10 ? 62 : 0), 199bf215546Sopenharmony_ci unordered_events(event_smem | (program_->gfx_level < GFX10 ? event_flat : 0)) 200bf215546Sopenharmony_ci {} 201bf215546Sopenharmony_ci 202bf215546Sopenharmony_ci bool join(const wait_ctx* other, bool logical) 203bf215546Sopenharmony_ci { 204bf215546Sopenharmony_ci bool changed = other->exp_cnt > exp_cnt || other->vm_cnt > vm_cnt || 205bf215546Sopenharmony_ci other->lgkm_cnt > lgkm_cnt || other->vs_cnt > vs_cnt || 206bf215546Sopenharmony_ci (other->pending_flat_lgkm && !pending_flat_lgkm) || 207bf215546Sopenharmony_ci (other->pending_flat_vm && !pending_flat_vm); 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci exp_cnt = std::max(exp_cnt, other->exp_cnt); 210bf215546Sopenharmony_ci vm_cnt = std::max(vm_cnt, other->vm_cnt); 211bf215546Sopenharmony_ci lgkm_cnt = std::max(lgkm_cnt, other->lgkm_cnt); 212bf215546Sopenharmony_ci vs_cnt = std::max(vs_cnt, other->vs_cnt); 213bf215546Sopenharmony_ci pending_flat_lgkm |= other->pending_flat_lgkm; 214bf215546Sopenharmony_ci pending_flat_vm |= other->pending_flat_vm; 215bf215546Sopenharmony_ci pending_s_buffer_store |= other->pending_s_buffer_store; 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci for (const auto& entry : other->gpr_map) { 218bf215546Sopenharmony_ci if (entry.second.logical != logical) 219bf215546Sopenharmony_ci continue; 220bf215546Sopenharmony_ci 221bf215546Sopenharmony_ci using iterator = std::map<PhysReg, wait_entry>::iterator; 222bf215546Sopenharmony_ci const std::pair<iterator, bool> insert_pair = gpr_map.insert(entry); 223bf215546Sopenharmony_ci if (insert_pair.second) { 224bf215546Sopenharmony_ci changed = true; 225bf215546Sopenharmony_ci } else { 226bf215546Sopenharmony_ci changed |= insert_pair.first->second.join(entry.second); 227bf215546Sopenharmony_ci } 228bf215546Sopenharmony_ci } 229bf215546Sopenharmony_ci 230bf215546Sopenharmony_ci for (unsigned i = 0; i < storage_count; i++) { 231bf215546Sopenharmony_ci changed |= barrier_imm[i].combine(other->barrier_imm[i]); 232bf215546Sopenharmony_ci changed |= (other->barrier_events[i] & ~barrier_events[i]) != 0; 233bf215546Sopenharmony_ci barrier_events[i] |= other->barrier_events[i]; 234bf215546Sopenharmony_ci } 235bf215546Sopenharmony_ci 236bf215546Sopenharmony_ci return changed; 237bf215546Sopenharmony_ci } 238bf215546Sopenharmony_ci 239bf215546Sopenharmony_ci void wait_and_remove_from_entry(PhysReg reg, wait_entry& entry, counter_type counter) 240bf215546Sopenharmony_ci { 241bf215546Sopenharmony_ci entry.remove_counter(counter); 242bf215546Sopenharmony_ci } 243bf215546Sopenharmony_ci}; 244bf215546Sopenharmony_ci 245bf215546Sopenharmony_ciuint8_t 246bf215546Sopenharmony_ciget_vmem_type(Instruction* instr) 247bf215546Sopenharmony_ci{ 248bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::image_bvh64_intersect_ray) 249bf215546Sopenharmony_ci return vmem_bvh; 250bf215546Sopenharmony_ci else if (instr->isMIMG() && !instr->operands[1].isUndefined() && 251bf215546Sopenharmony_ci instr->operands[1].regClass() == s4) 252bf215546Sopenharmony_ci return vmem_sampler; 253bf215546Sopenharmony_ci else if (instr->isVMEM() || instr->isScratch() || instr->isGlobal()) 254bf215546Sopenharmony_ci return vmem_nosampler; 255bf215546Sopenharmony_ci return 0; 256bf215546Sopenharmony_ci} 257bf215546Sopenharmony_ci 258bf215546Sopenharmony_civoid 259bf215546Sopenharmony_cicheck_instr(wait_ctx& ctx, wait_imm& wait, Instruction* instr) 260bf215546Sopenharmony_ci{ 261bf215546Sopenharmony_ci for (const Operand op : instr->operands) { 262bf215546Sopenharmony_ci if (op.isConstant() || op.isUndefined()) 263bf215546Sopenharmony_ci continue; 264bf215546Sopenharmony_ci 265bf215546Sopenharmony_ci /* check consecutively read gprs */ 266bf215546Sopenharmony_ci for (unsigned j = 0; j < op.size(); j++) { 267bf215546Sopenharmony_ci PhysReg reg{op.physReg() + j}; 268bf215546Sopenharmony_ci std::map<PhysReg, wait_entry>::iterator it = ctx.gpr_map.find(reg); 269bf215546Sopenharmony_ci if (it == ctx.gpr_map.end() || !it->second.wait_on_read) 270bf215546Sopenharmony_ci continue; 271bf215546Sopenharmony_ci 272bf215546Sopenharmony_ci wait.combine(it->second.imm); 273bf215546Sopenharmony_ci } 274bf215546Sopenharmony_ci } 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci for (const Definition& def : instr->definitions) { 277bf215546Sopenharmony_ci /* check consecutively written gprs */ 278bf215546Sopenharmony_ci for (unsigned j = 0; j < def.getTemp().size(); j++) { 279bf215546Sopenharmony_ci PhysReg reg{def.physReg() + j}; 280bf215546Sopenharmony_ci 281bf215546Sopenharmony_ci std::map<PhysReg, wait_entry>::iterator it = ctx.gpr_map.find(reg); 282bf215546Sopenharmony_ci if (it == ctx.gpr_map.end()) 283bf215546Sopenharmony_ci continue; 284bf215546Sopenharmony_ci 285bf215546Sopenharmony_ci /* Vector Memory reads and writes return in the order they were issued */ 286bf215546Sopenharmony_ci uint8_t vmem_type = get_vmem_type(instr); 287bf215546Sopenharmony_ci if (vmem_type && ((it->second.events & vm_events) == event_vmem) && 288bf215546Sopenharmony_ci it->second.vmem_types == vmem_type) 289bf215546Sopenharmony_ci continue; 290bf215546Sopenharmony_ci 291bf215546Sopenharmony_ci /* LDS reads and writes return in the order they were issued. same for GDS */ 292bf215546Sopenharmony_ci if (instr->isDS() && 293bf215546Sopenharmony_ci (it->second.events & lgkm_events) == (instr->ds().gds ? event_gds : event_lds)) 294bf215546Sopenharmony_ci continue; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci wait.combine(it->second.imm); 297bf215546Sopenharmony_ci } 298bf215546Sopenharmony_ci } 299bf215546Sopenharmony_ci} 300bf215546Sopenharmony_ci 301bf215546Sopenharmony_cibool 302bf215546Sopenharmony_ciparse_wait_instr(wait_ctx& ctx, wait_imm& imm, Instruction* instr) 303bf215546Sopenharmony_ci{ 304bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::s_waitcnt_vscnt && 305bf215546Sopenharmony_ci instr->definitions[0].physReg() == sgpr_null) { 306bf215546Sopenharmony_ci imm.vs = std::min<uint8_t>(imm.vs, instr->sopk().imm); 307bf215546Sopenharmony_ci return true; 308bf215546Sopenharmony_ci } else if (instr->opcode == aco_opcode::s_waitcnt) { 309bf215546Sopenharmony_ci imm.combine(wait_imm(ctx.gfx_level, instr->sopp().imm)); 310bf215546Sopenharmony_ci return true; 311bf215546Sopenharmony_ci } 312bf215546Sopenharmony_ci return false; 313bf215546Sopenharmony_ci} 314bf215546Sopenharmony_ci 315bf215546Sopenharmony_civoid 316bf215546Sopenharmony_ciperform_barrier(wait_ctx& ctx, wait_imm& imm, memory_sync_info sync, unsigned semantics) 317bf215546Sopenharmony_ci{ 318bf215546Sopenharmony_ci sync_scope subgroup_scope = 319bf215546Sopenharmony_ci ctx.program->workgroup_size <= ctx.program->wave_size ? scope_workgroup : scope_subgroup; 320bf215546Sopenharmony_ci if ((sync.semantics & semantics) && sync.scope > subgroup_scope) { 321bf215546Sopenharmony_ci unsigned storage = sync.storage; 322bf215546Sopenharmony_ci while (storage) { 323bf215546Sopenharmony_ci unsigned idx = u_bit_scan(&storage); 324bf215546Sopenharmony_ci 325bf215546Sopenharmony_ci /* LDS is private to the workgroup */ 326bf215546Sopenharmony_ci sync_scope bar_scope_lds = MIN2(sync.scope, scope_workgroup); 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci uint16_t events = ctx.barrier_events[idx]; 329bf215546Sopenharmony_ci if (bar_scope_lds <= subgroup_scope) 330bf215546Sopenharmony_ci events &= ~event_lds; 331bf215546Sopenharmony_ci 332bf215546Sopenharmony_ci /* in non-WGP, the L1 (L0 on GFX10+) cache keeps all memory operations 333bf215546Sopenharmony_ci * in-order for the same workgroup */ 334bf215546Sopenharmony_ci if (!ctx.program->wgp_mode && sync.scope <= scope_workgroup) 335bf215546Sopenharmony_ci events &= ~(event_vmem | event_vmem_store | event_smem); 336bf215546Sopenharmony_ci 337bf215546Sopenharmony_ci if (events) 338bf215546Sopenharmony_ci imm.combine(ctx.barrier_imm[idx]); 339bf215546Sopenharmony_ci } 340bf215546Sopenharmony_ci } 341bf215546Sopenharmony_ci} 342bf215546Sopenharmony_ci 343bf215546Sopenharmony_civoid 344bf215546Sopenharmony_ciforce_waitcnt(wait_ctx& ctx, wait_imm& imm) 345bf215546Sopenharmony_ci{ 346bf215546Sopenharmony_ci if (ctx.vm_cnt) 347bf215546Sopenharmony_ci imm.vm = 0; 348bf215546Sopenharmony_ci if (ctx.exp_cnt) 349bf215546Sopenharmony_ci imm.exp = 0; 350bf215546Sopenharmony_ci if (ctx.lgkm_cnt) 351bf215546Sopenharmony_ci imm.lgkm = 0; 352bf215546Sopenharmony_ci 353bf215546Sopenharmony_ci if (ctx.gfx_level >= GFX10) { 354bf215546Sopenharmony_ci if (ctx.vs_cnt) 355bf215546Sopenharmony_ci imm.vs = 0; 356bf215546Sopenharmony_ci } 357bf215546Sopenharmony_ci} 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_civoid 360bf215546Sopenharmony_cikill(wait_imm& imm, Instruction* instr, wait_ctx& ctx, memory_sync_info sync_info) 361bf215546Sopenharmony_ci{ 362bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::s_setpc_b64 || (debug_flags & DEBUG_FORCE_WAITCNT)) { 363bf215546Sopenharmony_ci /* Force emitting waitcnt states right after the instruction if there is 364bf215546Sopenharmony_ci * something to wait for. This is also applied for s_setpc_b64 to ensure 365bf215546Sopenharmony_ci * waitcnt states are inserted before jumping to the PS epilog. 366bf215546Sopenharmony_ci */ 367bf215546Sopenharmony_ci force_waitcnt(ctx, imm); 368bf215546Sopenharmony_ci } 369bf215546Sopenharmony_ci 370bf215546Sopenharmony_ci if (ctx.exp_cnt || ctx.vm_cnt || ctx.lgkm_cnt) 371bf215546Sopenharmony_ci check_instr(ctx, imm, instr); 372bf215546Sopenharmony_ci 373bf215546Sopenharmony_ci /* It's required to wait for scalar stores before "writing back" data. 374bf215546Sopenharmony_ci * It shouldn't cost anything anyways since we're about to do s_endpgm. 375bf215546Sopenharmony_ci */ 376bf215546Sopenharmony_ci if (ctx.lgkm_cnt && instr->opcode == aco_opcode::s_dcache_wb) { 377bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX8); 378bf215546Sopenharmony_ci imm.lgkm = 0; 379bf215546Sopenharmony_ci } 380bf215546Sopenharmony_ci 381bf215546Sopenharmony_ci if (ctx.gfx_level >= GFX10 && instr->isSMEM()) { 382bf215546Sopenharmony_ci /* GFX10: A store followed by a load at the same address causes a problem because 383bf215546Sopenharmony_ci * the load doesn't load the correct values unless we wait for the store first. 384bf215546Sopenharmony_ci * This is NOT mitigated by an s_nop. 385bf215546Sopenharmony_ci * 386bf215546Sopenharmony_ci * TODO: Refine this when we have proper alias analysis. 387bf215546Sopenharmony_ci */ 388bf215546Sopenharmony_ci if (ctx.pending_s_buffer_store && !instr->smem().definitions.empty() && 389bf215546Sopenharmony_ci !instr->smem().sync.can_reorder()) { 390bf215546Sopenharmony_ci imm.lgkm = 0; 391bf215546Sopenharmony_ci } 392bf215546Sopenharmony_ci } 393bf215546Sopenharmony_ci 394bf215546Sopenharmony_ci if (ctx.program->early_rast && instr->opcode == aco_opcode::exp) { 395bf215546Sopenharmony_ci if (instr->exp().dest >= V_008DFC_SQ_EXP_POS && instr->exp().dest < V_008DFC_SQ_EXP_PRIM) { 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci /* With early_rast, the HW will start clipping and rasterization after the 1st DONE pos 398bf215546Sopenharmony_ci * export. Wait for all stores (and atomics) to complete, so PS can read them. 399bf215546Sopenharmony_ci * TODO: This only really applies to DONE pos exports. 400bf215546Sopenharmony_ci * Consider setting the DONE bit earlier. 401bf215546Sopenharmony_ci */ 402bf215546Sopenharmony_ci if (ctx.vs_cnt > 0) 403bf215546Sopenharmony_ci imm.vs = 0; 404bf215546Sopenharmony_ci if (ctx.vm_cnt > 0) 405bf215546Sopenharmony_ci imm.vm = 0; 406bf215546Sopenharmony_ci } 407bf215546Sopenharmony_ci } 408bf215546Sopenharmony_ci 409bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_barrier) 410bf215546Sopenharmony_ci perform_barrier(ctx, imm, instr->barrier().sync, semantic_acqrel); 411bf215546Sopenharmony_ci else 412bf215546Sopenharmony_ci perform_barrier(ctx, imm, sync_info, semantic_release); 413bf215546Sopenharmony_ci 414bf215546Sopenharmony_ci if (!imm.empty()) { 415bf215546Sopenharmony_ci if (ctx.pending_flat_vm && imm.vm != wait_imm::unset_counter) 416bf215546Sopenharmony_ci imm.vm = 0; 417bf215546Sopenharmony_ci if (ctx.pending_flat_lgkm && imm.lgkm != wait_imm::unset_counter) 418bf215546Sopenharmony_ci imm.lgkm = 0; 419bf215546Sopenharmony_ci 420bf215546Sopenharmony_ci /* reset counters */ 421bf215546Sopenharmony_ci ctx.exp_cnt = std::min(ctx.exp_cnt, imm.exp); 422bf215546Sopenharmony_ci ctx.vm_cnt = std::min(ctx.vm_cnt, imm.vm); 423bf215546Sopenharmony_ci ctx.lgkm_cnt = std::min(ctx.lgkm_cnt, imm.lgkm); 424bf215546Sopenharmony_ci ctx.vs_cnt = std::min(ctx.vs_cnt, imm.vs); 425bf215546Sopenharmony_ci 426bf215546Sopenharmony_ci /* update barrier wait imms */ 427bf215546Sopenharmony_ci for (unsigned i = 0; i < storage_count; i++) { 428bf215546Sopenharmony_ci wait_imm& bar = ctx.barrier_imm[i]; 429bf215546Sopenharmony_ci uint16_t& bar_ev = ctx.barrier_events[i]; 430bf215546Sopenharmony_ci if (bar.exp != wait_imm::unset_counter && imm.exp <= bar.exp) { 431bf215546Sopenharmony_ci bar.exp = wait_imm::unset_counter; 432bf215546Sopenharmony_ci bar_ev &= ~exp_events; 433bf215546Sopenharmony_ci } 434bf215546Sopenharmony_ci if (bar.vm != wait_imm::unset_counter && imm.vm <= bar.vm) { 435bf215546Sopenharmony_ci bar.vm = wait_imm::unset_counter; 436bf215546Sopenharmony_ci bar_ev &= ~(vm_events & ~event_flat); 437bf215546Sopenharmony_ci } 438bf215546Sopenharmony_ci if (bar.lgkm != wait_imm::unset_counter && imm.lgkm <= bar.lgkm) { 439bf215546Sopenharmony_ci bar.lgkm = wait_imm::unset_counter; 440bf215546Sopenharmony_ci bar_ev &= ~(lgkm_events & ~event_flat); 441bf215546Sopenharmony_ci } 442bf215546Sopenharmony_ci if (bar.vs != wait_imm::unset_counter && imm.vs <= bar.vs) { 443bf215546Sopenharmony_ci bar.vs = wait_imm::unset_counter; 444bf215546Sopenharmony_ci bar_ev &= ~vs_events; 445bf215546Sopenharmony_ci } 446bf215546Sopenharmony_ci if (bar.vm == wait_imm::unset_counter && bar.lgkm == wait_imm::unset_counter) 447bf215546Sopenharmony_ci bar_ev &= ~event_flat; 448bf215546Sopenharmony_ci } 449bf215546Sopenharmony_ci 450bf215546Sopenharmony_ci /* remove all gprs with higher counter from map */ 451bf215546Sopenharmony_ci std::map<PhysReg, wait_entry>::iterator it = ctx.gpr_map.begin(); 452bf215546Sopenharmony_ci while (it != ctx.gpr_map.end()) { 453bf215546Sopenharmony_ci if (imm.exp != wait_imm::unset_counter && imm.exp <= it->second.imm.exp) 454bf215546Sopenharmony_ci ctx.wait_and_remove_from_entry(it->first, it->second, counter_exp); 455bf215546Sopenharmony_ci if (imm.vm != wait_imm::unset_counter && imm.vm <= it->second.imm.vm) 456bf215546Sopenharmony_ci ctx.wait_and_remove_from_entry(it->first, it->second, counter_vm); 457bf215546Sopenharmony_ci if (imm.lgkm != wait_imm::unset_counter && imm.lgkm <= it->second.imm.lgkm) 458bf215546Sopenharmony_ci ctx.wait_and_remove_from_entry(it->first, it->second, counter_lgkm); 459bf215546Sopenharmony_ci if (imm.vs != wait_imm::unset_counter && imm.vs <= it->second.imm.vs) 460bf215546Sopenharmony_ci ctx.wait_and_remove_from_entry(it->first, it->second, counter_vs); 461bf215546Sopenharmony_ci if (!it->second.counters) 462bf215546Sopenharmony_ci it = ctx.gpr_map.erase(it); 463bf215546Sopenharmony_ci else 464bf215546Sopenharmony_ci it++; 465bf215546Sopenharmony_ci } 466bf215546Sopenharmony_ci } 467bf215546Sopenharmony_ci 468bf215546Sopenharmony_ci if (imm.vm == 0) 469bf215546Sopenharmony_ci ctx.pending_flat_vm = false; 470bf215546Sopenharmony_ci if (imm.lgkm == 0) { 471bf215546Sopenharmony_ci ctx.pending_flat_lgkm = false; 472bf215546Sopenharmony_ci ctx.pending_s_buffer_store = false; 473bf215546Sopenharmony_ci } 474bf215546Sopenharmony_ci} 475bf215546Sopenharmony_ci 476bf215546Sopenharmony_civoid 477bf215546Sopenharmony_ciupdate_barrier_counter(uint8_t* ctr, unsigned max) 478bf215546Sopenharmony_ci{ 479bf215546Sopenharmony_ci if (*ctr != wait_imm::unset_counter && *ctr < max) 480bf215546Sopenharmony_ci (*ctr)++; 481bf215546Sopenharmony_ci} 482bf215546Sopenharmony_ci 483bf215546Sopenharmony_civoid 484bf215546Sopenharmony_ciupdate_barrier_imm(wait_ctx& ctx, uint8_t counters, wait_event event, memory_sync_info sync) 485bf215546Sopenharmony_ci{ 486bf215546Sopenharmony_ci for (unsigned i = 0; i < storage_count; i++) { 487bf215546Sopenharmony_ci wait_imm& bar = ctx.barrier_imm[i]; 488bf215546Sopenharmony_ci uint16_t& bar_ev = ctx.barrier_events[i]; 489bf215546Sopenharmony_ci if (sync.storage & (1 << i) && !(sync.semantics & semantic_private)) { 490bf215546Sopenharmony_ci bar_ev |= event; 491bf215546Sopenharmony_ci if (counters & counter_lgkm) 492bf215546Sopenharmony_ci bar.lgkm = 0; 493bf215546Sopenharmony_ci if (counters & counter_vm) 494bf215546Sopenharmony_ci bar.vm = 0; 495bf215546Sopenharmony_ci if (counters & counter_exp) 496bf215546Sopenharmony_ci bar.exp = 0; 497bf215546Sopenharmony_ci if (counters & counter_vs) 498bf215546Sopenharmony_ci bar.vs = 0; 499bf215546Sopenharmony_ci } else if (!(bar_ev & ctx.unordered_events) && !(ctx.unordered_events & event)) { 500bf215546Sopenharmony_ci if (counters & counter_lgkm && (bar_ev & lgkm_events) == event) 501bf215546Sopenharmony_ci update_barrier_counter(&bar.lgkm, ctx.max_lgkm_cnt); 502bf215546Sopenharmony_ci if (counters & counter_vm && (bar_ev & vm_events) == event) 503bf215546Sopenharmony_ci update_barrier_counter(&bar.vm, ctx.max_vm_cnt); 504bf215546Sopenharmony_ci if (counters & counter_exp && (bar_ev & exp_events) == event) 505bf215546Sopenharmony_ci update_barrier_counter(&bar.exp, ctx.max_exp_cnt); 506bf215546Sopenharmony_ci if (counters & counter_vs && (bar_ev & vs_events) == event) 507bf215546Sopenharmony_ci update_barrier_counter(&bar.vs, ctx.max_vs_cnt); 508bf215546Sopenharmony_ci } 509bf215546Sopenharmony_ci } 510bf215546Sopenharmony_ci} 511bf215546Sopenharmony_ci 512bf215546Sopenharmony_civoid 513bf215546Sopenharmony_ciupdate_counters(wait_ctx& ctx, wait_event event, memory_sync_info sync = memory_sync_info()) 514bf215546Sopenharmony_ci{ 515bf215546Sopenharmony_ci uint8_t counters = get_counters_for_event(event); 516bf215546Sopenharmony_ci 517bf215546Sopenharmony_ci if (counters & counter_lgkm && ctx.lgkm_cnt <= ctx.max_lgkm_cnt) 518bf215546Sopenharmony_ci ctx.lgkm_cnt++; 519bf215546Sopenharmony_ci if (counters & counter_vm && ctx.vm_cnt <= ctx.max_vm_cnt) 520bf215546Sopenharmony_ci ctx.vm_cnt++; 521bf215546Sopenharmony_ci if (counters & counter_exp && ctx.exp_cnt <= ctx.max_exp_cnt) 522bf215546Sopenharmony_ci ctx.exp_cnt++; 523bf215546Sopenharmony_ci if (counters & counter_vs && ctx.vs_cnt <= ctx.max_vs_cnt) 524bf215546Sopenharmony_ci ctx.vs_cnt++; 525bf215546Sopenharmony_ci 526bf215546Sopenharmony_ci update_barrier_imm(ctx, counters, event, sync); 527bf215546Sopenharmony_ci 528bf215546Sopenharmony_ci if (ctx.unordered_events & event) 529bf215546Sopenharmony_ci return; 530bf215546Sopenharmony_ci 531bf215546Sopenharmony_ci if (ctx.pending_flat_lgkm) 532bf215546Sopenharmony_ci counters &= ~counter_lgkm; 533bf215546Sopenharmony_ci if (ctx.pending_flat_vm) 534bf215546Sopenharmony_ci counters &= ~counter_vm; 535bf215546Sopenharmony_ci 536bf215546Sopenharmony_ci for (std::pair<const PhysReg, wait_entry>& e : ctx.gpr_map) { 537bf215546Sopenharmony_ci wait_entry& entry = e.second; 538bf215546Sopenharmony_ci 539bf215546Sopenharmony_ci if (entry.events & ctx.unordered_events) 540bf215546Sopenharmony_ci continue; 541bf215546Sopenharmony_ci 542bf215546Sopenharmony_ci assert(entry.events); 543bf215546Sopenharmony_ci 544bf215546Sopenharmony_ci if ((counters & counter_exp) && (entry.events & exp_events) == event && 545bf215546Sopenharmony_ci entry.imm.exp < ctx.max_exp_cnt) 546bf215546Sopenharmony_ci entry.imm.exp++; 547bf215546Sopenharmony_ci if ((counters & counter_lgkm) && (entry.events & lgkm_events) == event && 548bf215546Sopenharmony_ci entry.imm.lgkm < ctx.max_lgkm_cnt) 549bf215546Sopenharmony_ci entry.imm.lgkm++; 550bf215546Sopenharmony_ci if ((counters & counter_vm) && (entry.events & vm_events) == event && 551bf215546Sopenharmony_ci entry.imm.vm < ctx.max_vm_cnt) 552bf215546Sopenharmony_ci entry.imm.vm++; 553bf215546Sopenharmony_ci if ((counters & counter_vs) && (entry.events & vs_events) == event && 554bf215546Sopenharmony_ci entry.imm.vs < ctx.max_vs_cnt) 555bf215546Sopenharmony_ci entry.imm.vs++; 556bf215546Sopenharmony_ci } 557bf215546Sopenharmony_ci} 558bf215546Sopenharmony_ci 559bf215546Sopenharmony_civoid 560bf215546Sopenharmony_ciupdate_counters_for_flat_load(wait_ctx& ctx, memory_sync_info sync = memory_sync_info()) 561bf215546Sopenharmony_ci{ 562bf215546Sopenharmony_ci assert(ctx.gfx_level < GFX10); 563bf215546Sopenharmony_ci 564bf215546Sopenharmony_ci if (ctx.lgkm_cnt <= ctx.max_lgkm_cnt) 565bf215546Sopenharmony_ci ctx.lgkm_cnt++; 566bf215546Sopenharmony_ci if (ctx.vm_cnt <= ctx.max_vm_cnt) 567bf215546Sopenharmony_ci ctx.vm_cnt++; 568bf215546Sopenharmony_ci 569bf215546Sopenharmony_ci update_barrier_imm(ctx, counter_vm | counter_lgkm, event_flat, sync); 570bf215546Sopenharmony_ci 571bf215546Sopenharmony_ci for (std::pair<PhysReg, wait_entry> e : ctx.gpr_map) { 572bf215546Sopenharmony_ci if (e.second.counters & counter_vm) 573bf215546Sopenharmony_ci e.second.imm.vm = 0; 574bf215546Sopenharmony_ci if (e.second.counters & counter_lgkm) 575bf215546Sopenharmony_ci e.second.imm.lgkm = 0; 576bf215546Sopenharmony_ci } 577bf215546Sopenharmony_ci ctx.pending_flat_lgkm = true; 578bf215546Sopenharmony_ci ctx.pending_flat_vm = true; 579bf215546Sopenharmony_ci} 580bf215546Sopenharmony_ci 581bf215546Sopenharmony_civoid 582bf215546Sopenharmony_ciinsert_wait_entry(wait_ctx& ctx, PhysReg reg, RegClass rc, wait_event event, bool wait_on_read, 583bf215546Sopenharmony_ci uint8_t vmem_types = 0) 584bf215546Sopenharmony_ci{ 585bf215546Sopenharmony_ci uint16_t counters = get_counters_for_event(event); 586bf215546Sopenharmony_ci wait_imm imm; 587bf215546Sopenharmony_ci if (counters & counter_lgkm) 588bf215546Sopenharmony_ci imm.lgkm = 0; 589bf215546Sopenharmony_ci if (counters & counter_vm) 590bf215546Sopenharmony_ci imm.vm = 0; 591bf215546Sopenharmony_ci if (counters & counter_exp) 592bf215546Sopenharmony_ci imm.exp = 0; 593bf215546Sopenharmony_ci if (counters & counter_vs) 594bf215546Sopenharmony_ci imm.vs = 0; 595bf215546Sopenharmony_ci 596bf215546Sopenharmony_ci wait_entry new_entry(event, imm, !rc.is_linear(), wait_on_read); 597bf215546Sopenharmony_ci new_entry.vmem_types |= vmem_types; 598bf215546Sopenharmony_ci 599bf215546Sopenharmony_ci for (unsigned i = 0; i < rc.size(); i++) { 600bf215546Sopenharmony_ci auto it = ctx.gpr_map.emplace(PhysReg{reg.reg() + i}, new_entry); 601bf215546Sopenharmony_ci if (!it.second) 602bf215546Sopenharmony_ci it.first->second.join(new_entry); 603bf215546Sopenharmony_ci } 604bf215546Sopenharmony_ci} 605bf215546Sopenharmony_ci 606bf215546Sopenharmony_civoid 607bf215546Sopenharmony_ciinsert_wait_entry(wait_ctx& ctx, Operand op, wait_event event, uint8_t vmem_types = 0) 608bf215546Sopenharmony_ci{ 609bf215546Sopenharmony_ci if (!op.isConstant() && !op.isUndefined()) 610bf215546Sopenharmony_ci insert_wait_entry(ctx, op.physReg(), op.regClass(), event, false, vmem_types); 611bf215546Sopenharmony_ci} 612bf215546Sopenharmony_ci 613bf215546Sopenharmony_civoid 614bf215546Sopenharmony_ciinsert_wait_entry(wait_ctx& ctx, Definition def, wait_event event, uint8_t vmem_types = 0) 615bf215546Sopenharmony_ci{ 616bf215546Sopenharmony_ci insert_wait_entry(ctx, def.physReg(), def.regClass(), event, true, vmem_types); 617bf215546Sopenharmony_ci} 618bf215546Sopenharmony_ci 619bf215546Sopenharmony_civoid 620bf215546Sopenharmony_cigen(Instruction* instr, wait_ctx& ctx) 621bf215546Sopenharmony_ci{ 622bf215546Sopenharmony_ci switch (instr->format) { 623bf215546Sopenharmony_ci case Format::EXP: { 624bf215546Sopenharmony_ci Export_instruction& exp_instr = instr->exp(); 625bf215546Sopenharmony_ci 626bf215546Sopenharmony_ci wait_event ev; 627bf215546Sopenharmony_ci if (exp_instr.dest <= 9) 628bf215546Sopenharmony_ci ev = event_exp_mrt_null; 629bf215546Sopenharmony_ci else if (exp_instr.dest <= 15) 630bf215546Sopenharmony_ci ev = event_exp_pos; 631bf215546Sopenharmony_ci else 632bf215546Sopenharmony_ci ev = event_exp_param; 633bf215546Sopenharmony_ci update_counters(ctx, ev); 634bf215546Sopenharmony_ci 635bf215546Sopenharmony_ci /* insert new entries for exported vgprs */ 636bf215546Sopenharmony_ci for (unsigned i = 0; i < 4; i++) { 637bf215546Sopenharmony_ci if (exp_instr.enabled_mask & (1 << i)) { 638bf215546Sopenharmony_ci unsigned idx = exp_instr.compressed ? i >> 1 : i; 639bf215546Sopenharmony_ci assert(idx < exp_instr.operands.size()); 640bf215546Sopenharmony_ci insert_wait_entry(ctx, exp_instr.operands[idx], ev); 641bf215546Sopenharmony_ci } 642bf215546Sopenharmony_ci } 643bf215546Sopenharmony_ci insert_wait_entry(ctx, exec, s2, ev, false); 644bf215546Sopenharmony_ci break; 645bf215546Sopenharmony_ci } 646bf215546Sopenharmony_ci case Format::FLAT: { 647bf215546Sopenharmony_ci FLAT_instruction& flat = instr->flat(); 648bf215546Sopenharmony_ci if (ctx.gfx_level < GFX10 && !instr->definitions.empty()) 649bf215546Sopenharmony_ci update_counters_for_flat_load(ctx, flat.sync); 650bf215546Sopenharmony_ci else 651bf215546Sopenharmony_ci update_counters(ctx, event_flat, flat.sync); 652bf215546Sopenharmony_ci 653bf215546Sopenharmony_ci if (!instr->definitions.empty()) 654bf215546Sopenharmony_ci insert_wait_entry(ctx, instr->definitions[0], event_flat); 655bf215546Sopenharmony_ci break; 656bf215546Sopenharmony_ci } 657bf215546Sopenharmony_ci case Format::SMEM: { 658bf215546Sopenharmony_ci SMEM_instruction& smem = instr->smem(); 659bf215546Sopenharmony_ci update_counters(ctx, event_smem, smem.sync); 660bf215546Sopenharmony_ci 661bf215546Sopenharmony_ci if (!instr->definitions.empty()) 662bf215546Sopenharmony_ci insert_wait_entry(ctx, instr->definitions[0], event_smem); 663bf215546Sopenharmony_ci else if (ctx.gfx_level >= GFX10 && !smem.sync.can_reorder()) 664bf215546Sopenharmony_ci ctx.pending_s_buffer_store = true; 665bf215546Sopenharmony_ci 666bf215546Sopenharmony_ci break; 667bf215546Sopenharmony_ci } 668bf215546Sopenharmony_ci case Format::DS: { 669bf215546Sopenharmony_ci DS_instruction& ds = instr->ds(); 670bf215546Sopenharmony_ci update_counters(ctx, ds.gds ? event_gds : event_lds, ds.sync); 671bf215546Sopenharmony_ci if (ds.gds) 672bf215546Sopenharmony_ci update_counters(ctx, event_gds_gpr_lock); 673bf215546Sopenharmony_ci 674bf215546Sopenharmony_ci if (!instr->definitions.empty()) 675bf215546Sopenharmony_ci insert_wait_entry(ctx, instr->definitions[0], ds.gds ? event_gds : event_lds); 676bf215546Sopenharmony_ci 677bf215546Sopenharmony_ci if (ds.gds) { 678bf215546Sopenharmony_ci for (const Operand& op : instr->operands) 679bf215546Sopenharmony_ci insert_wait_entry(ctx, op, event_gds_gpr_lock); 680bf215546Sopenharmony_ci insert_wait_entry(ctx, exec, s2, event_gds_gpr_lock, false); 681bf215546Sopenharmony_ci } 682bf215546Sopenharmony_ci break; 683bf215546Sopenharmony_ci } 684bf215546Sopenharmony_ci case Format::MUBUF: 685bf215546Sopenharmony_ci case Format::MTBUF: 686bf215546Sopenharmony_ci case Format::MIMG: 687bf215546Sopenharmony_ci case Format::GLOBAL: 688bf215546Sopenharmony_ci case Format::SCRATCH: { 689bf215546Sopenharmony_ci wait_event ev = 690bf215546Sopenharmony_ci !instr->definitions.empty() || ctx.gfx_level < GFX10 ? event_vmem : event_vmem_store; 691bf215546Sopenharmony_ci update_counters(ctx, ev, get_sync_info(instr)); 692bf215546Sopenharmony_ci 693bf215546Sopenharmony_ci if (!instr->definitions.empty()) 694bf215546Sopenharmony_ci insert_wait_entry(ctx, instr->definitions[0], ev, get_vmem_type(instr)); 695bf215546Sopenharmony_ci 696bf215546Sopenharmony_ci if (ctx.gfx_level == GFX6 && instr->format != Format::MIMG && instr->operands.size() == 4) { 697bf215546Sopenharmony_ci update_counters(ctx, event_vmem_gpr_lock); 698bf215546Sopenharmony_ci insert_wait_entry(ctx, instr->operands[3], event_vmem_gpr_lock); 699bf215546Sopenharmony_ci } else if (ctx.gfx_level == GFX6 && instr->isMIMG() && !instr->operands[2].isUndefined()) { 700bf215546Sopenharmony_ci update_counters(ctx, event_vmem_gpr_lock); 701bf215546Sopenharmony_ci insert_wait_entry(ctx, instr->operands[2], event_vmem_gpr_lock); 702bf215546Sopenharmony_ci } 703bf215546Sopenharmony_ci 704bf215546Sopenharmony_ci break; 705bf215546Sopenharmony_ci } 706bf215546Sopenharmony_ci case Format::SOPP: { 707bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::s_sendmsg || instr->opcode == aco_opcode::s_sendmsghalt) 708bf215546Sopenharmony_ci update_counters(ctx, event_sendmsg); 709bf215546Sopenharmony_ci break; 710bf215546Sopenharmony_ci } 711bf215546Sopenharmony_ci default: break; 712bf215546Sopenharmony_ci } 713bf215546Sopenharmony_ci} 714bf215546Sopenharmony_ci 715bf215546Sopenharmony_civoid 716bf215546Sopenharmony_ciemit_waitcnt(wait_ctx& ctx, std::vector<aco_ptr<Instruction>>& instructions, wait_imm& imm) 717bf215546Sopenharmony_ci{ 718bf215546Sopenharmony_ci if (imm.vs != wait_imm::unset_counter) { 719bf215546Sopenharmony_ci assert(ctx.gfx_level >= GFX10); 720bf215546Sopenharmony_ci SOPK_instruction* waitcnt_vs = 721bf215546Sopenharmony_ci create_instruction<SOPK_instruction>(aco_opcode::s_waitcnt_vscnt, Format::SOPK, 0, 1); 722bf215546Sopenharmony_ci waitcnt_vs->definitions[0] = Definition(sgpr_null, s1); 723bf215546Sopenharmony_ci waitcnt_vs->imm = imm.vs; 724bf215546Sopenharmony_ci instructions.emplace_back(waitcnt_vs); 725bf215546Sopenharmony_ci imm.vs = wait_imm::unset_counter; 726bf215546Sopenharmony_ci } 727bf215546Sopenharmony_ci if (!imm.empty()) { 728bf215546Sopenharmony_ci SOPP_instruction* waitcnt = 729bf215546Sopenharmony_ci create_instruction<SOPP_instruction>(aco_opcode::s_waitcnt, Format::SOPP, 0, 0); 730bf215546Sopenharmony_ci waitcnt->imm = imm.pack(ctx.gfx_level); 731bf215546Sopenharmony_ci waitcnt->block = -1; 732bf215546Sopenharmony_ci instructions.emplace_back(waitcnt); 733bf215546Sopenharmony_ci } 734bf215546Sopenharmony_ci imm = wait_imm(); 735bf215546Sopenharmony_ci} 736bf215546Sopenharmony_ci 737bf215546Sopenharmony_civoid 738bf215546Sopenharmony_cihandle_block(Program* program, Block& block, wait_ctx& ctx) 739bf215546Sopenharmony_ci{ 740bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>> new_instructions; 741bf215546Sopenharmony_ci 742bf215546Sopenharmony_ci wait_imm queued_imm; 743bf215546Sopenharmony_ci 744bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block.instructions) { 745bf215546Sopenharmony_ci bool is_wait = parse_wait_instr(ctx, queued_imm, instr.get()); 746bf215546Sopenharmony_ci 747bf215546Sopenharmony_ci memory_sync_info sync_info = get_sync_info(instr.get()); 748bf215546Sopenharmony_ci kill(queued_imm, instr.get(), ctx, sync_info); 749bf215546Sopenharmony_ci 750bf215546Sopenharmony_ci gen(instr.get(), ctx); 751bf215546Sopenharmony_ci 752bf215546Sopenharmony_ci if (instr->format != Format::PSEUDO_BARRIER && !is_wait) { 753bf215546Sopenharmony_ci if (!queued_imm.empty()) 754bf215546Sopenharmony_ci emit_waitcnt(ctx, new_instructions, queued_imm); 755bf215546Sopenharmony_ci 756bf215546Sopenharmony_ci new_instructions.emplace_back(std::move(instr)); 757bf215546Sopenharmony_ci perform_barrier(ctx, queued_imm, sync_info, semantic_acquire); 758bf215546Sopenharmony_ci } 759bf215546Sopenharmony_ci } 760bf215546Sopenharmony_ci 761bf215546Sopenharmony_ci if (!queued_imm.empty()) 762bf215546Sopenharmony_ci emit_waitcnt(ctx, new_instructions, queued_imm); 763bf215546Sopenharmony_ci 764bf215546Sopenharmony_ci block.instructions.swap(new_instructions); 765bf215546Sopenharmony_ci} 766bf215546Sopenharmony_ci 767bf215546Sopenharmony_ci} /* end namespace */ 768bf215546Sopenharmony_ci 769bf215546Sopenharmony_civoid 770bf215546Sopenharmony_ciinsert_wait_states(Program* program) 771bf215546Sopenharmony_ci{ 772bf215546Sopenharmony_ci /* per BB ctx */ 773bf215546Sopenharmony_ci std::vector<bool> done(program->blocks.size()); 774bf215546Sopenharmony_ci std::vector<wait_ctx> in_ctx(program->blocks.size(), wait_ctx(program)); 775bf215546Sopenharmony_ci std::vector<wait_ctx> out_ctx(program->blocks.size(), wait_ctx(program)); 776bf215546Sopenharmony_ci 777bf215546Sopenharmony_ci std::stack<unsigned, std::vector<unsigned>> loop_header_indices; 778bf215546Sopenharmony_ci unsigned loop_progress = 0; 779bf215546Sopenharmony_ci 780bf215546Sopenharmony_ci if (program->stage.has(SWStage::VS) && program->info.vs.dynamic_inputs) { 781bf215546Sopenharmony_ci for (Definition def : program->vs_inputs) { 782bf215546Sopenharmony_ci update_counters(in_ctx[0], event_vmem); 783bf215546Sopenharmony_ci insert_wait_entry(in_ctx[0], def, event_vmem); 784bf215546Sopenharmony_ci } 785bf215546Sopenharmony_ci } 786bf215546Sopenharmony_ci 787bf215546Sopenharmony_ci for (unsigned i = 0; i < program->blocks.size();) { 788bf215546Sopenharmony_ci Block& current = program->blocks[i++]; 789bf215546Sopenharmony_ci wait_ctx ctx = in_ctx[current.index]; 790bf215546Sopenharmony_ci 791bf215546Sopenharmony_ci if (current.kind & block_kind_loop_header) { 792bf215546Sopenharmony_ci loop_header_indices.push(current.index); 793bf215546Sopenharmony_ci } else if (current.kind & block_kind_loop_exit) { 794bf215546Sopenharmony_ci bool repeat = false; 795bf215546Sopenharmony_ci if (loop_progress == loop_header_indices.size()) { 796bf215546Sopenharmony_ci i = loop_header_indices.top(); 797bf215546Sopenharmony_ci repeat = true; 798bf215546Sopenharmony_ci } 799bf215546Sopenharmony_ci loop_header_indices.pop(); 800bf215546Sopenharmony_ci loop_progress = std::min<unsigned>(loop_progress, loop_header_indices.size()); 801bf215546Sopenharmony_ci if (repeat) 802bf215546Sopenharmony_ci continue; 803bf215546Sopenharmony_ci } 804bf215546Sopenharmony_ci 805bf215546Sopenharmony_ci bool changed = false; 806bf215546Sopenharmony_ci for (unsigned b : current.linear_preds) 807bf215546Sopenharmony_ci changed |= ctx.join(&out_ctx[b], false); 808bf215546Sopenharmony_ci for (unsigned b : current.logical_preds) 809bf215546Sopenharmony_ci changed |= ctx.join(&out_ctx[b], true); 810bf215546Sopenharmony_ci 811bf215546Sopenharmony_ci if (done[current.index] && !changed) { 812bf215546Sopenharmony_ci in_ctx[current.index] = std::move(ctx); 813bf215546Sopenharmony_ci continue; 814bf215546Sopenharmony_ci } else { 815bf215546Sopenharmony_ci in_ctx[current.index] = ctx; 816bf215546Sopenharmony_ci } 817bf215546Sopenharmony_ci 818bf215546Sopenharmony_ci if (current.instructions.empty()) { 819bf215546Sopenharmony_ci out_ctx[current.index] = std::move(ctx); 820bf215546Sopenharmony_ci continue; 821bf215546Sopenharmony_ci } 822bf215546Sopenharmony_ci 823bf215546Sopenharmony_ci loop_progress = std::max<unsigned>(loop_progress, current.loop_nest_depth); 824bf215546Sopenharmony_ci done[current.index] = true; 825bf215546Sopenharmony_ci 826bf215546Sopenharmony_ci handle_block(program, current, ctx); 827bf215546Sopenharmony_ci 828bf215546Sopenharmony_ci out_ctx[current.index] = std::move(ctx); 829bf215546Sopenharmony_ci } 830bf215546Sopenharmony_ci} 831bf215546Sopenharmony_ci 832bf215546Sopenharmony_ci} // namespace aco 833