1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright © 2018 Valve Corporation 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21bf215546Sopenharmony_ci * IN THE SOFTWARE. 22bf215546Sopenharmony_ci * 23bf215546Sopenharmony_ci */ 24bf215546Sopenharmony_ci 25bf215546Sopenharmony_ci#include "aco_ir.h" 26bf215546Sopenharmony_ci 27bf215546Sopenharmony_ci#include <algorithm> 28bf215546Sopenharmony_ci#include <map> 29bf215546Sopenharmony_ci#include <vector> 30bf215546Sopenharmony_ci 31bf215546Sopenharmony_cinamespace aco { 32bf215546Sopenharmony_cinamespace { 33bf215546Sopenharmony_ci 34bf215546Sopenharmony_cistruct phi_info_item { 35bf215546Sopenharmony_ci Definition def; 36bf215546Sopenharmony_ci Operand op; 37bf215546Sopenharmony_ci}; 38bf215546Sopenharmony_ci 39bf215546Sopenharmony_cistruct ssa_elimination_ctx { 40bf215546Sopenharmony_ci /* The outer vectors should be indexed by block index. The inner vectors store phi information 41bf215546Sopenharmony_ci * for each block. */ 42bf215546Sopenharmony_ci std::vector<std::vector<phi_info_item>> logical_phi_info; 43bf215546Sopenharmony_ci std::vector<std::vector<phi_info_item>> linear_phi_info; 44bf215546Sopenharmony_ci std::vector<bool> empty_blocks; 45bf215546Sopenharmony_ci std::vector<bool> blocks_incoming_exec_used; 46bf215546Sopenharmony_ci Program* program; 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci ssa_elimination_ctx(Program* program_) 49bf215546Sopenharmony_ci : logical_phi_info(program_->blocks.size()), linear_phi_info(program_->blocks.size()), 50bf215546Sopenharmony_ci empty_blocks(program_->blocks.size(), true), 51bf215546Sopenharmony_ci blocks_incoming_exec_used(program_->blocks.size(), true), program(program_) 52bf215546Sopenharmony_ci {} 53bf215546Sopenharmony_ci}; 54bf215546Sopenharmony_ci 55bf215546Sopenharmony_civoid 56bf215546Sopenharmony_cicollect_phi_info(ssa_elimination_ctx& ctx) 57bf215546Sopenharmony_ci{ 58bf215546Sopenharmony_ci for (Block& block : ctx.program->blocks) { 59bf215546Sopenharmony_ci for (aco_ptr<Instruction>& phi : block.instructions) { 60bf215546Sopenharmony_ci if (phi->opcode != aco_opcode::p_phi && phi->opcode != aco_opcode::p_linear_phi) 61bf215546Sopenharmony_ci break; 62bf215546Sopenharmony_ci 63bf215546Sopenharmony_ci for (unsigned i = 0; i < phi->operands.size(); i++) { 64bf215546Sopenharmony_ci if (phi->operands[i].isUndefined()) 65bf215546Sopenharmony_ci continue; 66bf215546Sopenharmony_ci if (phi->operands[i].physReg() == phi->definitions[0].physReg()) 67bf215546Sopenharmony_ci continue; 68bf215546Sopenharmony_ci 69bf215546Sopenharmony_ci assert(phi->definitions[0].size() == phi->operands[i].size()); 70bf215546Sopenharmony_ci 71bf215546Sopenharmony_ci std::vector<unsigned>& preds = 72bf215546Sopenharmony_ci phi->opcode == aco_opcode::p_phi ? block.logical_preds : block.linear_preds; 73bf215546Sopenharmony_ci uint32_t pred_idx = preds[i]; 74bf215546Sopenharmony_ci auto& info_vec = phi->opcode == aco_opcode::p_phi ? ctx.logical_phi_info[pred_idx] 75bf215546Sopenharmony_ci : ctx.linear_phi_info[pred_idx]; 76bf215546Sopenharmony_ci info_vec.push_back({phi->definitions[0], phi->operands[i]}); 77bf215546Sopenharmony_ci ctx.empty_blocks[pred_idx] = false; 78bf215546Sopenharmony_ci } 79bf215546Sopenharmony_ci } 80bf215546Sopenharmony_ci } 81bf215546Sopenharmony_ci} 82bf215546Sopenharmony_ci 83bf215546Sopenharmony_civoid 84bf215546Sopenharmony_ciinsert_parallelcopies(ssa_elimination_ctx& ctx) 85bf215546Sopenharmony_ci{ 86bf215546Sopenharmony_ci /* insert the parallelcopies from logical phis before p_logical_end */ 87bf215546Sopenharmony_ci for (unsigned block_idx = 0; block_idx < ctx.program->blocks.size(); ++block_idx) { 88bf215546Sopenharmony_ci auto& logical_phi_info = ctx.logical_phi_info[block_idx]; 89bf215546Sopenharmony_ci if (logical_phi_info.empty()) 90bf215546Sopenharmony_ci continue; 91bf215546Sopenharmony_ci 92bf215546Sopenharmony_ci Block& block = ctx.program->blocks[block_idx]; 93bf215546Sopenharmony_ci unsigned idx = block.instructions.size() - 1; 94bf215546Sopenharmony_ci while (block.instructions[idx]->opcode != aco_opcode::p_logical_end) { 95bf215546Sopenharmony_ci assert(idx > 0); 96bf215546Sopenharmony_ci idx--; 97bf215546Sopenharmony_ci } 98bf215546Sopenharmony_ci 99bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>::iterator it = std::next(block.instructions.begin(), idx); 100bf215546Sopenharmony_ci aco_ptr<Pseudo_instruction> pc{ 101bf215546Sopenharmony_ci create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 102bf215546Sopenharmony_ci logical_phi_info.size(), logical_phi_info.size())}; 103bf215546Sopenharmony_ci unsigned i = 0; 104bf215546Sopenharmony_ci for (auto& phi_info : logical_phi_info) { 105bf215546Sopenharmony_ci pc->definitions[i] = phi_info.def; 106bf215546Sopenharmony_ci pc->operands[i] = phi_info.op; 107bf215546Sopenharmony_ci i++; 108bf215546Sopenharmony_ci } 109bf215546Sopenharmony_ci /* this shouldn't be needed since we're only copying vgprs */ 110bf215546Sopenharmony_ci pc->tmp_in_scc = false; 111bf215546Sopenharmony_ci block.instructions.insert(it, std::move(pc)); 112bf215546Sopenharmony_ci } 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_ci /* insert parallelcopies for the linear phis at the end of blocks just before the branch */ 115bf215546Sopenharmony_ci for (unsigned block_idx = 0; block_idx < ctx.program->blocks.size(); ++block_idx) { 116bf215546Sopenharmony_ci auto& linear_phi_info = ctx.linear_phi_info[block_idx]; 117bf215546Sopenharmony_ci if (linear_phi_info.empty()) 118bf215546Sopenharmony_ci continue; 119bf215546Sopenharmony_ci 120bf215546Sopenharmony_ci Block& block = ctx.program->blocks[block_idx]; 121bf215546Sopenharmony_ci std::vector<aco_ptr<Instruction>>::iterator it = block.instructions.end(); 122bf215546Sopenharmony_ci --it; 123bf215546Sopenharmony_ci assert((*it)->isBranch()); 124bf215546Sopenharmony_ci PhysReg scratch_sgpr = (*it)->definitions[0].physReg(); 125bf215546Sopenharmony_ci aco_ptr<Pseudo_instruction> pc{ 126bf215546Sopenharmony_ci create_instruction<Pseudo_instruction>(aco_opcode::p_parallelcopy, Format::PSEUDO, 127bf215546Sopenharmony_ci linear_phi_info.size(), linear_phi_info.size())}; 128bf215546Sopenharmony_ci unsigned i = 0; 129bf215546Sopenharmony_ci for (auto& phi_info : linear_phi_info) { 130bf215546Sopenharmony_ci pc->definitions[i] = phi_info.def; 131bf215546Sopenharmony_ci pc->operands[i] = phi_info.op; 132bf215546Sopenharmony_ci i++; 133bf215546Sopenharmony_ci } 134bf215546Sopenharmony_ci pc->tmp_in_scc = block.scc_live_out; 135bf215546Sopenharmony_ci pc->scratch_sgpr = scratch_sgpr; 136bf215546Sopenharmony_ci block.instructions.insert(it, std::move(pc)); 137bf215546Sopenharmony_ci } 138bf215546Sopenharmony_ci} 139bf215546Sopenharmony_ci 140bf215546Sopenharmony_cibool 141bf215546Sopenharmony_ciis_empty_block(Block* block, bool ignore_exec_writes) 142bf215546Sopenharmony_ci{ 143bf215546Sopenharmony_ci /* check if this block is empty and the exec mask is not needed */ 144bf215546Sopenharmony_ci for (aco_ptr<Instruction>& instr : block->instructions) { 145bf215546Sopenharmony_ci switch (instr->opcode) { 146bf215546Sopenharmony_ci case aco_opcode::p_linear_phi: 147bf215546Sopenharmony_ci case aco_opcode::p_phi: 148bf215546Sopenharmony_ci case aco_opcode::p_logical_start: 149bf215546Sopenharmony_ci case aco_opcode::p_logical_end: 150bf215546Sopenharmony_ci case aco_opcode::p_branch: break; 151bf215546Sopenharmony_ci case aco_opcode::p_parallelcopy: 152bf215546Sopenharmony_ci for (unsigned i = 0; i < instr->definitions.size(); i++) { 153bf215546Sopenharmony_ci if (ignore_exec_writes && instr->definitions[i].physReg() == exec) 154bf215546Sopenharmony_ci continue; 155bf215546Sopenharmony_ci if (instr->definitions[i].physReg() != instr->operands[i].physReg()) 156bf215546Sopenharmony_ci return false; 157bf215546Sopenharmony_ci } 158bf215546Sopenharmony_ci break; 159bf215546Sopenharmony_ci case aco_opcode::s_andn2_b64: 160bf215546Sopenharmony_ci case aco_opcode::s_andn2_b32: 161bf215546Sopenharmony_ci if (ignore_exec_writes && instr->definitions[0].physReg() == exec) 162bf215546Sopenharmony_ci break; 163bf215546Sopenharmony_ci return false; 164bf215546Sopenharmony_ci default: return false; 165bf215546Sopenharmony_ci } 166bf215546Sopenharmony_ci } 167bf215546Sopenharmony_ci return true; 168bf215546Sopenharmony_ci} 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_civoid 171bf215546Sopenharmony_citry_remove_merge_block(ssa_elimination_ctx& ctx, Block* block) 172bf215546Sopenharmony_ci{ 173bf215546Sopenharmony_ci /* check if the successor is another merge block which restores exec */ 174bf215546Sopenharmony_ci // TODO: divergent loops also restore exec 175bf215546Sopenharmony_ci if (block->linear_succs.size() != 1 || 176bf215546Sopenharmony_ci !(ctx.program->blocks[block->linear_succs[0]].kind & block_kind_merge)) 177bf215546Sopenharmony_ci return; 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci /* check if this block is empty */ 180bf215546Sopenharmony_ci if (!is_empty_block(block, true)) 181bf215546Sopenharmony_ci return; 182bf215546Sopenharmony_ci 183bf215546Sopenharmony_ci /* keep the branch instruction and remove the rest */ 184bf215546Sopenharmony_ci aco_ptr<Instruction> branch = std::move(block->instructions.back()); 185bf215546Sopenharmony_ci block->instructions.clear(); 186bf215546Sopenharmony_ci block->instructions.emplace_back(std::move(branch)); 187bf215546Sopenharmony_ci} 188bf215546Sopenharmony_ci 189bf215546Sopenharmony_civoid 190bf215546Sopenharmony_citry_remove_invert_block(ssa_elimination_ctx& ctx, Block* block) 191bf215546Sopenharmony_ci{ 192bf215546Sopenharmony_ci assert(block->linear_succs.size() == 2); 193bf215546Sopenharmony_ci /* only remove this block if the successor got removed as well */ 194bf215546Sopenharmony_ci if (block->linear_succs[0] != block->linear_succs[1]) 195bf215546Sopenharmony_ci return; 196bf215546Sopenharmony_ci 197bf215546Sopenharmony_ci /* check if block is otherwise empty */ 198bf215546Sopenharmony_ci if (!is_empty_block(block, true)) 199bf215546Sopenharmony_ci return; 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci unsigned succ_idx = block->linear_succs[0]; 202bf215546Sopenharmony_ci assert(block->linear_preds.size() == 2); 203bf215546Sopenharmony_ci for (unsigned i = 0; i < 2; i++) { 204bf215546Sopenharmony_ci Block* pred = &ctx.program->blocks[block->linear_preds[i]]; 205bf215546Sopenharmony_ci pred->linear_succs[0] = succ_idx; 206bf215546Sopenharmony_ci ctx.program->blocks[succ_idx].linear_preds[i] = pred->index; 207bf215546Sopenharmony_ci 208bf215546Sopenharmony_ci Pseudo_branch_instruction& branch = pred->instructions.back()->branch(); 209bf215546Sopenharmony_ci assert(branch.isBranch()); 210bf215546Sopenharmony_ci branch.target[0] = succ_idx; 211bf215546Sopenharmony_ci branch.target[1] = succ_idx; 212bf215546Sopenharmony_ci } 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci block->instructions.clear(); 215bf215546Sopenharmony_ci block->linear_preds.clear(); 216bf215546Sopenharmony_ci block->linear_succs.clear(); 217bf215546Sopenharmony_ci} 218bf215546Sopenharmony_ci 219bf215546Sopenharmony_civoid 220bf215546Sopenharmony_citry_remove_simple_block(ssa_elimination_ctx& ctx, Block* block) 221bf215546Sopenharmony_ci{ 222bf215546Sopenharmony_ci if (!is_empty_block(block, false)) 223bf215546Sopenharmony_ci return; 224bf215546Sopenharmony_ci 225bf215546Sopenharmony_ci Block& pred = ctx.program->blocks[block->linear_preds[0]]; 226bf215546Sopenharmony_ci Block& succ = ctx.program->blocks[block->linear_succs[0]]; 227bf215546Sopenharmony_ci Pseudo_branch_instruction& branch = pred.instructions.back()->branch(); 228bf215546Sopenharmony_ci if (branch.opcode == aco_opcode::p_branch) { 229bf215546Sopenharmony_ci branch.target[0] = succ.index; 230bf215546Sopenharmony_ci branch.target[1] = succ.index; 231bf215546Sopenharmony_ci } else if (branch.target[0] == block->index) { 232bf215546Sopenharmony_ci branch.target[0] = succ.index; 233bf215546Sopenharmony_ci } else if (branch.target[0] == succ.index) { 234bf215546Sopenharmony_ci assert(branch.target[1] == block->index); 235bf215546Sopenharmony_ci branch.target[1] = succ.index; 236bf215546Sopenharmony_ci branch.opcode = aco_opcode::p_branch; 237bf215546Sopenharmony_ci } else if (branch.target[1] == block->index) { 238bf215546Sopenharmony_ci /* check if there is a fall-through path from block to succ */ 239bf215546Sopenharmony_ci bool falls_through = block->index < succ.index; 240bf215546Sopenharmony_ci for (unsigned j = block->index + 1; falls_through && j < succ.index; j++) { 241bf215546Sopenharmony_ci assert(ctx.program->blocks[j].index == j); 242bf215546Sopenharmony_ci if (!ctx.program->blocks[j].instructions.empty()) 243bf215546Sopenharmony_ci falls_through = false; 244bf215546Sopenharmony_ci } 245bf215546Sopenharmony_ci if (falls_through) { 246bf215546Sopenharmony_ci branch.target[1] = succ.index; 247bf215546Sopenharmony_ci } else { 248bf215546Sopenharmony_ci /* check if there is a fall-through path for the alternative target */ 249bf215546Sopenharmony_ci if (block->index >= branch.target[0]) 250bf215546Sopenharmony_ci return; 251bf215546Sopenharmony_ci for (unsigned j = block->index + 1; j < branch.target[0]; j++) { 252bf215546Sopenharmony_ci if (!ctx.program->blocks[j].instructions.empty()) 253bf215546Sopenharmony_ci return; 254bf215546Sopenharmony_ci } 255bf215546Sopenharmony_ci 256bf215546Sopenharmony_ci /* This is a (uniform) break or continue block. The branch condition has to be inverted. */ 257bf215546Sopenharmony_ci if (branch.opcode == aco_opcode::p_cbranch_z) 258bf215546Sopenharmony_ci branch.opcode = aco_opcode::p_cbranch_nz; 259bf215546Sopenharmony_ci else if (branch.opcode == aco_opcode::p_cbranch_nz) 260bf215546Sopenharmony_ci branch.opcode = aco_opcode::p_cbranch_z; 261bf215546Sopenharmony_ci else 262bf215546Sopenharmony_ci assert(false); 263bf215546Sopenharmony_ci /* also invert the linear successors */ 264bf215546Sopenharmony_ci pred.linear_succs[0] = pred.linear_succs[1]; 265bf215546Sopenharmony_ci pred.linear_succs[1] = succ.index; 266bf215546Sopenharmony_ci branch.target[1] = branch.target[0]; 267bf215546Sopenharmony_ci branch.target[0] = succ.index; 268bf215546Sopenharmony_ci } 269bf215546Sopenharmony_ci } else { 270bf215546Sopenharmony_ci assert(false); 271bf215546Sopenharmony_ci } 272bf215546Sopenharmony_ci 273bf215546Sopenharmony_ci if (branch.target[0] == branch.target[1]) 274bf215546Sopenharmony_ci branch.opcode = aco_opcode::p_branch; 275bf215546Sopenharmony_ci 276bf215546Sopenharmony_ci for (unsigned i = 0; i < pred.linear_succs.size(); i++) 277bf215546Sopenharmony_ci if (pred.linear_succs[i] == block->index) 278bf215546Sopenharmony_ci pred.linear_succs[i] = succ.index; 279bf215546Sopenharmony_ci 280bf215546Sopenharmony_ci for (unsigned i = 0; i < succ.linear_preds.size(); i++) 281bf215546Sopenharmony_ci if (succ.linear_preds[i] == block->index) 282bf215546Sopenharmony_ci succ.linear_preds[i] = pred.index; 283bf215546Sopenharmony_ci 284bf215546Sopenharmony_ci block->instructions.clear(); 285bf215546Sopenharmony_ci block->linear_preds.clear(); 286bf215546Sopenharmony_ci block->linear_succs.clear(); 287bf215546Sopenharmony_ci} 288bf215546Sopenharmony_ci 289bf215546Sopenharmony_cibool 290bf215546Sopenharmony_ciinstr_writes_exec(Instruction* instr) 291bf215546Sopenharmony_ci{ 292bf215546Sopenharmony_ci for (Definition& def : instr->definitions) 293bf215546Sopenharmony_ci if (def.physReg() == exec || def.physReg() == exec_hi) 294bf215546Sopenharmony_ci return true; 295bf215546Sopenharmony_ci 296bf215546Sopenharmony_ci return false; 297bf215546Sopenharmony_ci} 298bf215546Sopenharmony_ci 299bf215546Sopenharmony_civoid 300bf215546Sopenharmony_cieliminate_useless_exec_writes_in_block(ssa_elimination_ctx& ctx, Block& block) 301bf215546Sopenharmony_ci{ 302bf215546Sopenharmony_ci /* Check if any successor needs the outgoing exec mask from the current block. */ 303bf215546Sopenharmony_ci 304bf215546Sopenharmony_ci bool exec_write_used; 305bf215546Sopenharmony_ci 306bf215546Sopenharmony_ci if (!ctx.logical_phi_info[block.index].empty()) { 307bf215546Sopenharmony_ci exec_write_used = true; 308bf215546Sopenharmony_ci } else { 309bf215546Sopenharmony_ci bool copy_to_exec = false; 310bf215546Sopenharmony_ci bool copy_from_exec = false; 311bf215546Sopenharmony_ci 312bf215546Sopenharmony_ci for (const auto& successor_phi_info : ctx.linear_phi_info[block.index]) { 313bf215546Sopenharmony_ci copy_to_exec |= successor_phi_info.def.physReg() == exec; 314bf215546Sopenharmony_ci copy_from_exec |= successor_phi_info.op.physReg() == exec; 315bf215546Sopenharmony_ci } 316bf215546Sopenharmony_ci 317bf215546Sopenharmony_ci if (copy_from_exec) 318bf215546Sopenharmony_ci exec_write_used = true; 319bf215546Sopenharmony_ci else if (copy_to_exec) 320bf215546Sopenharmony_ci exec_write_used = false; 321bf215546Sopenharmony_ci else 322bf215546Sopenharmony_ci /* blocks_incoming_exec_used is initialized to true, so this is correct even for loops. */ 323bf215546Sopenharmony_ci exec_write_used = 324bf215546Sopenharmony_ci std::any_of(block.linear_succs.begin(), block.linear_succs.end(), 325bf215546Sopenharmony_ci [&ctx](int succ_idx) { return ctx.blocks_incoming_exec_used[succ_idx]; }); 326bf215546Sopenharmony_ci } 327bf215546Sopenharmony_ci 328bf215546Sopenharmony_ci /* Go through all instructions and eliminate useless exec writes. */ 329bf215546Sopenharmony_ci 330bf215546Sopenharmony_ci for (int i = block.instructions.size() - 1; i >= 0; --i) { 331bf215546Sopenharmony_ci aco_ptr<Instruction>& instr = block.instructions[i]; 332bf215546Sopenharmony_ci 333bf215546Sopenharmony_ci /* We already take information from phis into account before the loop, so let's just break on 334bf215546Sopenharmony_ci * phis. */ 335bf215546Sopenharmony_ci if (instr->opcode == aco_opcode::p_linear_phi || instr->opcode == aco_opcode::p_phi) 336bf215546Sopenharmony_ci break; 337bf215546Sopenharmony_ci 338bf215546Sopenharmony_ci /* See if the current instruction needs or writes exec. */ 339bf215546Sopenharmony_ci bool needs_exec = needs_exec_mask(instr.get()); 340bf215546Sopenharmony_ci bool writes_exec = instr_writes_exec(instr.get()); 341bf215546Sopenharmony_ci 342bf215546Sopenharmony_ci /* See if we found an unused exec write. */ 343bf215546Sopenharmony_ci if (writes_exec && !exec_write_used) { 344bf215546Sopenharmony_ci instr.reset(); 345bf215546Sopenharmony_ci continue; 346bf215546Sopenharmony_ci } 347bf215546Sopenharmony_ci 348bf215546Sopenharmony_ci /* For a newly encountered exec write, clear the used flag. */ 349bf215546Sopenharmony_ci if (writes_exec) 350bf215546Sopenharmony_ci exec_write_used = false; 351bf215546Sopenharmony_ci 352bf215546Sopenharmony_ci /* If the current instruction needs exec, mark it as used. */ 353bf215546Sopenharmony_ci exec_write_used |= needs_exec; 354bf215546Sopenharmony_ci } 355bf215546Sopenharmony_ci 356bf215546Sopenharmony_ci /* Remember if the current block needs an incoming exec mask from its predecessors. */ 357bf215546Sopenharmony_ci ctx.blocks_incoming_exec_used[block.index] = exec_write_used; 358bf215546Sopenharmony_ci 359bf215546Sopenharmony_ci /* Cleanup: remove deleted instructions from the vector. */ 360bf215546Sopenharmony_ci auto new_end = std::remove(block.instructions.begin(), block.instructions.end(), nullptr); 361bf215546Sopenharmony_ci block.instructions.resize(new_end - block.instructions.begin()); 362bf215546Sopenharmony_ci} 363bf215546Sopenharmony_ci 364bf215546Sopenharmony_civoid 365bf215546Sopenharmony_cijump_threading(ssa_elimination_ctx& ctx) 366bf215546Sopenharmony_ci{ 367bf215546Sopenharmony_ci for (int i = ctx.program->blocks.size() - 1; i >= 0; i--) { 368bf215546Sopenharmony_ci Block* block = &ctx.program->blocks[i]; 369bf215546Sopenharmony_ci eliminate_useless_exec_writes_in_block(ctx, *block); 370bf215546Sopenharmony_ci 371bf215546Sopenharmony_ci if (!ctx.empty_blocks[i]) 372bf215546Sopenharmony_ci continue; 373bf215546Sopenharmony_ci 374bf215546Sopenharmony_ci if (block->kind & block_kind_invert) { 375bf215546Sopenharmony_ci try_remove_invert_block(ctx, block); 376bf215546Sopenharmony_ci continue; 377bf215546Sopenharmony_ci } 378bf215546Sopenharmony_ci 379bf215546Sopenharmony_ci if (block->linear_succs.size() > 1) 380bf215546Sopenharmony_ci continue; 381bf215546Sopenharmony_ci 382bf215546Sopenharmony_ci if (block->kind & block_kind_merge || block->kind & block_kind_loop_exit) 383bf215546Sopenharmony_ci try_remove_merge_block(ctx, block); 384bf215546Sopenharmony_ci 385bf215546Sopenharmony_ci if (block->linear_preds.size() == 1) 386bf215546Sopenharmony_ci try_remove_simple_block(ctx, block); 387bf215546Sopenharmony_ci } 388bf215546Sopenharmony_ci} 389bf215546Sopenharmony_ci 390bf215546Sopenharmony_ci} /* end namespace */ 391bf215546Sopenharmony_ci 392bf215546Sopenharmony_civoid 393bf215546Sopenharmony_cissa_elimination(Program* program) 394bf215546Sopenharmony_ci{ 395bf215546Sopenharmony_ci ssa_elimination_ctx ctx(program); 396bf215546Sopenharmony_ci 397bf215546Sopenharmony_ci /* Collect information about every phi-instruction */ 398bf215546Sopenharmony_ci collect_phi_info(ctx); 399bf215546Sopenharmony_ci 400bf215546Sopenharmony_ci /* eliminate empty blocks */ 401bf215546Sopenharmony_ci jump_threading(ctx); 402bf215546Sopenharmony_ci 403bf215546Sopenharmony_ci /* insert parallelcopies from SSA elimination */ 404bf215546Sopenharmony_ci insert_parallelcopies(ctx); 405bf215546Sopenharmony_ci} 406bf215546Sopenharmony_ci} // namespace aco 407