/**
 * Copyright (c) 2021-2022 Huawei Device Co., Ltd.
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "reg_encoder.h"
#include "common.h"
#include "compiler/optimizer/ir/basicblock.h"

namespace panda::bytecodeopt {

// Returns true when `inst` is one of the "range" intrinsics, i.e. an intrinsic
// whose bytecode encoding takes its arguments as a contiguous run of registers
// (call.range / callthis.range / newobj.range / supercall*range / the
// createobjectwithexcludedkeys variants).
// Always returns false when ENABLE_BYTECODE_OPT is not defined.
static bool IsIntrinsicRange(Inst *inst)
{
    if (inst->GetOpcode() != compiler::Opcode::Intrinsic) {
        return false;
    }
#ifdef ENABLE_BYTECODE_OPT
    switch (inst->CastToIntrinsic()->GetIntrinsicId()) {
        case compiler::RuntimeInterface::IntrinsicId::CALLRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::CALLTHISRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CALLTHISRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::NEWOBJRANGE_IMM16_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_NEWOBJRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::SUPERCALLTHISRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::SUPERCALLARROWRANGE_IMM8_IMM8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLTHISRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_SUPERCALLARROWRANGE_PREF_IMM16_V8:
        case compiler::RuntimeInterface::IntrinsicId::CREATEOBJECTWITHEXCLUDEDKEYS_IMM8_V8_V8:
        case compiler::RuntimeInterface::IntrinsicId::WIDE_CREATEOBJECTWITHEXCLUDEDKEYS_PREF_IMM16_V8_V8:
            return true;
        default:
            return false;
    }
#endif
    return false;
}

// True if `inst` may encode its arguments as a register range.
// Currently only the range intrinsics above qualify.
static bool CanHoldRange(Inst *inst)
{
    switch (inst->GetOpcode()) {
        case compiler::Opcode::Intrinsic:
            return IsIntrinsicRange(inst);
        default:
            return false;
    }
}

// Scans the whole graph and returns the largest argument count among
// range-capable instructions — this is the number of contiguous temporary
// registers that must be reserved for range calls. The trailing state input
// (RequireState) is not an argument and is excluded from the count.
static compiler::Register CalculateNumNeededRangeTemps(const compiler::Graph *graph)
{
    compiler::Register ret = 0;

    for (auto bb : graph->GetBlocksRPO()) {
        for (const auto &inst : bb->AllInsts()) {
            if (!CanHoldRange(inst)) {
                continue;
            }
            auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
            if (ret < nargs) {
                // Only reserve for instructions that actually need a range:
                // either too many args for non-range encoding, or a range intrinsic.
                if (nargs > MAX_NUM_NON_RANGE_ARGS || IsIntrinsicRange(inst)) {
                    ret = nargs;
                }
            }
        }
    }

    return ret;
}

// Pass driver. Phases:
//   1. RENUMBER_ARGS  - compact the frame so arguments stay on top and the
//                       range temps fit (RenumberArgRegs).
//   2. RESERVE_TEMPS  - iterate to a fixed point: compute how many temp
//                       registers are needed for wide operands, shift all
//                       registers up to make room, recompute (shifting may
//                       push more registers out of the compact encoding).
//   3. INSERT_SPILLS  - insert SpillFill moves so every operand fits its
//                       encoding width, then update the register usage mask.
// Returns false when the virtual frame cannot hold the required registers.
bool RegEncoder::RunImpl()
{
    ASSERT(state_ == RegEncoderState::IDLE);

    num_max_range_input_ = CalculateNumNeededRangeTemps(GetGraph());

    state_ = RegEncoderState::RENUMBER_ARGS;
    if (!RenumberArgRegs()) {
        return false;
    }

    state_ = RegEncoderState::RESERVE_TEMPS;
    ASSERT(num_temps_ == 0);

    const auto num_regs = GetNumRegs();

    auto max_num_temps = num_temps_;
    CalculateNumNeededTemps();

    // Fixed-point loop: each renumbering may increase register numbers and
    // thus create new operands that need temps, so recompute until stable.
    while (max_num_temps != num_temps_) {
        ASSERT(num_temps_ > max_num_temps);

        if (num_regs > compiler::VIRTUAL_FRAME_SIZE - num_temps_) { // to avoid overflow
            return false; // no more free registers left in the frame
        }

        auto delta = static_cast<compiler::Register>(num_temps_ - max_num_temps);
        range_temps_start_ += delta;

        RenumberRegs(MIN_REGISTER_NUMBER, delta);

        max_num_temps = num_temps_;
        CalculateNumNeededTemps();
    }

    if (num_temps_ > 0 || num_max_range_input_ > 0) {
        state_ = RegEncoderState::INSERT_SPILLS;
        InsertSpills();

        // Shift the usage mask up by num_temps_ (iterating from the top down
        // so entries are not overwritten before being copied), then mark the
        // freshly reserved temp slots at the bottom as used.
        auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
        for (compiler::Register r = 0; r < num_regs; r++) {
            usage_mask->at(num_regs + num_temps_ - r - 1) = usage_mask->at(num_regs - r - 1);
        }
        std::fill(usage_mask->begin(), usage_mask->begin() + num_temps_, true);
    }

    SaveNumLocalsToGraph(GetNumLocalsFromGraph() + num_temps_);
    state_ = RegEncoderState::IDLE;

    return true;
}

// Maps a value type to the storage type used for its register:
// references stay as-is, 32-bit-or-narrower values widen to UINT32,
// everything else is held as UINT64.
static panda::compiler::DataType::Type GetRegType(panda::compiler::DataType::Type type)
{
    if (type == panda::compiler::DataType::REFERENCE) {
        return type;
    }
    if (panda::compiler::DataType::Is32Bits(type, Arch::NONE)) {
        return panda::compiler::DataType::UINT32;
    }
    return panda::compiler::DataType::UINT64;
}

// The accumulator pseudo-register and the "invalid" placeholder are never
// renumbered.
static bool RegNeedsRenumbering(panda::compiler::Register r)
{
    return r != panda::compiler::ACC_REG_ID && r != panda::compiler::INVALID_REG;
}

// Advances a register number by `delta`; the accumulator is left untouched.
// Relies on well-defined unsigned overflow of compiler::Register (see the
// static_assert in RenumberRegs).
static panda::compiler::Register RenumberReg(const panda::compiler::Register r, const panda::compiler::Register delta)
{
    if (r == panda::compiler::ACC_REG_ID) {
        return r;
    }
    return r + delta;
}

// Applies the renumbering to both endpoints of every move in a SpillFill
// instruction, for register locations numbered at or above `min_reg`.
static void RenumberSpillFillRegs(panda::compiler::SpillFillInst *inst, const panda::compiler::Register min_reg,
                                  const panda::compiler::Register delta)
{
    for (auto &sf : inst->GetSpillFills()) {
        if (sf.SrcType() == compiler::LocationType::REGISTER && sf.SrcValue() >= min_reg) {
            sf.SetSrc(compiler::Location::MakeRegister(RenumberReg(sf.SrcValue(), delta)));
        }
        if (sf.DstType() == compiler::LocationType::REGISTER && sf.DstValue() >= min_reg) {
            sf.SetDst(compiler::Location::MakeRegister(RenumberReg(sf.DstValue(), delta)));
        }
    }
}

// Shifts every register operand numbered >= min_reg forward by `delta`
// throughout the graph. Destinations are renumbered for all instructions;
// inputs are renumbered for ordinary instructions and SpillFills, while
// Phi/CatchPhi inputs are skipped (only their destination is adjusted).
void RegEncoder::RenumberRegs(const compiler::Register min_reg, const compiler::Register delta)
{
    // Renumbering always advances register number `delta` positions forward,
    // wrapping around on overflows with well-defined behaviour.
    // Hence the requirement to keep delta unsigned.
    static_assert(std::is_unsigned<compiler::Register>::value, "compiler::Register must be unsigned");
    ASSERT(delta > 0);

    for (auto *bb : GetGraph()->GetBlocksRPO()) {
        for (auto inst : bb->AllInsts()) {
            // Renumber output of any instruction, if applicable:
            if (RegNeedsRenumbering(inst->GetDstReg()) && inst->GetDstReg() >= min_reg) {
                inst->SetDstReg(RenumberReg(inst->GetDstReg(), delta));
            }

            if (inst->IsPhi() || inst->IsCatchPhi()) {
                continue;
            }

            // Renumber inputs and outputs of SpillFill instructions:
            if (inst->IsSpillFill()) {
                RenumberSpillFillRegs(inst->CastToSpillFill(), min_reg, delta);
                continue;
            }

            // Fix inputs of common instructions:
            for (size_t i = 0; i < inst->GetInputsCount(); i++) {
                if (RegNeedsRenumbering(inst->GetSrcReg(i)) && inst->GetSrcReg(i) >= min_reg) {
                    inst->SetSrcReg(i, RenumberReg(inst->GetSrcReg(i), delta));
                }
            }
        }
    }
}

// Compacts the virtual frame so that the method arguments remain at the top
// and there is room below them for the range-call temps.
// Frame layout (low to high): [locals][free gap][temps][arguments].
// Returns false when the free registers are insufficient for range calls.
bool RegEncoder::RenumberArgRegs()
{
    const auto usage_mask = GetGraph()->GetUsedRegs<compiler::DataType::INT64>();
    ASSERT(usage_mask->size() == compiler::VIRTUAL_FRAME_SIZE);

    auto frame_size = static_cast<compiler::Register>(usage_mask->size());
    const auto num_args = GetNumArgsFromGraph();
    ASSERT(frame_size >= num_args);

    auto num_non_args = static_cast<compiler::Register>(frame_size - num_args);
    if (num_max_range_input_ > num_non_args) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    // Count contiguously used registers from the bottom of the frame (locals).
    compiler::Register num_locals = 0;
    if (num_non_args != 0) {
        while (num_locals != num_non_args && usage_mask->at(num_locals)) {
            ++num_locals;
        }
    }

    // Count contiguously used registers from just below the arguments (temps).
    // `r` is unsigned: when it decrements past 0 it wraps to a huge value and
    // the `r < num_non_args` condition terminates the loop.
    compiler::Register num_temps = 0;
    if (num_locals != num_non_args) {
        compiler::Register r = num_non_args - 1;
        while (r < num_non_args && usage_mask->at(r)) {
            ++num_temps;
            --r;
        }
    }

    if (num_locals + num_temps > num_non_args - num_max_range_input_) {
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: The free regs for range call are not enough";
        return false;
    }

    range_temps_start_ = num_locals;

    bool do_renumber = true;

    if (num_non_args == 0 && num_max_range_input_ == 0) { // all registers are arguments: no need to renumber
        do_renumber = false;
    }

    // All free regs will be just enough to encode call.range: no need to renumber
    if (num_locals + num_temps + num_max_range_input_ == num_non_args) {
        do_renumber = false;
    }

    if (num_temps + num_args == 0) { // no temps and no args: nothing to renumber
        do_renumber = false;
    }

    if (do_renumber) {
        const auto min_reg = static_cast<compiler::Register>(num_non_args - num_temps);
        ASSERT(min_reg > MIN_REGISTER_NUMBER);

        // Assert that if temps are present, they are marked allocated in the mask:
        for (compiler::Register r = min_reg; r < min_reg + num_temps; r++) {
            ASSERT(usage_mask->at(r));
        }

        // Assert that there are no used regs between locals and temps + arguments:
        for (compiler::Register r = num_locals; r < min_reg; r++) {
            ASSERT(!usage_mask->at(r));
        }

        // delta closes the free gap (minus the range-temp reservation);
        // unsigned wraparound makes this shift registers the right amount.
        auto delta = static_cast<compiler::Register>(num_locals + num_temps + num_max_range_input_ - num_non_args);
        RenumberRegs(min_reg, delta);

        // Move the usage-mask bits of the renumbered registers and clear the
        // old positions.
        for (compiler::Register r = min_reg; r < frame_size; r++) {
            usage_mask->at(RenumberReg(r, delta)) = usage_mask->at(r);
            usage_mask->at(r) = false;
        }
    }

    SaveNumLocalsToGraph(num_locals + num_temps + num_max_range_input_);
    return true;
}

// INSERT_SPILLS phase: visits every instruction with inputs; the visitor
// (see Check4Width/InsertSpillsForInst) inserts the needed SpillFill moves.
void RegEncoder::InsertSpills()
{
    ASSERT(num_max_range_input_ > 0 || (num_temps_ > 0 && num_temps_ <= MAX_NUM_INPUTS));

    for (auto *bb : GetGraph()->GetBlocksRPO()) {
        for (auto inst : bb->AllInstsSafe()) {
            if (inst->GetInputsCount() == 0) {
                continue;
            }

            VisitInstruction(inst);
            // TODO(aantipina): Enable assert here for GetStatus() as soon as code generation is fully supported
        }
    }
}

// RESERVE_TEMPS phase: recomputes num_temps_ by visiting every instruction
// with inputs (the visitor dispatches to CalculateNumNeededTempsForInst).
void RegEncoder::CalculateNumNeededTemps()
{
    num_temps_ = 0;

    for (auto bb : GetGraph()->GetBlocksRPO()) {
        for (auto inst : bb->AllInstsSafe()) {
            if (inst->GetInputsCount() == 0) {
                continue;
            }

            VisitInstruction(inst);
            // TODO(aantipina): Enable here for GetStatus() as soon as code generation is fully supported
        }
    }

    LOG(DEBUG, BYTECODE_OPTIMIZER) << GetGraph()->GetRuntime()->GetMethodFullName(GetGraph()->GetMethod())
                                   << ": num_temps_ = " << std::to_string(num_temps_);
}

// Inserts one SpillFill instruction before `inst` containing a move
// dst <- src for each entry in `sp_container` (a RegContentMap or
// RegContentVec of src -> (dst, type) pairs). No-op on an empty container.
template <typename T>
static void AddMoveBefore(Inst *inst, const T &sp_container)
{
    if (sp_container.empty()) {
        return;
    }
    auto sf_inst = inst->GetBasicBlock()->GetGraph()->CreateInstSpillFill();
    for (auto const &[src, dst] : sp_container) {
        ASSERT(src != compiler::ACC_REG_ID);
        sf_inst->AddMove(src, dst.reg, GetRegType(dst.type));
        LOG(DEBUG, BYTECODE_OPTIMIZER) << "RegEncoder: Move v" << static_cast<int>(dst.reg) << " <- v"
                                       << static_cast<int>(src) << " was added";
    }
    inst->GetBasicBlock()->InsertBefore(sf_inst, inst);
}

// True when input position `pos` of `inst` is read from the accumulator and
// therefore occupies no register.
static bool IsAccReadPosition(compiler::Inst *inst, size_t pos)
{
    // Calls can have accumulator at any position, return false for them
    return !inst->IsCall() && inst->IsAccRead() && pos == AccReadIndex(inst);
}

// Spill insertion for intrinsics with dynamic operand lists.
// Non-range case: each distinct wide source register (>= NUM_COMPACTLY_ENCODED_REGS)
// is moved into a fresh temp starting at v0; duplicate sources reuse the same temp.
// Range case: every argument is moved into the contiguous run of temps starting
// at range_temps_start_, preserving argument order.
void RegEncoder::InsertSpillsForDynInputsInst(compiler::Inst *inst)
{
    ASSERT(state_ == RegEncoderState::INSERT_SPILLS);
    ASSERT(inst->IsIntrinsic());

    RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter()); // src -> (dst, src_type), non-callrange
    RegContentVec spill_vec(GetGraph()->GetLocalAllocator()->Adapter()); // spill_vec is used to handle callrange

    auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
    size_t start = 0;
    bool range = IsIntrinsicRange(inst) || (nargs - start > MAX_NUM_NON_RANGE_ARGS && CanHoldRange(inst));

    compiler::Register temp = range ? range_temps_start_ : 0;

    for (size_t i = start; i < nargs; ++i) {
        auto src_reg = inst->GetSrcReg(i);
        auto type = inst->GetInputType(i);

        // do not spillfill for acc-read position. For example, Intrinsic.FSTARR32
        if (IsAccReadPosition(inst, i)) {
            continue;
        }

        if (!range) {
            if (!RegNeedsRenumbering(src_reg) || src_reg < NUM_COMPACTLY_ENCODED_REGS) {
                continue;
            }

            auto res = spill_map.emplace(src_reg, RegContent(temp, type));
            if (res.second) {
                inst->SetSrcReg(i, temp++);
            } else {
                // Such register is already in map.
                // It can be ok for cases like: CallStatic v49, v49
                // Such instructions can be generated by optimizer too.
                const RegContent &reg_cont = res.first->second;
                inst->SetSrcReg(i, reg_cont.reg);
            }
        } else {
            spill_vec.emplace_back(src_reg, RegContent(temp, type));
            inst->SetSrcReg(i, temp++);
        }
    }

    AddMoveBefore(inst, spill_map);
    AddMoveBefore(inst, spill_vec);
}

// Spill insertion for instructions with fixed operand lists: every wide source
// register is moved into a fresh temp starting at v0 (duplicates share a temp).
// Dynamic-operand instructions are delegated to InsertSpillsForDynInputsInst.
void RegEncoder::InsertSpillsForInst(compiler::Inst *inst)
{
    ASSERT(state_ == RegEncoderState::INSERT_SPILLS);

    RegContentMap spill_map(GetGraph()->GetLocalAllocator()->Adapter()); // src -> (dst, src_type)

    if (inst->IsOperandsDynamic()) {
        InsertSpillsForDynInputsInst(inst);
        return;
    }

    compiler::Register temp = 0;
    for (size_t i = 0; i < inst->GetInputsCount(); i++) {
        auto reg = inst->GetSrcReg(i);
        if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
            auto res = spill_map.emplace(reg, RegContent(temp, GetRegType(inst->GetInputType(i))));
            if (res.second) {
                inst->SetSrcReg(i, temp++);
            } else {
                // Such register is already in map.
                // It can be ok for cases like: and v49, v49
                // Such instructions can be generated by optimizer too.
                const RegContent &reg_cont = res.first->second;
                inst->SetSrcReg(i, reg_cont.reg);
            }
        }
    }

    AddMoveBefore(inst, spill_map);
}

// Bumps `num_temps` when `reg` is a real register that does not fit the
// compact encoding and will therefore need a spill temp.
static void IncTempsIfNeeded(const compiler::Register reg, compiler::Register &num_temps)
{
    if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
        num_temps++;
    }
}

// Computes how many spill temps `inst` needs and raises num_temps_ to that
// amount. Range-encoded instructions need none (their temps are reserved
// separately via num_max_range_input_); acc-read positions are skipped.
void RegEncoder::CalculateNumNeededTempsForInst(compiler::Inst *inst)
{
    ASSERT(state_ == RegEncoderState::RESERVE_TEMPS);

    compiler::Register num_temps = 0;

    if (inst->IsOperandsDynamic()) {
        if (IsIntrinsicRange(inst)) {
            return;
        }
        ASSERT(inst->IsIntrinsic());

        auto nargs = inst->GetInputsCount() - (inst->RequireState() ? 1 : 0);
        size_t start = 0;

        if (nargs - start > MAX_NUM_NON_RANGE_ARGS) { // is call.range
            return;
        }

        for (size_t i = start; i < nargs; i++) {
            if (IsAccReadPosition(inst, i)) {
                continue;
            }
            auto reg = inst->GetSrcReg(i);
            if (RegNeedsRenumbering(reg) && reg >= NUM_COMPACTLY_ENCODED_REGS) {
                num_temps++;
            }
        }
    } else {
        for (size_t i = 0; i < inst->GetInputsCount(); i++) {
            IncTempsIfNeeded(inst->GetSrcReg(i), num_temps);
        }
    }

    ASSERT(num_temps <= MAX_NUM_INPUTS);

    num_temps_ = std::max(num_temps, num_temps_);
}

// Handles an instruction whose register operands are encoded with 4 bits:
// dispatches on the current pass phase to either count or insert spills.
void RegEncoder::Check4Width(compiler::Inst *inst)
{
    switch (state_) {
        case RegEncoderState::RESERVE_TEMPS: {
            CalculateNumNeededTempsForInst(inst);
            break;
        }
        case RegEncoderState::INSERT_SPILLS: {
            InsertSpillsForInst(inst);
            break;
        }
        default:
            UNREACHABLE();
    }
}

// 8-bit-encoded operands can address all currently usable registers, so no
// action is needed yet.
void RegEncoder::Check8Width([[maybe_unused]] compiler::Inst *inst)
{
    // TODO(aantipina): implement after it became possible to use register numbers more than 256 (#2697)
}

// Visitor entry for intrinsics: range intrinsics get the strict 4-bit
// treatment (contiguous temps), all others the 8-bit check.
void RegEncoder::VisitIntrinsic(GraphVisitor *visitor, Inst *inst)
{
    auto re = static_cast<RegEncoder *>(visitor);
    if (IsIntrinsicRange(inst)) {
        re->Check4Width(inst->CastToIntrinsic());
        return;
    }

    re->Check8Width(inst->CastToIntrinsic());
}

#include "generated/check_width.cpp"
}  // namespace panda::bytecodeopt