// Copyright 2017 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/baseline/liftoff-assembler.h"

#include <sstream>

#include "src/base/optional.h"
#include "src/base/platform/wrappers.h"
#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"
#include "src/compiler/linkage.h"
#include "src/compiler/wasm-compiler.h"
#include "src/utils/ostreams.h"
#include "src/wasm/baseline/liftoff-register.h"
#include "src/wasm/function-body-decoder-impl.h"
#include "src/wasm/object-access.h"
#include "src/wasm/wasm-linkage.h"
#include "src/wasm/wasm-opcodes.h"

namespace v8 {
namespace internal {
namespace wasm {

using VarState = LiftoffAssembler::VarState;
using ValueKindSig = LiftoffAssembler::ValueKindSig;

constexpr ValueKind LiftoffAssembler::kPointerKind;
constexpr ValueKind LiftoffAssembler::kTaggedKind;
constexpr ValueKind LiftoffAssembler::kSmiKind;

namespace {

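// Helper that records the register moves and loads needed to transform one
// value-stack configuration into another, and executes them when destructed
// (or when {Execute} is called). Register moves are executed first (breaking
// cycles by spilling); constant and stack-slot loads follow.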
class StackTransferRecipe {
  struct RegisterMove {
    LiftoffRegister src;
    ValueKind kind;
    constexpr RegisterMove(LiftoffRegister src, ValueKind kind)
        : src(src), kind(kind) {}
  };

  struct RegisterLoad {
    enum LoadKind : uint8_t {
      kNop,           // no-op, used for high fp of a fp pair.
      kConstant,      // load a constant value into a register.
      kStack,         // fill a register from a stack slot.
      kLowHalfStack,  // fill a register from the low half of a stack slot.
      kHighHalfStack  // fill a register from the high half of a stack slot.
    };

    LoadKind load_kind;
    ValueKind kind;
    int32_t value;  // i32 constant value or stack offset, depending on kind.

    // Named constructors.
    static RegisterLoad Const(WasmValue constant) {
      if (constant.type().kind() == kI32) {
        return {kConstant, kI32, constant.to_i32()};
      }
      DCHECK_EQ(kI64, constant.type().kind());
      int32_t i32_const = static_cast<int32_t>(constant.to_i64());
      DCHECK_EQ(constant.to_i64(), i32_const);
      return {kConstant, kI64, i32_const};
    }
    static RegisterLoad Stack(int32_t offset, ValueKind kind) {
      return {kStack, kind, offset};
    }
    static RegisterLoad HalfStack(int32_t offset, RegPairHalf half) {
      return {half == kLowWord ? kLowHalfStack : kHighHalfStack, kI32, offset};
    }
    static RegisterLoad Nop() {
      // ValueKind does not matter.
      return {kNop, kI32, 0};
    }

   private:
    RegisterLoad(LoadKind load_kind, ValueKind kind, int32_t value)
        : load_kind(load_kind), kind(kind), value(value) {}
  };

 public:
  explicit StackTransferRecipe(LiftoffAssembler* wasm_asm) : asm_(wasm_asm) {}
  StackTransferRecipe(const StackTransferRecipe&) = delete;
  StackTransferRecipe& operator=(const StackTransferRecipe&) = delete;
  ~StackTransferRecipe() { Execute(); }

  void Execute() {
    // First, execute register moves. Then load constants and stack values into
    // registers.
    ExecuteMoves();
    DCHECK(move_dst_regs_.is_empty());
    ExecuteLoads();
    DCHECK(load_dst_regs_.is_empty());
  }

  V8_INLINE void TransferStackSlot(const VarState& dst, const VarState& src) {
    DCHECK(CheckCompatibleStackSlotTypes(dst.kind(), src.kind()));
    if (dst.is_reg()) {
      LoadIntoRegister(dst.reg(), src, src.offset());
      return;
    }
    if (dst.is_const()) {
      DCHECK_EQ(dst.i32_const(), src.i32_const());
      return;
    }
    DCHECK(dst.is_stack());
    switch (src.loc()) {
      case VarState::kStack:
        if (src.offset() != dst.offset()) {
          asm_->MoveStackValue(dst.offset(), src.offset(), src.kind());
        }
        break;
      case VarState::kRegister:
        asm_->Spill(dst.offset(), src.reg(), src.kind());
        break;
      case VarState::kIntConst:
        asm_->Spill(dst.offset(), src.constant());
        break;
    }
  }

  V8_INLINE void LoadIntoRegister(LiftoffRegister dst,
                                  const LiftoffAssembler::VarState& src,
                                  uint32_t src_offset) {
    switch (src.loc()) {
      case VarState::kStack:
        LoadStackSlot(dst, src_offset, src.kind());
        break;
      case VarState::kRegister:
        DCHECK_EQ(dst.reg_class(), src.reg_class());
        if (dst != src.reg()) MoveRegister(dst, src.reg(), src.kind());
        break;
      case VarState::kIntConst:
        LoadConstant(dst, src.constant());
        break;
    }
  }

  void LoadI64HalfIntoRegister(LiftoffRegister dst,
                               const LiftoffAssembler::VarState& src,
                               int offset, RegPairHalf half) {
    // Use CHECK such that the remaining code is statically dead if
    // {kNeedI64RegPair} is false.
    CHECK(kNeedI64RegPair);
    DCHECK_EQ(kI64, src.kind());
    switch (src.loc()) {
      case VarState::kStack:
        LoadI64HalfStackSlot(dst, offset, half);
        break;
      case VarState::kRegister: {
        LiftoffRegister src_half =
            half == kLowWord ? src.reg().low() : src.reg().high();
        if (dst != src_half) MoveRegister(dst, src_half, kI32);
        break;
      }
      case VarState::kIntConst:
        int32_t value = src.i32_const();
        // The high word is the sign extension of the low word.
        if (half == kHighWord) value = value >> 31;
        LoadConstant(dst, WasmValue(value));
        break;
    }
  }

  void MoveRegister(LiftoffRegister dst, LiftoffRegister src, ValueKind kind) {
    DCHECK_NE(dst, src);
    DCHECK_EQ(dst.reg_class(), src.reg_class());
    DCHECK_EQ(reg_class_for(kind), src.reg_class());
    if (src.is_gp_pair()) {
      DCHECK_EQ(kI64, kind);
      if (dst.low() != src.low()) MoveRegister(dst.low(), src.low(), kI32);
      if (dst.high() != src.high()) MoveRegister(dst.high(), src.high(), kI32);
      return;
    }
    if (src.is_fp_pair()) {
      DCHECK_EQ(kS128, kind);
      if (dst.low() != src.low()) {
        MoveRegister(dst.low(), src.low(), kF64);
        MoveRegister(dst.high(), src.high(), kF64);
      }
      return;
    }
    if (move_dst_regs_.has(dst)) {
      DCHECK_EQ(register_move(dst)->src, src);
      // Non-fp registers can only occur with the exact same type.
      DCHECK_IMPLIES(!dst.is_fp(), register_move(dst)->kind == kind);
      // It can happen that one fp register holds both the f32 zero and the f64
      // zero, as the initial value for local variables. Move the value as f64
      // in that case.
      if (kind == kF64) register_move(dst)->kind = kF64;
      return;
    }
    move_dst_regs_.set(dst);
    ++*src_reg_use_count(src);
    *register_move(dst) = {src, kind};
  }

  void LoadConstant(LiftoffRegister dst, WasmValue value) {
    DCHECK(!load_dst_regs_.has(dst));
    load_dst_regs_.set(dst);
    if (dst.is_gp_pair()) {
      DCHECK_EQ(kI64, value.type().kind());
      int64_t i64 = value.to_i64();
      *register_load(dst.low()) =
          RegisterLoad::Const(WasmValue(static_cast<int32_t>(i64)));
      *register_load(dst.high()) =
          RegisterLoad::Const(WasmValue(static_cast<int32_t>(i64 >> 32)));
    } else {
      *register_load(dst) = RegisterLoad::Const(value);
    }
  }

  void LoadStackSlot(LiftoffRegister dst, uint32_t stack_offset,
                     ValueKind kind) {
    if (load_dst_regs_.has(dst)) {
      // It can happen that we spilled the same register to different stack
      // slots, and then we reload them later into the same dst register.
      // In that case, it is enough to load one of the stack slots.
      return;
    }
    load_dst_regs_.set(dst);
    if (dst.is_gp_pair()) {
      DCHECK_EQ(kI64, kind);
      *register_load(dst.low()) =
          RegisterLoad::HalfStack(stack_offset, kLowWord);
      *register_load(dst.high()) =
          RegisterLoad::HalfStack(stack_offset, kHighWord);
    } else if (dst.is_fp_pair()) {
      DCHECK_EQ(kS128, kind);
      // Only need a register_load for the low half, since we load all 128
      // bits in one go.
      // Both low and high need to be set in load_dst_regs_ but when iterating
      // over it, both low and high will be cleared, so we won't load twice.
      *register_load(dst.low()) = RegisterLoad::Stack(stack_offset, kind);
      *register_load(dst.high()) = RegisterLoad::Nop();
    } else {
      *register_load(dst) = RegisterLoad::Stack(stack_offset, kind);
    }
  }

  void LoadI64HalfStackSlot(LiftoffRegister dst, int offset, RegPairHalf half) {
    if (load_dst_regs_.has(dst)) {
      // It can happen that we spilled the same register to different stack
      // slots, and then we reload them later into the same dst register.
      // In that case, it is enough to load one of the stack slots.
      return;
    }
    load_dst_regs_.set(dst);
    *register_load(dst) = RegisterLoad::HalfStack(offset, half);
  }

 private:
  using MovesStorage =
      std::aligned_storage<kAfterMaxLiftoffRegCode * sizeof(RegisterMove),
                           alignof(RegisterMove)>::type;
  using LoadsStorage =
      std::aligned_storage<kAfterMaxLiftoffRegCode * sizeof(RegisterLoad),
                           alignof(RegisterLoad)>::type;

  ASSERT_TRIVIALLY_COPYABLE(RegisterMove);
  ASSERT_TRIVIALLY_COPYABLE(RegisterLoad);

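  // Both storages are indexed by {LiftoffRegister::liftoff_code()}. Entries
  // are only written and read for registers contained in {move_dst_regs_}
  // resp. {load_dst_regs_}, which is why the storage can stay uninitialized.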
  MovesStorage register_moves_;  // uninitialized
  LoadsStorage register_loads_;  // uninitialized
  int src_reg_use_count_[kAfterMaxLiftoffRegCode] = {0};
  LiftoffRegList move_dst_regs_;
  LiftoffRegList load_dst_regs_;
  LiftoffAssembler* const asm_;

  RegisterMove* register_move(LiftoffRegister reg) {
    return reinterpret_cast<RegisterMove*>(&register_moves_) +
           reg.liftoff_code();
  }
  RegisterLoad* register_load(LiftoffRegister reg) {
    return reinterpret_cast<RegisterLoad*>(&register_loads_) +
           reg.liftoff_code();
  }
  int* src_reg_use_count(LiftoffRegister reg) {
    return src_reg_use_count_ + reg.liftoff_code();
  }

  void ExecuteMove(LiftoffRegister dst) {
    RegisterMove* move = register_move(dst);
    DCHECK_EQ(0, *src_reg_use_count(dst));
    asm_->Move(dst, move->src, move->kind);
    ClearExecutedMove(dst);
  }

  void ClearExecutedMove(LiftoffRegister dst) {
    DCHECK(move_dst_regs_.has(dst));
    move_dst_regs_.clear(dst);
    RegisterMove* move = register_move(dst);
    DCHECK_LT(0, *src_reg_use_count(move->src));
    if (--*src_reg_use_count(move->src)) return;
    // src count dropped to zero. If this is a destination register, execute
    // that move now.
    if (!move_dst_regs_.has(move->src)) return;
    ExecuteMove(move->src);
  }

  void ExecuteMoves() {
    // Execute all moves whose {dst} is not being used as src in another move.
    // If any src count drops to zero, also (transitively) execute the
    // corresponding move to that register.
    for (LiftoffRegister dst : move_dst_regs_) {
      // Check if already handled via transitivity in {ClearExecutedMove}.
      if (!move_dst_regs_.has(dst)) continue;
      if (*src_reg_use_count(dst)) continue;
      ExecuteMove(dst);
    }

    // All remaining moves are part of a cycle. Just spill the first one, then
    // process all remaining moves in that cycle. Repeat for all cycles.
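    // Example: the cyclic moves {r0 <- r1, r1 <- r0} are resolved by spilling
    // r1, executing r1 <- r0 directly (r1's use count as a source drops to
    // zero), and finally reloading r0 from the spill slot in {ExecuteLoads}.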
    int last_spill_offset = asm_->TopSpillOffset();
    while (!move_dst_regs_.is_empty()) {
      // TODO(clemensb): Use an unused register if available.
      LiftoffRegister dst = move_dst_regs_.GetFirstRegSet();
      RegisterMove* move = register_move(dst);
      last_spill_offset += LiftoffAssembler::SlotSizeForType(move->kind);
      LiftoffRegister spill_reg = move->src;
      asm_->Spill(last_spill_offset, spill_reg, move->kind);
      // Remember to reload into the destination register later.
      LoadStackSlot(dst, last_spill_offset, move->kind);
      ClearExecutedMove(dst);
    }
  }

  void ExecuteLoads() {
    for (LiftoffRegister dst : load_dst_regs_) {
      RegisterLoad* load = register_load(dst);
      switch (load->load_kind) {
        case RegisterLoad::kNop:
          break;
        case RegisterLoad::kConstant:
          asm_->LoadConstant(dst, load->kind == kI64
                                      ? WasmValue(int64_t{load->value})
                                      : WasmValue(int32_t{load->value}));
          break;
        case RegisterLoad::kStack:
          if (kNeedS128RegPair && load->kind == kS128) {
            asm_->Fill(LiftoffRegister::ForFpPair(dst.fp()), load->value,
                       load->kind);
          } else {
            asm_->Fill(dst, load->value, load->kind);
          }
          break;
        case RegisterLoad::kLowHalfStack:
          // Half of a register pair, {dst} must be a gp register.
          asm_->FillI64Half(dst.gp(), load->value, kLowWord);
          break;
        case RegisterLoad::kHighHalfStack:
          // Half of a register pair, {dst} must be a gp register.
          asm_->FillI64Half(dst.gp(), load->value, kHighWord);
          break;
      }
    }
    load_dst_regs_ = {};
  }
};

class RegisterReuseMap {
 public:
  void Add(LiftoffRegister src, LiftoffRegister dst) {
    if (auto previous = Lookup(src)) {
      DCHECK_EQ(previous, dst);
      return;
    }
    map_.emplace_back(src);
    map_.emplace_back(dst);
  }

  base::Optional<LiftoffRegister> Lookup(LiftoffRegister src) {
    for (auto it = map_.begin(), end = map_.end(); it != end; it += 2) {
      if (it->is_gp_pair() == src.is_gp_pair() &&
          it->is_fp_pair() == src.is_fp_pair() && *it == src)
        return *(it + 1);
    }
    return {};
  }

 private:
  // {map_} holds pairs of <src, dst>.
  base::SmallVector<LiftoffRegister, 8> map_;
};

enum MergeKeepStackSlots : bool {
  kKeepStackSlots = true,
  kTurnStackSlotsIntoRegisters = false
};
enum MergeAllowConstants : bool {
  kConstantsAllowed = true,
  kConstantsNotAllowed = false
};
enum MergeAllowRegisters : bool {
  kRegistersAllowed = true,
  kRegistersNotAllowed = false
};
enum ReuseRegisters : bool {
  kReuseRegisters = true,
  kNoReuseRegisters = false
};
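// Initializes part of the target state of a merge from the corresponding part
// of the source state. Depending on the given modes, slots are kept on the
// stack or as constants, kept in (possibly reused) registers, or turned into
// stack slots when no free register is available.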
void InitMergeRegion(LiftoffAssembler::CacheState* state,
                     const VarState* source, VarState* target, uint32_t count,
                     MergeKeepStackSlots keep_stack_slots,
                     MergeAllowConstants allow_constants,
                     MergeAllowRegisters allow_registers,
                     ReuseRegisters reuse_registers, LiftoffRegList used_regs) {
  RegisterReuseMap register_reuse_map;
  for (const VarState* source_end = source + count; source < source_end;
       ++source, ++target) {
    if ((source->is_stack() && keep_stack_slots) ||
        (source->is_const() && allow_constants)) {
      *target = *source;
      continue;
    }
    base::Optional<LiftoffRegister> reg;
    if (allow_registers) {
      // First try: Keep the same register, if it's free.
      if (source->is_reg() && state->is_free(source->reg())) {
        reg = source->reg();
      }
      // Second try: Use the same register we used before (if we reuse
      // registers).
      if (!reg && reuse_registers) {
        reg = register_reuse_map.Lookup(source->reg());
      }
      // Third try: Use any free register.
      RegClass rc = reg_class_for(source->kind());
      if (!reg && state->has_unused_register(rc, used_regs)) {
        reg = state->unused_register(rc, used_regs);
      }
    }
    if (!reg) {
      // No free register; make this a stack slot.
      *target = VarState(source->kind(), source->offset());
      continue;
    }
    if (reuse_registers) register_reuse_map.Add(source->reg(), *reg);
    state->inc_used(*reg);
    *target = VarState(source->kind(), *reg, source->offset());
  }
}

}  // namespace

// TODO(clemensb): Don't copy the full parent state (this makes us N^2).
void LiftoffAssembler::CacheState::InitMerge(const CacheState& source,
                                             uint32_t num_locals,
                                             uint32_t arity,
                                             uint32_t stack_depth) {
  // |------locals------|---(in between)----|--(discarded)--|----merge----|
  //  <-- num_locals --> <-- stack_depth -->^stack_base      <-- arity -->
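  // The source stack can be deeper than the target: the slots between the
  // in-between region and the merge values (the "discarded" region) are
  // dropped, so only {num_locals + stack_depth + arity} slots are initialized.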

  if (source.cached_instance != no_reg) {
    SetInstanceCacheRegister(source.cached_instance);
  }

  if (source.cached_mem_start != no_reg) {
    SetMemStartCacheRegister(source.cached_mem_start);
  }

  uint32_t stack_base = stack_depth + num_locals;
  uint32_t target_height = stack_base + arity;
  uint32_t discarded = source.stack_height() - target_height;
  DCHECK(stack_state.empty());

  DCHECK_GE(source.stack_height(), stack_base);
  stack_state.resize_no_init(target_height);

  const VarState* source_begin = source.stack_state.data();
  VarState* target_begin = stack_state.data();

  // Try to keep locals and the merge region in their registers. Registers
  // used multiple times need to be copied to another free register. Compute
  // the list of used registers.
  LiftoffRegList used_regs;
  for (auto& src : base::VectorOf(source_begin, num_locals)) {
    if (src.is_reg()) used_regs.set(src.reg());
  }
  // If there is more than one operand in the merge region, a stack-to-stack
  // move can interfere with a register reload, which would not be handled
  // correctly by the StackTransferRecipe. To avoid this, spill all registers in
  // this region.
  MergeAllowRegisters allow_registers =
      arity <= 1 ? kRegistersAllowed : kRegistersNotAllowed;
  if (allow_registers) {
    for (auto& src :
         base::VectorOf(source_begin + stack_base + discarded, arity)) {
      if (src.is_reg()) used_regs.set(src.reg());
    }
  }

  // Initialize the merge region. If this region moves, try to turn stack slots
  // into registers since we need to load the value anyway.
  MergeKeepStackSlots keep_merge_stack_slots =
      discarded == 0 ? kKeepStackSlots : kTurnStackSlotsIntoRegisters;
  InitMergeRegion(this, source_begin + stack_base + discarded,
                  target_begin + stack_base, arity, keep_merge_stack_slots,
                  kConstantsNotAllowed, allow_registers, kNoReuseRegisters,
                  used_regs);
  // Shift spill offsets down to keep slots contiguous.
  int offset = stack_base == 0 ? StaticStackFrameSize()
                               : source.stack_state[stack_base - 1].offset();
  auto merge_region = base::VectorOf(target_begin + stack_base, arity);
  for (VarState& var : merge_region) {
    offset = LiftoffAssembler::NextSpillOffset(var.kind(), offset);
    var.set_offset(offset);
  }

  // Initialize the locals region. Here, stack slots stay stack slots (because
  // they do not move). Try to keep registers in registers, but avoid
  // duplicates.
  InitMergeRegion(this, source_begin, target_begin, num_locals, kKeepStackSlots,
                  kConstantsNotAllowed, kRegistersAllowed, kNoReuseRegisters,
                  used_regs);
  // Consistency check: All the {used_regs} are really in use now.
  DCHECK_EQ(used_regs, used_registers & used_regs);

  // Last, initialize the section in between. Here, constants are allowed, but
  // registers which are already used for the merge region or locals must be
  // moved to other registers or spilled. If a register appears twice in the
  // source region, make sure to use the same register twice in the target
  // region.
  InitMergeRegion(this, source_begin + num_locals, target_begin + num_locals,
                  stack_depth, kKeepStackSlots, kConstantsAllowed,
                  kRegistersAllowed, kReuseRegisters, used_regs);
}

void LiftoffAssembler::CacheState::Steal(const CacheState& source) {
  // Just use the move assignment operator.
  *this = std::move(source);
}

void LiftoffAssembler::CacheState::Split(const CacheState& source) {
  // Call the private copy assignment operator.
  *this = source;
}

namespace {
int GetSafepointIndexForStackSlot(const VarState& slot) {
  // index = 0 is for the stack slot at 'fp + kFixedFrameSizeAboveFp -
  // kSystemPointerSize'; the location of the current stack slot is 'fp -
  // slot.offset()'. The index we need is therefore '((fp +
  // kFixedFrameSizeAboveFp - kSystemPointerSize) - (fp - slot.offset())) /
  // kSystemPointerSize' = '(slot.offset() + kFixedFrameSizeAboveFp -
  // kSystemPointerSize) / kSystemPointerSize'.
  // Concretely, the index of the first stack slot is '4'.
  return (slot.offset() + StandardFrameConstants::kFixedFrameSizeAboveFp -
          kSystemPointerSize) /
         kSystemPointerSize;
}
}  // namespace

void LiftoffAssembler::CacheState::GetTaggedSlotsForOOLCode(
    ZoneVector<int>* slots, LiftoffRegList* spills,
    SpillLocation spill_location) {
  for (const auto& slot : stack_state) {
    if (!is_reference(slot.kind())) continue;

    if (spill_location == SpillLocation::kTopOfStack && slot.is_reg()) {
      // Registers get spilled just before the call to the runtime. In {spills}
      // we store which of the spilled registers contain references, so that we
      // can add the spill slots to the safepoint.
      spills->set(slot.reg());
      continue;
    }
    DCHECK_IMPLIES(slot.is_reg(), spill_location == SpillLocation::kStackSlots);

    slots->push_back(GetSafepointIndexForStackSlot(slot));
  }
}

void LiftoffAssembler::CacheState::DefineSafepoint(
    SafepointTableBuilder::Safepoint& safepoint) {
  for (const auto& slot : stack_state) {
    if (is_reference(slot.kind())) {
      DCHECK(slot.is_stack());
      safepoint.DefineTaggedStackSlot(GetSafepointIndexForStackSlot(slot));
    }
  }
}

void LiftoffAssembler::CacheState::DefineSafepointWithCalleeSavedRegisters(
    SafepointTableBuilder::Safepoint& safepoint) {
  for (const auto& slot : stack_state) {
    if (!is_reference(slot.kind())) continue;
    if (slot.is_stack()) {
      safepoint.DefineTaggedStackSlot(GetSafepointIndexForStackSlot(slot));
    } else {
      DCHECK(slot.is_reg());
      safepoint.DefineTaggedRegister(slot.reg().gp().code());
    }
  }
  if (cached_instance != no_reg) {
    safepoint.DefineTaggedRegister(cached_instance.code());
  }
}

int LiftoffAssembler::GetTotalFrameSlotCountForGC() const {
  // The GC does not care about the actual number of spill slots, just about
  // the number of references that could be there in the spilling area. Note
  // that the offset of the first spill slot is kSystemPointerSize and not
  // '0'. Therefore we don't have to add '+1' here.
  return (max_used_spill_offset_ +
          StandardFrameConstants::kFixedFrameSizeAboveFp +
          ool_spill_space_size_) /
         kSystemPointerSize;
}

namespace {

AssemblerOptions DefaultLiftoffOptions() { return AssemblerOptions{}; }

}  // namespace

LiftoffAssembler::LiftoffAssembler(std::unique_ptr<AssemblerBuffer> buffer)
    : TurboAssembler(nullptr, DefaultLiftoffOptions(), CodeObjectRequired::kNo,
                     std::move(buffer)) {
  set_abort_hard(true);  // Avoid calls to Abort.
}

LiftoffAssembler::~LiftoffAssembler() {
  if (num_locals_ > kInlineLocalKinds) {
    base::Free(more_local_kinds_);
  }
}

LiftoffRegister LiftoffAssembler::LoadToRegister(VarState slot,
                                                 LiftoffRegList pinned) {
  if (slot.is_reg()) return slot.reg();
  LiftoffRegister reg = GetUnusedRegister(reg_class_for(slot.kind()), pinned);
  if (slot.is_const()) {
    LoadConstant(reg, slot.constant());
  } else {
    DCHECK(slot.is_stack());
    Fill(reg, slot.offset(), slot.kind());
  }
  return reg;
}

LiftoffRegister LiftoffAssembler::LoadI64HalfIntoRegister(VarState slot,
                                                          RegPairHalf half) {
  if (slot.is_reg()) {
    return half == kLowWord ? slot.reg().low() : slot.reg().high();
  }
  LiftoffRegister dst = GetUnusedRegister(kGpReg, {});
  if (slot.is_stack()) {
    FillI64Half(dst.gp(), slot.offset(), half);
    return dst;
  }
  DCHECK(slot.is_const());
  int32_t half_word =
      static_cast<int32_t>(half == kLowWord ? slot.constant().to_i64()
                                            : slot.constant().to_i64() >> 32);
  LoadConstant(dst, WasmValue(half_word));
  return dst;
}

LiftoffRegister LiftoffAssembler::PeekToRegister(int index,
                                                 LiftoffRegList pinned) {
  DCHECK_LT(index, cache_state_.stack_state.size());
  VarState& slot = cache_state_.stack_state.end()[-1 - index];
  if (slot.is_reg()) {
    return slot.reg();
  }
  LiftoffRegister reg = LoadToRegister(slot, pinned);
  cache_state_.inc_used(reg);
  slot.MakeRegister(reg);
  return reg;
}

void LiftoffAssembler::DropValues(int count) {
  for (int i = 0; i < count; ++i) {
    DCHECK(!cache_state_.stack_state.empty());
    VarState slot = cache_state_.stack_state.back();
    cache_state_.stack_state.pop_back();
    if (slot.is_reg()) {
      cache_state_.dec_used(slot.reg());
    }
  }
}

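// Drops the value at {depth} (an absolute index into the value stack, counted
// from the bottom) and shifts all slots above it down by one.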
void LiftoffAssembler::DropValue(int depth) {
  auto* dropped = cache_state_.stack_state.begin() + depth;
  if (dropped->is_reg()) {
    cache_state_.dec_used(dropped->reg());
  }
  std::copy(dropped + 1, cache_state_.stack_state.end(), dropped);
  cache_state_.stack_state.pop_back();
}

void LiftoffAssembler::PrepareLoopArgs(int num) {
  for (int i = 0; i < num; ++i) {
    VarState& slot = cache_state_.stack_state.end()[-1 - i];
    if (slot.is_stack()) continue;
    RegClass rc = reg_class_for(slot.kind());
    if (slot.is_reg()) {
      if (cache_state_.get_use_count(slot.reg()) > 1) {
        // If the register is used more than once, we cannot use it for the
        // merge. Move it to an unused register instead.
        LiftoffRegList pinned;
        pinned.set(slot.reg());
        LiftoffRegister dst_reg = GetUnusedRegister(rc, pinned);
        Move(dst_reg, slot.reg(), slot.kind());
        cache_state_.dec_used(slot.reg());
        cache_state_.inc_used(dst_reg);
        slot.MakeRegister(dst_reg);
      }
      continue;
    }
    LiftoffRegister reg = GetUnusedRegister(rc, {});
    LoadConstant(reg, slot.constant());
    slot.MakeRegister(reg);
    cache_state_.inc_used(reg);
  }
}

void LiftoffAssembler::MaterializeMergedConstants(uint32_t arity) {
  // Materialize constants on top of the stack ({arity} many), and locals.
  VarState* stack_base = cache_state_.stack_state.data();
  for (auto slots :
       {base::VectorOf(stack_base + cache_state_.stack_state.size() - arity,
                       arity),
        base::VectorOf(stack_base, num_locals())}) {
    for (VarState& slot : slots) {
      if (!slot.is_const()) continue;
      RegClass rc = reg_class_for(slot.kind());
      if (cache_state_.has_unused_register(rc)) {
        LiftoffRegister reg = cache_state_.unused_register(rc);
        LoadConstant(reg, slot.constant());
        cache_state_.inc_used(reg);
        slot.MakeRegister(reg);
      } else {
        Spill(slot.offset(), slot.constant());
        slot.MakeStack();
      }
    }
  }
}

#ifdef DEBUG
namespace {
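// Returns true if both slots live on the stack and the byte ranges
// [offset - size, offset) they occupy overlap.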
bool SlotInterference(const VarState& a, const VarState& b) {
  return a.is_stack() && b.is_stack() &&
         b.offset() > a.offset() - value_kind_size(a.kind()) &&
         b.offset() - value_kind_size(b.kind()) < a.offset();
}

bool SlotInterference(const VarState& a, base::Vector<const VarState> v) {
  return std::any_of(v.begin(), v.end(), [&a](const VarState& b) {
    return SlotInterference(a, b);
  });
}
}  // namespace
#endif

void LiftoffAssembler::MergeFullStackWith(CacheState& target,
                                          const CacheState& source) {
  DCHECK_EQ(source.stack_height(), target.stack_height());
  // TODO(clemensb): Reuse the same StackTransferRecipe object to save some
  // allocations.
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0, e = source.stack_height(); i < e; ++i) {
    transfers.TransferStackSlot(target.stack_state[i], source.stack_state[i]);
    DCHECK(!SlotInterference(target.stack_state[i],
                             base::VectorOf(source.stack_state.data() + i + 1,
                                            source.stack_height() - i - 1)));
  }

  // Full stack merging is only done for forward jumps, so we can just clear the
  // cache registers at the target in case of mismatch.
  if (source.cached_instance != target.cached_instance) {
    target.ClearCachedInstanceRegister();
  }
  if (source.cached_mem_start != target.cached_mem_start) {
    target.ClearCachedMemStartRegister();
  }
}

void LiftoffAssembler::MergeStackWith(CacheState& target, uint32_t arity,
                                      JumpDirection jump_direction) {
  // Before: ----------------|----- (discarded) ----|--- arity ---|
  //                         ^target_stack_height   ^stack_base   ^stack_height
  // After:  ----|-- arity --|
  //             ^           ^target_stack_height
  //             ^target_stack_base
  uint32_t stack_height = cache_state_.stack_height();
  uint32_t target_stack_height = target.stack_height();
  DCHECK_LE(target_stack_height, stack_height);
  DCHECK_LE(arity, target_stack_height);
  uint32_t stack_base = stack_height - arity;
  uint32_t target_stack_base = target_stack_height - arity;
  StackTransferRecipe transfers(this);
  for (uint32_t i = 0; i < target_stack_base; ++i) {
    transfers.TransferStackSlot(target.stack_state[i],
                                cache_state_.stack_state[i]);
    DCHECK(!SlotInterference(
        target.stack_state[i],
        base::VectorOf(cache_state_.stack_state.data() + i + 1,
                       target_stack_base - i - 1)));
    DCHECK(!SlotInterference(
        target.stack_state[i],
        base::VectorOf(cache_state_.stack_state.data() + stack_base, arity)));
  }
  for (uint32_t i = 0; i < arity; ++i) {
    transfers.TransferStackSlot(target.stack_state[target_stack_base + i],
                                cache_state_.stack_state[stack_base + i]);
    DCHECK(!SlotInterference(
        target.stack_state[target_stack_base + i],
        base::VectorOf(cache_state_.stack_state.data() + stack_base + i + 1,
                       arity - i - 1)));
  }

  // Check whether the cached instance and/or memory start need to be moved to
  // another register. Register moves are executed as part of the
  // {StackTransferRecipe}. Remember whether the register content has to be
  // reloaded after executing the stack transfers.
  bool reload_instance = false;
  bool reload_mem_start = false;
  for (auto tuple :
       {std::make_tuple(&reload_instance, cache_state_.cached_instance,
                        &target.cached_instance),
        std::make_tuple(&reload_mem_start, cache_state_.cached_mem_start,
                        &target.cached_mem_start)}) {
    bool* reload = std::get<0>(tuple);
    Register src_reg = std::get<1>(tuple);
    Register* dst_reg = std::get<2>(tuple);
    // If the registers match, or the destination has no cache register, nothing
    // needs to be done.
    if (src_reg == *dst_reg || *dst_reg == no_reg) continue;
    // On forward jumps, just reset the cached register in the target state.
    if (jump_direction == kForwardJump) {
      target.ClearCacheRegister(dst_reg);
    } else if (src_reg != no_reg) {
      // If the source has the content but in the wrong register, execute a
      // register move as part of the stack transfer.
      transfers.MoveRegister(LiftoffRegister{*dst_reg},
                             LiftoffRegister{src_reg}, kPointerKind);
    } else {
      // Otherwise (the source state has no cached content), we reload later.
      *reload = true;
    }
  }

  // Now execute stack transfers and register moves/loads.
  transfers.Execute();

  if (reload_instance) {
    LoadInstanceFromFrame(target.cached_instance);
  }
  if (reload_mem_start) {
    // {target.cached_instance} already got restored above, so we can use it
    // if it exists.
    Register instance = target.cached_instance;
    if (instance == no_reg) {
      // We don't have the instance available yet. Store it into the target
      // mem_start, so that we can load the mem_start from there.
      instance = target.cached_mem_start;
      LoadInstanceFromFrame(instance);
    }
    LoadFromInstance(
        target.cached_mem_start, instance,
        ObjectAccess::ToTagged(WasmInstanceObject::kMemoryStartOffset),
        sizeof(size_t));
#ifdef V8_SANDBOXED_POINTERS
    DecodeSandboxedPointer(target.cached_mem_start);
#endif
  }
}

void LiftoffAssembler::Spill(VarState* slot) {
  switch (slot->loc()) {
    case VarState::kStack:
      return;
    case VarState::kRegister:
      Spill(slot->offset(), slot->reg(), slot->kind());
      cache_state_.dec_used(slot->reg());
      break;
    case VarState::kIntConst:
      Spill(slot->offset(), slot->constant());
      break;
  }
  slot->MakeStack();
}

void LiftoffAssembler::SpillLocals() {
  for (uint32_t i = 0; i < num_locals_; ++i) {
    Spill(&cache_state_.stack_state[i]);
  }
}

void LiftoffAssembler::SpillAllRegisters() {
  for (uint32_t i = 0, e = cache_state_.stack_height(); i < e; ++i) {
    auto& slot = cache_state_.stack_state[i];
    if (!slot.is_reg()) continue;
    Spill(slot.offset(), slot.reg(), slot.kind());
    slot.MakeStack();
  }
  cache_state_.ClearAllCacheRegisters();
  cache_state_.reset_used_registers();
}

void LiftoffAssembler::ClearRegister(
    Register reg, std::initializer_list<Register*> possible_uses,
    LiftoffRegList pinned) {
  if (reg == cache_state()->cached_instance) {
    cache_state()->ClearCachedInstanceRegister();
    // We can return immediately. The instance is only used to load information
    // at the beginning of an instruction when values don't have to be in
    // specific registers yet. Therefore the instance should never be one of the
    // {possible_uses}.
    for (Register* use : possible_uses) {
      USE(use);
      DCHECK_NE(reg, *use);
    }
    return;
  } else if (reg == cache_state()->cached_mem_start) {
    cache_state()->ClearCachedMemStartRegister();
    // The memory start may be among the {possible_uses}, e.g. for an atomic
    // compare exchange. Therefore it is necessary to iterate over the
    // {possible_uses} below, and we cannot return early.
  } else if (cache_state()->is_used(LiftoffRegister(reg))) {
    SpillRegister(LiftoffRegister(reg));
  }
  Register replacement = no_reg;
  for (Register* use : possible_uses) {
    if (reg != *use) continue;
    if (replacement == no_reg) {
      replacement = GetUnusedRegister(kGpReg, pinned).gp();
      Move(replacement, reg, kPointerKind);
    }
    // We cannot leave this loop early. There may be multiple uses of {reg}.
    *use = replacement;
  }
}

namespace {
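// Walks the parameters of {sig} backwards and, for each lowered parameter,
// either records a register load/move in {stack_transfers} (also adding the
// register to {param_regs}) or adds an entry to {stack_slots}, depending on
// the location assigned by {call_descriptor}.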
void PrepareStackTransfers(const ValueKindSig* sig,
                           compiler::CallDescriptor* call_descriptor,
                           const VarState* slots,
                           LiftoffStackSlots* stack_slots,
                           StackTransferRecipe* stack_transfers,
                           LiftoffRegList* param_regs) {
  // Process parameters backwards, to reduce the amount of Slot sorting for
  // the most common case - a normal Wasm Call. Slots will be mostly unsorted
  // in the Builtin call case.
  uint32_t call_desc_input_idx =
      static_cast<uint32_t>(call_descriptor->InputCount());
  uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
  for (uint32_t i = num_params; i > 0; --i) {
    const uint32_t param = i - 1;
    ValueKind kind = sig->GetParam(param);
    const bool is_gp_pair = kNeedI64RegPair && kind == kI64;
    const int num_lowered_params = is_gp_pair ? 2 : 1;
    const VarState& slot = slots[param];
    const uint32_t stack_offset = slot.offset();
    // Process both halves of a register pair separately, because they are
    // passed as separate parameters. One or both of them could end up on the
    // stack.
    for (int lowered_idx = 0; lowered_idx < num_lowered_params; ++lowered_idx) {
      const RegPairHalf half =
          is_gp_pair && lowered_idx == 0 ? kHighWord : kLowWord;
      --call_desc_input_idx;
      compiler::LinkageLocation loc =
          call_descriptor->GetInputLocation(call_desc_input_idx);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        RegClass rc = is_gp_pair ? kGpReg : reg_class_for(kind);
        int reg_code = loc.AsRegister();
        LiftoffRegister reg =
            LiftoffRegister::from_external_code(rc, kind, reg_code);
        param_regs->set(reg);
        if (is_gp_pair) {
          stack_transfers->LoadI64HalfIntoRegister(reg, slot, stack_offset,
                                                   half);
        } else {
          stack_transfers->LoadIntoRegister(reg, slot, stack_offset);
        }
      } else {
        DCHECK(loc.IsCallerFrameSlot());
        int param_offset = -loc.GetLocation() - 1;
        stack_slots->Add(slot, stack_offset, half, param_offset);
      }
    }
  }
}

}  // namespace

void LiftoffAssembler::PrepareBuiltinCall(
    const ValueKindSig* sig, compiler::CallDescriptor* call_descriptor,
    std::initializer_list<VarState> params) {
  LiftoffStackSlots stack_slots(this);
  StackTransferRecipe stack_transfers(this);
  LiftoffRegList param_regs;
  PrepareStackTransfers(sig, call_descriptor, params.begin(), &stack_slots,
                        &stack_transfers, &param_regs);
  SpillAllRegisters();
  int param_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
  if (param_slots > 0) {
    stack_slots.Construct(param_slots);
  }
  // Execute the stack transfers before filling the instance register.
  stack_transfers.Execute();

  // Reset register use counters.
  cache_state_.reset_used_registers();
}

void LiftoffAssembler::PrepareCall(const ValueKindSig* sig,
                                   compiler::CallDescriptor* call_descriptor,
                                   Register* target,
                                   Register* target_instance) {
  uint32_t num_params = static_cast<uint32_t>(sig->parameter_count());
  // Input 0 is the call target.
  constexpr size_t kInputShift = 1;

  // Spill all cache slots which are not being used as parameters.
  cache_state_.ClearAllCacheRegisters();
  for (VarState* it = cache_state_.stack_state.end() - 1 - num_params;
       it >= cache_state_.stack_state.begin() &&
       !cache_state_.used_registers.is_empty();
       --it) {
    if (!it->is_reg()) continue;
    Spill(it->offset(), it->reg(), it->kind());
    cache_state_.dec_used(it->reg());
    it->MakeStack();
  }

  LiftoffStackSlots stack_slots(this);
  StackTransferRecipe stack_transfers(this);
  LiftoffRegList param_regs;

  // Move the target instance (if supplied) into the correct instance register.
  compiler::LinkageLocation instance_loc =
      call_descriptor->GetInputLocation(kInputShift);
  DCHECK(instance_loc.IsRegister() && !instance_loc.IsAnyRegister());
  Register instance_reg = Register::from_code(instance_loc.AsRegister());
  param_regs.set(instance_reg);
  if (target_instance && *target_instance != instance_reg) {
    stack_transfers.MoveRegister(LiftoffRegister(instance_reg),
                                 LiftoffRegister(*target_instance),
                                 kPointerKind);
  }

  int param_slots = static_cast<int>(call_descriptor->ParameterSlotCount());
  if (num_params) {
    uint32_t param_base = cache_state_.stack_height() - num_params;
    PrepareStackTransfers(sig, call_descriptor,
                          &cache_state_.stack_state[param_base], &stack_slots,
                          &stack_transfers, &param_regs);
  }

  // If the target register overlaps with a parameter register, then move the
  // target to another free register, or spill to the stack.
  if (target && param_regs.has(LiftoffRegister(*target))) {
    // Try to find another free register.
    LiftoffRegList free_regs = kGpCacheRegList.MaskOut(param_regs);
    if (!free_regs.is_empty()) {
      LiftoffRegister new_target = free_regs.GetFirstRegSet();
      stack_transfers.MoveRegister(new_target, LiftoffRegister(*target),
                                   kPointerKind);
      *target = new_target.gp();
    } else {
      stack_slots.Add(VarState(kPointerKind, LiftoffRegister(*target), 0),
                      param_slots);
      param_slots++;
      *target = no_reg;
    }
  }

  if (param_slots > 0) {
    stack_slots.Construct(param_slots);
  }
  // Execute the stack transfers before filling the instance register.
  stack_transfers.Execute();
  // Pop parameters from the value stack.
  cache_state_.stack_state.pop_back(num_params);

  // Reset register use counters.
  cache_state_.reset_used_registers();

  // Reload the instance from the stack.
  if (!target_instance) {
    LoadInstanceFromFrame(instance_reg);
  }
}

void LiftoffAssembler::FinishCall(const ValueKindSig* sig,
                                  compiler::CallDescriptor* call_descriptor) {
  int call_desc_return_idx = 0;
  for (ValueKind return_kind : sig->returns()) {
    DCHECK_LT(call_desc_return_idx, call_descriptor->ReturnCount());
    const bool needs_gp_pair = needs_gp_reg_pair(return_kind);
    const int num_lowered_params = 1 + needs_gp_pair;
    const ValueKind lowered_kind = needs_gp_pair ? kI32 : return_kind;
    const RegClass rc = reg_class_for(lowered_kind);
    // Initialize to anything, will be set in the loop and used afterwards.
    LiftoffRegister reg_pair[2] = {kGpCacheRegList.GetFirstRegSet(),
                                   kGpCacheRegList.GetFirstRegSet()};
    LiftoffRegList pinned;
    for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
      compiler::LinkageLocation loc =
          call_descriptor->GetReturnLocation(call_desc_return_idx++);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        reg_pair[pair_idx] = LiftoffRegister::from_external_code(
            rc, lowered_kind, loc.AsRegister());
      } else {
        DCHECK(loc.IsCallerFrameSlot());
        reg_pair[pair_idx] = GetUnusedRegister(rc, pinned);
        // Get slot offset relative to the stack pointer.
        int offset = call_descriptor->GetOffsetToReturns();
        int return_slot = -loc.GetLocation() - offset - 1;
        LoadReturnStackSlot(reg_pair[pair_idx],
                            return_slot * kSystemPointerSize, lowered_kind);
      }
      if (pair_idx == 0) {
        pinned.set(reg_pair[0]);
      }
    }
    if (num_lowered_params == 1) {
      PushRegister(return_kind, reg_pair[0]);
    } else {
      PushRegister(return_kind, LiftoffRegister::ForPair(reg_pair[0].gp(),
                                                         reg_pair[1].gp()));
    }
  }
  int return_slots = static_cast<int>(call_descriptor->ReturnSlotCount());
  RecordUsedSpillOffset(TopSpillOffset() + return_slots * kSystemPointerSize);
}

void LiftoffAssembler::Move(LiftoffRegister dst, LiftoffRegister src,
                            ValueKind kind) {
  DCHECK_EQ(dst.reg_class(), src.reg_class());
  DCHECK_NE(dst, src);
  if (kNeedI64RegPair && dst.is_gp_pair()) {
    // Use the {StackTransferRecipe} to move pairs, as the registers in the
    // pairs might overlap.
    StackTransferRecipe(this).MoveRegister(dst, src, kind);
  } else if (kNeedS128RegPair && dst.is_fp_pair()) {
    // Calling low_fp is fine; Move will automatically check the kind and
    // convert this FP to its SIMD register, and use a SIMD move.
    Move(dst.low_fp(), src.low_fp(), kind);
  } else if (dst.is_gp()) {
    Move(dst.gp(), src.gp(), kind);
  } else {
    Move(dst.fp(), src.fp(), kind);
  }
}

void LiftoffAssembler::ParallelRegisterMove(
    base::Vector<const ParallelRegisterMoveTuple> tuples) {
  StackTransferRecipe stack_transfers(this);
  for (auto tuple : tuples) {
    if (tuple.dst == tuple.src) continue;
    stack_transfers.MoveRegister(tuple.dst, tuple.src, tuple.kind);
  }
}

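// Moves the return value(s) from the value stack into the locations (registers
// and caller frame slots) prescribed by {descriptor}.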
void LiftoffAssembler::MoveToReturnLocations(
    const FunctionSig* sig, compiler::CallDescriptor* descriptor) {
  StackTransferRecipe stack_transfers(this);
  if (sig->return_count() == 1) {
    ValueKind return_kind = sig->GetReturn(0).kind();
    // Defaults to a gp reg, will be set below if return kind is not gp.
    LiftoffRegister return_reg = LiftoffRegister(kGpReturnRegisters[0]);

    if (needs_gp_reg_pair(return_kind)) {
      return_reg = LiftoffRegister::ForPair(kGpReturnRegisters[0],
                                            kGpReturnRegisters[1]);
    } else if (needs_fp_reg_pair(return_kind)) {
      return_reg = LiftoffRegister::ForFpPair(kFpReturnRegisters[0]);
    } else if (reg_class_for(return_kind) == kFpReg) {
      return_reg = LiftoffRegister(kFpReturnRegisters[0]);
    } else {
      DCHECK_EQ(kGpReg, reg_class_for(return_kind));
    }
    stack_transfers.LoadIntoRegister(return_reg,
                                     cache_state_.stack_state.back(),
                                     cache_state_.stack_state.back().offset());
    return;
  }

  // Slow path for multi-return.
  // We sometimes allocate a register to perform stack-to-stack moves, which can
  // cause a spill in the cache state. Conservatively save and restore the
  // original state in case it is needed after the current instruction
  // (conditional branch).
  CacheState saved_state;
  saved_state.Split(*cache_state());
  int call_desc_return_idx = 0;
  DCHECK_LE(sig->return_count(), cache_state_.stack_height());
  VarState* slots = cache_state_.stack_state.end() - sig->return_count();
  // Fill return frame slots first to ensure that all potential spills happen
  // before we prepare the stack transfers.
  for (size_t i = 0; i < sig->return_count(); ++i) {
    ValueKind return_kind = sig->GetReturn(i).kind();
    bool needs_gp_pair = needs_gp_reg_pair(return_kind);
    int num_lowered_params = 1 + needs_gp_pair;
    for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
      compiler::LinkageLocation loc =
          descriptor->GetReturnLocation(call_desc_return_idx++);
      if (loc.IsCallerFrameSlot()) {
        RegPairHalf half = pair_idx == 0 ? kLowWord : kHighWord;
        VarState& slot = slots[i];
        LiftoffRegister reg = needs_gp_pair
                                  ? LoadI64HalfIntoRegister(slot, half)
                                  : LoadToRegister(slot, {});
        ValueKind lowered_kind = needs_gp_pair ? kI32 : return_kind;
        StoreCallerFrameSlot(reg, -loc.AsCallerFrameSlot(), lowered_kind);
      }
    }
  }
  // Prepare and execute stack transfers.
  call_desc_return_idx = 0;
  for (size_t i = 0; i < sig->return_count(); ++i) {
    ValueKind return_kind = sig->GetReturn(i).kind();
    bool needs_gp_pair = needs_gp_reg_pair(return_kind);
    int num_lowered_params = 1 + needs_gp_pair;
    for (int pair_idx = 0; pair_idx < num_lowered_params; ++pair_idx) {
      RegPairHalf half = pair_idx == 0 ? kLowWord : kHighWord;
      compiler::LinkageLocation loc =
          descriptor->GetReturnLocation(call_desc_return_idx++);
      if (loc.IsRegister()) {
        DCHECK(!loc.IsAnyRegister());
        int reg_code = loc.AsRegister();
        ValueKind lowered_kind = needs_gp_pair ? kI32 : return_kind;
        RegClass rc = reg_class_for(lowered_kind);
        LiftoffRegister reg =
            LiftoffRegister::from_external_code(rc, return_kind, reg_code);
        VarState& slot = slots[i];
        if (needs_gp_pair) {
          stack_transfers.LoadI64HalfIntoRegister(reg, slot, slot.offset(),
                                                  half);
        } else {
          stack_transfers.LoadIntoRegister(reg, slot, slot.offset());
        }
      }
    }
  }
  cache_state()->Steal(saved_state);
}

#ifdef ENABLE_SLOW_DCHECKS
bool LiftoffAssembler::ValidateCacheState() const {
  uint32_t register_use_count[kAfterMaxLiftoffRegCode] = {0};
  LiftoffRegList used_regs;
  for (const VarState& var : cache_state_.stack_state) {
    if (!var.is_reg()) continue;
    LiftoffRegister reg = var.reg();
    if ((kNeedI64RegPair || kNeedS128RegPair) && reg.is_pair()) {
      ++register_use_count[reg.low().liftoff_code()];
      ++register_use_count[reg.high().liftoff_code()];
    } else {
      ++register_use_count[reg.liftoff_code()];
    }
    used_regs.set(reg);
  }
  for (Register cache_reg :
       {cache_state_.cached_instance, cache_state_.cached_mem_start}) {
    if (cache_reg != no_reg) {
      DCHECK(!used_regs.has(cache_reg));
      int liftoff_code = LiftoffRegister{cache_reg}.liftoff_code();
      used_regs.set(cache_reg);
      DCHECK_EQ(0, register_use_count[liftoff_code]);
      register_use_count[liftoff_code] = 1;
    }
  }
  bool valid = memcmp(register_use_count, cache_state_.register_use_count,
                      sizeof(register_use_count)) == 0 &&
               used_regs == cache_state_.used_registers;
  if (valid) return true;
  std::ostringstream os;
  os << "Error in LiftoffAssembler::ValidateCacheState().\n";
  os << "expected: used_regs " << used_regs << ", counts "
     << PrintCollection(register_use_count) << "\n";
  os << "found:    used_regs " << cache_state_.used_registers << ", counts "
     << PrintCollection(cache_state_.register_use_count) << "\n";
  os << "Use --trace-wasm-decoder and --trace-liftoff to debug.";
  FATAL("%s", os.str().c_str());
}
#endif

LiftoffRegister LiftoffAssembler::SpillOneRegister(LiftoffRegList candidates) {
  // Spill one cached value to free a register.
  LiftoffRegister spill_reg = cache_state_.GetNextSpillReg(candidates);
  SpillRegister(spill_reg);
  return spill_reg;
}

LiftoffRegister LiftoffAssembler::SpillAdjacentFpRegisters(
    LiftoffRegList pinned) {
  // We end up in this call only when:
  // [1] kNeedS128RegPair, and
  // [2] there is no pair of adjacent FP registers that is free.
  CHECK(kNeedS128RegPair);
  DCHECK(!kFpCacheRegList.MaskOut(pinned)
              .MaskOut(cache_state_.used_registers)
              .HasAdjacentFpRegsSet());

  // Special logic: if the top fp register is even, we might hit a case of an
  // invalid register in case 2.
  LiftoffRegister last_fp = kFpCacheRegList.GetLastRegSet();
  if (last_fp.fp().code() % 2 == 0) {
    pinned.set(last_fp);
  }

  // We can try to optimize the spilling here:
  // 1. Try to get a free fp register, either:
  //  a. This register is already free, or
  //  b. it had to be spilled.
  // 2. If 1a, the adjacent register is used (invariant [2]), spill it.
  // 3. If 1b, check the adjacent register:
  //  a. If free, done!
  //  b. If used, spill it.
  // We spill one register in 2 and 3a, and two registers in 3b.

  LiftoffRegister first_reg = GetUnusedRegister(kFpReg, pinned);
  LiftoffRegister second_reg = first_reg, low_reg = first_reg;

  if (first_reg.fp().code() % 2 == 0) {
    second_reg =
        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() + 1);
  } else {
    second_reg =
        LiftoffRegister::from_liftoff_code(first_reg.liftoff_code() - 1);
    low_reg = second_reg;
  }

  if (cache_state_.is_used(second_reg)) {
    SpillRegister(second_reg);
  }

  return low_reg;
}

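// Spills every value-stack slot that uses {reg} (fully or, for register pairs,
// partially), walking the stack from the top until all uses are gone.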
void LiftoffAssembler::SpillRegister(LiftoffRegister reg) {
  int remaining_uses = cache_state_.get_use_count(reg);
  DCHECK_LT(0, remaining_uses);
  for (uint32_t idx = cache_state_.stack_height() - 1;; --idx) {
    DCHECK_GT(cache_state_.stack_height(), idx);
    auto* slot = &cache_state_.stack_state[idx];
    if (!slot->is_reg() || !slot->reg().overlaps(reg)) continue;
    if (slot->reg().is_pair()) {
      // Make sure to decrement *both* registers in a pair, because the
      // {clear_used} call below only clears one of them.
      cache_state_.dec_used(slot->reg().low());
      cache_state_.dec_used(slot->reg().high());
      cache_state_.last_spilled_regs.set(slot->reg().low());
      cache_state_.last_spilled_regs.set(slot->reg().high());
    }
    Spill(slot->offset(), slot->reg(), slot->kind());
    slot->MakeStack();
    if (--remaining_uses == 0) break;
  }
  cache_state_.clear_used(reg);
  cache_state_.last_spilled_regs.set(reg);
}

void LiftoffAssembler::set_num_locals(uint32_t num_locals) {
  DCHECK_EQ(0, num_locals_);  // only call this once.
  num_locals_ = num_locals;
  if (num_locals > kInlineLocalKinds) {
    more_local_kinds_ = reinterpret_cast<ValueKind*>(
        base::Malloc(num_locals * sizeof(ValueKind)));
    DCHECK_NOT_NULL(more_local_kinds_);
  }
}

std::ostream& operator<<(std::ostream& os, VarState slot) {
  os << name(slot.kind()) << ":";
  switch (slot.loc()) {
    case VarState::kStack:
      return os << "s0x" << std::hex << slot.offset() << std::dec;
    case VarState::kRegister:
      return os << slot.reg();
    case VarState::kIntConst:
      return os << "c" << slot.i32_const();
  }
  UNREACHABLE();
}

#if DEBUG
bool CheckCompatibleStackSlotTypes(ValueKind a, ValueKind b) {
  if (is_object_reference(a)) {
    // Since Liftoff doesn't do accurate type tracking (e.g. on loop back
    // edges), we only care that pointer types stay amongst pointer types.
    // It's fine if ref/optref overwrite each other.
    DCHECK(is_object_reference(b));
  } else if (is_rtt(a)) {
    // Same for rtt/rtt_with_depth.
    DCHECK(is_rtt(b));
  } else {
    // All other types (primitive numbers, bottom/stmt) must be equal.
    DCHECK_EQ(a, b);
  }
  return true;  // Dummy so this can be called via DCHECK.
}
#endif

}  // namespace wasm
}  // namespace internal
}  // namespace v8