1// Copyright 2014 the V8 project authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include <assert.h>  // For assert
6#include <limits.h>  // For LONG_MIN, LONG_MAX.
7
8#if V8_TARGET_ARCH_S390
9
10#include "src/base/bits.h"
11#include "src/base/division-by-constant.h"
12#include "src/codegen/callable.h"
13#include "src/codegen/code-factory.h"
14#include "src/codegen/external-reference-table.h"
15#include "src/codegen/interface-descriptors-inl.h"
16#include "src/codegen/macro-assembler.h"
17#include "src/codegen/register-configuration.h"
18#include "src/debug/debug.h"
19#include "src/deoptimizer/deoptimizer.h"
20#include "src/execution/frames-inl.h"
21#include "src/heap/memory-chunk.h"
22#include "src/init/bootstrapper.h"
23#include "src/logging/counters.h"
24#include "src/objects/smi.h"
25#include "src/runtime/runtime.h"
26#include "src/snapshot/snapshot.h"
27
28#if V8_ENABLE_WEBASSEMBLY
29#include "src/wasm/wasm-code-manager.h"
30#endif  // V8_ENABLE_WEBASSEMBLY
31
32// Satisfy cpplint check, but don't include platform-specific header. It is
33// included recursively via macro-assembler.h.
34#if 0
35#include "src/codegen/s390/macro-assembler-s390.h"
36#endif
37
38namespace v8 {
39namespace internal {
40
41namespace {
42
43// For WebAssembly we care about the full floating point (Simd) registers. If we
44// are not running Wasm, we can get away with saving half of those (F64)
45// registers.
46#if V8_ENABLE_WEBASSEMBLY
47constexpr int kStackSavedSavedFPSizeInBytes =
48    kNumCallerSavedDoubles * kSimd128Size;
49#else
50constexpr int kStackSavedSavedFPSizeInBytes =
51    kNumCallerSavedDoubles * kDoubleSize;
52#endif  // V8_ENABLE_WEBASSEMBLY
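
// Illustrative sizing (not authoritative): if kNumCallerSavedDoubles were 8,
// the save area would be 8 * 16 = 128 bytes when Wasm is enabled (full
// Simd128 slots) versus 8 * 8 = 64 bytes otherwise (F64 slots only).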
53
54}  // namespace
55
56void TurboAssembler::DoubleMax(DoubleRegister result_reg,
57                               DoubleRegister left_reg,
58                               DoubleRegister right_reg) {
59  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
60    vfmax(result_reg, left_reg, right_reg, Condition(1), Condition(8),
61          Condition(3));
62    return;
63  }
64
65  Label check_zero, return_left, return_right, return_nan, done;
66  cdbr(left_reg, right_reg);
67  bunordered(&return_nan, Label::kNear);
68  beq(&check_zero);
69  bge(&return_left, Label::kNear);
70  b(&return_right, Label::kNear);
71
72  bind(&check_zero);
73  lzdr(kDoubleRegZero);
74  cdbr(left_reg, kDoubleRegZero);
75  /* left == right != 0. */
76  bne(&return_left, Label::kNear);
77  /* At this point, both left and right are either 0 or -0. */
78  /* N.B. The following works because +0 + -0 == +0 */
79  /* For max we want logical-and of sign bit: (L + R) */
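  /* Illustrative: (+0) + (-0) == +0 and (-0) + (-0) == -0, so the sum is */
  /* -0 only when both inputs are -0, matching IEEE-754 max semantics. */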
80  ldr(result_reg, left_reg);
81  adbr(result_reg, right_reg);
82  b(&done, Label::kNear);
83
84  bind(&return_nan);
85  /* If left or right are NaN, adbr propagates the appropriate one.*/
86  adbr(left_reg, right_reg);
87  b(&return_left, Label::kNear);
88
89  bind(&return_right);
90  if (right_reg != result_reg) {
91    ldr(result_reg, right_reg);
92  }
93  b(&done, Label::kNear);
94
95  bind(&return_left);
96  if (left_reg != result_reg) {
97    ldr(result_reg, left_reg);
98  }
99  bind(&done);
100}
101
102void TurboAssembler::DoubleMin(DoubleRegister result_reg,
103                               DoubleRegister left_reg,
104                               DoubleRegister right_reg) {
105  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
106    vfmin(result_reg, left_reg, right_reg, Condition(1), Condition(8),
107          Condition(3));
108    return;
109  }
110  Label check_zero, return_left, return_right, return_nan, done;
111  cdbr(left_reg, right_reg);
112  bunordered(&return_nan, Label::kNear);
113  beq(&check_zero);
114  ble(&return_left, Label::kNear);
115  b(&return_right, Label::kNear);
116
117  bind(&check_zero);
118  lzdr(kDoubleRegZero);
119  cdbr(left_reg, kDoubleRegZero);
120  /* left == right != 0. */
121  bne(&return_left, Label::kNear);
122  /* At this point, both left and right are either 0 or -0. */
123  /* N.B. The following works because +0 + -0 == +0 */
124  /* For min we want logical-or of sign bit: -(-L + -R) */
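  /* Illustrative: for L = -0, R = +0 this computes -((+0) + (-0)) = -(+0) */
  /* = -0, so min(-0, +0) correctly yields -0 under IEEE-754. */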
125  lcdbr(left_reg, left_reg);
126  ldr(result_reg, left_reg);
127  if (left_reg == right_reg) {
128    adbr(result_reg, right_reg);
129  } else {
130    sdbr(result_reg, right_reg);
131  }
132  lcdbr(result_reg, result_reg);
133  b(&done, Label::kNear);
134
135  bind(&return_nan);
136  /* If left or right are NaN, adbr propagates the appropriate one.*/
137  adbr(left_reg, right_reg);
138  b(&return_left, Label::kNear);
139
140  bind(&return_right);
141  if (right_reg != result_reg) {
142    ldr(result_reg, right_reg);
143  }
144  b(&done, Label::kNear);
145
146  bind(&return_left);
147  if (left_reg != result_reg) {
148    ldr(result_reg, left_reg);
149  }
150  bind(&done);
151}
152
153void TurboAssembler::FloatMax(DoubleRegister result_reg,
154                              DoubleRegister left_reg,
155                              DoubleRegister right_reg) {
156  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
157    vfmax(result_reg, left_reg, right_reg, Condition(1), Condition(8),
158          Condition(2));
159    return;
160  }
161  Label check_zero, return_left, return_right, return_nan, done;
162  cebr(left_reg, right_reg);
163  bunordered(&return_nan, Label::kNear);
164  beq(&check_zero);
165  bge(&return_left, Label::kNear);
166  b(&return_right, Label::kNear);
167
168  bind(&check_zero);
169  lzdr(kDoubleRegZero);
170  cebr(left_reg, kDoubleRegZero);
171  /* left == right != 0. */
172  bne(&return_left, Label::kNear);
173  /* At this point, both left and right are either 0 or -0. */
174  /* N.B. The following works because +0 + -0 == +0 */
175  /* For max we want logical-and of sign bit: (L + R) */
176  ldr(result_reg, left_reg);
177  aebr(result_reg, right_reg);
178  b(&done, Label::kNear);
179
180  bind(&return_nan);
181  /* If left or right are NaN, aebr propagates the appropriate one.*/
182  aebr(left_reg, right_reg);
183  b(&return_left, Label::kNear);
184
185  bind(&return_right);
186  if (right_reg != result_reg) {
187    ldr(result_reg, right_reg);
188  }
189  b(&done, Label::kNear);
190
191  bind(&return_left);
192  if (left_reg != result_reg) {
193    ldr(result_reg, left_reg);
194  }
195  bind(&done);
196}
197
198void TurboAssembler::FloatMin(DoubleRegister result_reg,
199                              DoubleRegister left_reg,
200                              DoubleRegister right_reg) {
201  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_1)) {
202    vfmin(result_reg, left_reg, right_reg, Condition(1), Condition(8),
203          Condition(2));
204    return;
205  }
206
207  Label check_zero, return_left, return_right, return_nan, done;
208  cebr(left_reg, right_reg);
209  bunordered(&return_nan, Label::kNear);
210  beq(&check_zero);
211  ble(&return_left, Label::kNear);
212  b(&return_right, Label::kNear);
213
214  bind(&check_zero);
215  lzdr(kDoubleRegZero);
216  cebr(left_reg, kDoubleRegZero);
217  /* left == right != 0. */
218  bne(&return_left, Label::kNear);
219  /* At this point, both left and right are either 0 or -0. */
220  /* N.B. The following works because +0 + -0 == +0 */
221  /* For min we want logical-or of sign bit: -(-L + -R) */
222  lcebr(left_reg, left_reg);
223  ldr(result_reg, left_reg);
224  if (left_reg == right_reg) {
225    aebr(result_reg, right_reg);
226  } else {
227    sebr(result_reg, right_reg);
228  }
229  lcebr(result_reg, result_reg);
230  b(&done, Label::kNear);
231
232  bind(&return_nan);
233  /* If left or right are NaN, aebr propagates the appropriate one.*/
234  aebr(left_reg, right_reg);
235  b(&return_left, Label::kNear);
236
237  bind(&return_right);
238  if (right_reg != result_reg) {
239    ldr(result_reg, right_reg);
240  }
241  b(&done, Label::kNear);
242
243  bind(&return_left);
244  if (left_reg != result_reg) {
245    ldr(result_reg, left_reg);
246  }
247  bind(&done);
248}
249
250void TurboAssembler::CeilF32(DoubleRegister dst, DoubleRegister src) {
251  fiebra(ROUND_TOWARD_POS_INF, dst, src);
252}
253
254void TurboAssembler::CeilF64(DoubleRegister dst, DoubleRegister src) {
255  fidbra(ROUND_TOWARD_POS_INF, dst, src);
256}
257
258void TurboAssembler::FloorF32(DoubleRegister dst, DoubleRegister src) {
259  fiebra(ROUND_TOWARD_NEG_INF, dst, src);
260}
261
262void TurboAssembler::FloorF64(DoubleRegister dst, DoubleRegister src) {
263  fidbra(ROUND_TOWARD_NEG_INF, dst, src);
264}
265
266void TurboAssembler::TruncF32(DoubleRegister dst, DoubleRegister src) {
267  fiebra(ROUND_TOWARD_0, dst, src);
268}
269
270void TurboAssembler::TruncF64(DoubleRegister dst, DoubleRegister src) {
271  fidbra(ROUND_TOWARD_0, dst, src);
272}
273
274void TurboAssembler::NearestIntF32(DoubleRegister dst, DoubleRegister src) {
275  fiebra(ROUND_TO_NEAREST_TO_EVEN, dst, src);
276}
277
278void TurboAssembler::NearestIntF64(DoubleRegister dst, DoubleRegister src) {
279  fidbra(ROUND_TO_NEAREST_TO_EVEN, dst, src);
280}
281
282int TurboAssembler::RequiredStackSizeForCallerSaved(SaveFPRegsMode fp_mode,
283                                                    Register exclusion1,
284                                                    Register exclusion2,
285                                                    Register exclusion3) const {
286  int bytes = 0;
287
288  RegList exclusions = {exclusion1, exclusion2, exclusion3};
289  RegList list = kJSCallerSaved - exclusions;
290  bytes += list.Count() * kSystemPointerSize;
291
292  if (fp_mode == SaveFPRegsMode::kSave) {
293    bytes += kStackSavedSavedFPSizeInBytes;
294  }
295
296  return bytes;
297}
298
299int TurboAssembler::PushCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
300                                    Register exclusion1, Register exclusion2,
301                                    Register exclusion3) {
302  int bytes = 0;
303
304  RegList exclusions = {exclusion1, exclusion2, exclusion3};
305  RegList list = kJSCallerSaved - exclusions;
306  MultiPush(list);
307  bytes += list.Count() * kSystemPointerSize;
308
309  if (fp_mode == SaveFPRegsMode::kSave) {
310    MultiPushF64OrV128(kCallerSavedDoubles, scratch);
311    bytes += kStackSavedSavedFPSizeInBytes;
312  }
313
314  return bytes;
315}
316
317int TurboAssembler::PopCallerSaved(SaveFPRegsMode fp_mode, Register scratch,
318                                   Register exclusion1, Register exclusion2,
319                                   Register exclusion3) {
320  int bytes = 0;
321  if (fp_mode == SaveFPRegsMode::kSave) {
322    MultiPopF64OrV128(kCallerSavedDoubles, scratch);
323    bytes += kStackSavedSavedFPSizeInBytes;
324  }
325
326  RegList exclusions = {exclusion1, exclusion2, exclusion3};
327  RegList list = kJSCallerSaved - exclusions;
328  MultiPop(list);
329  bytes += list.Count() * kSystemPointerSize;
330
331  return bytes;
332}
333
334void TurboAssembler::LoadFromConstantsTable(Register destination,
335                                            int constant_index) {
336  DCHECK(RootsTable::IsImmortalImmovable(RootIndex::kBuiltinsConstantsTable));
337
338  const uint32_t offset = FixedArray::kHeaderSize +
339                          constant_index * kSystemPointerSize - kHeapObjectTag;
340
341  CHECK(is_uint19(offset));
342  DCHECK_NE(destination, r0);
343  LoadRoot(destination, RootIndex::kBuiltinsConstantsTable);
344  LoadTaggedPointerField(
345      destination,
346      FieldMemOperand(destination,
347                      FixedArray::OffsetOfElementAt(constant_index)),
348      r1);
349}
350
351void TurboAssembler::LoadRootRelative(Register destination, int32_t offset) {
352  LoadU64(destination, MemOperand(kRootRegister, offset));
353}
354
355void TurboAssembler::LoadRootRegisterOffset(Register destination,
356                                            intptr_t offset) {
357  if (offset == 0) {
358    mov(destination, kRootRegister);
359  } else if (is_uint12(offset)) {
360    la(destination, MemOperand(kRootRegister, offset));
361  } else {
362    DCHECK(is_int20(offset));
363    lay(destination, MemOperand(kRootRegister, offset));
364  }
365}
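
// Note: `la` encodes only a 12-bit unsigned displacement while `lay` (the
// long-displacement form) takes a 20-bit signed displacement, which is why
// LoadRootRegisterOffset picks the instruction based on the offset range.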
366
367void TurboAssembler::Jump(Register target, Condition cond) { b(cond, target); }
368
369void TurboAssembler::Jump(intptr_t target, RelocInfo::Mode rmode,
370                          Condition cond) {
371  Label skip;
372
373  if (cond != al) b(NegateCondition(cond), &skip);
374
375  mov(ip, Operand(target, rmode));
376  b(ip);
377
378  bind(&skip);
379}
380
381void TurboAssembler::Jump(Address target, RelocInfo::Mode rmode,
382                          Condition cond) {
383  DCHECK(!RelocInfo::IsCodeTarget(rmode));
384  Jump(static_cast<intptr_t>(target), rmode, cond);
385}
386
387void TurboAssembler::Jump(Handle<Code> code, RelocInfo::Mode rmode,
388                          Condition cond) {
389  DCHECK(RelocInfo::IsCodeTarget(rmode));
390  DCHECK_IMPLIES(options().isolate_independent_code,
391                 Builtins::IsIsolateIndependentBuiltin(*code));
392
393  Builtin builtin = Builtin::kNoBuiltinId;
394  bool target_is_builtin =
395      isolate()->builtins()->IsBuiltinHandle(code, &builtin);
396
397  if (options().inline_offheap_trampolines && target_is_builtin) {
398    // Inline the trampoline.
399    RecordCommentForOffHeapTrampoline(builtin);
400    mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
401    b(cond, ip);
402    return;
403  }
404  jump(code, RelocInfo::RELATIVE_CODE_TARGET, cond);
405}
406
407void TurboAssembler::Jump(const ExternalReference& reference) {
408  UseScratchRegisterScope temps(this);
409  Register scratch = temps.Acquire();
410  Move(scratch, reference);
411  Jump(scratch);
412}
413
414void TurboAssembler::Call(Register target) {
415  // Branch to target via indirect branch
416  basr(r14, target);
417}
418
419void MacroAssembler::CallJSEntry(Register target) {
420  DCHECK(target == r4);
421  Call(target);
422}
423
424int MacroAssembler::CallSizeNotPredictableCodeSize(Address target,
425                                                   RelocInfo::Mode rmode,
426                                                   Condition cond) {
427  // S390 Assembler::move sequence is IILF / IIHF
428  int size;
429#if V8_TARGET_ARCH_S390X
430  size = 14;  // IILF + IIHF + BASR
431#else
432  size = 8;  // IILF + BASR
433#endif
434  return size;
435}
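
// Sketch of the size accounting above: IILF and IIHF are 6-byte RIL-format
// instructions and BASR is a 2-byte RR-format instruction, so the sequence
// is 6 + 6 + 2 = 14 bytes on 64-bit targets and 6 + 2 = 8 bytes otherwise.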
436
437void TurboAssembler::Call(Address target, RelocInfo::Mode rmode,
438                          Condition cond) {
439  DCHECK(cond == al);
440
441  mov(ip, Operand(target, rmode));
442  basr(r14, ip);
443}
444
445void TurboAssembler::Call(Handle<Code> code, RelocInfo::Mode rmode,
446                          Condition cond) {
447  DCHECK(RelocInfo::IsCodeTarget(rmode) && cond == al);
448
449  DCHECK_IMPLIES(options().isolate_independent_code,
450                 Builtins::IsIsolateIndependentBuiltin(*code));
451  Builtin builtin = Builtin::kNoBuiltinId;
452  bool target_is_builtin =
453      isolate()->builtins()->IsBuiltinHandle(code, &builtin);
454
455  if (target_is_builtin && options().inline_offheap_trampolines) {
456    // Inline the trampoline.
457    CallBuiltin(builtin);
458    return;
459  }
460  DCHECK(code->IsExecutable());
461  call(code, rmode);
462}
463
464void TurboAssembler::CallBuiltin(Builtin builtin) {
465  ASM_CODE_COMMENT_STRING(this, CommentForOffHeapTrampoline("call", builtin));
466  DCHECK(Builtins::IsBuiltinId(builtin));
467  // Use ip directly instead of using UseScratchRegisterScope, as we do not
468  // preserve scratch registers across calls.
469  mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
470  Call(ip);
471}
472
473void TurboAssembler::TailCallBuiltin(Builtin builtin) {
474  ASM_CODE_COMMENT_STRING(this,
475                          CommentForOffHeapTrampoline("tail call", builtin));
476  mov(ip, Operand(BuiltinEntry(builtin), RelocInfo::OFF_HEAP_TARGET));
477  b(ip);
478}
479
480void TurboAssembler::Drop(int count) {
481  if (count > 0) {
482    int total = count * kSystemPointerSize;
483    if (is_uint12(total)) {
484      la(sp, MemOperand(sp, total));
485    } else if (is_int20(total)) {
486      lay(sp, MemOperand(sp, total));
487    } else {
488      AddS64(sp, Operand(total));
489    }
490  }
491}
492
493void TurboAssembler::Drop(Register count, Register scratch) {
494  ShiftLeftU64(scratch, count, Operand(kSystemPointerSizeLog2));
495  AddS64(sp, sp, scratch);
496}
497
498void TurboAssembler::Call(Label* target) { b(r14, target); }
499
500void TurboAssembler::Push(Handle<HeapObject> handle) {
501  mov(r0, Operand(handle));
502  push(r0);
503}
504
505void TurboAssembler::Push(Smi smi) {
506  mov(r0, Operand(smi));
507  push(r0);
508}
509
510void TurboAssembler::Move(Register dst, Handle<HeapObject> value,
511                          RelocInfo::Mode rmode) {
512  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
513  // non-isolate-independent code. In many cases it might be cheaper than
514  // embedding the relocatable value.
515  if (root_array_available_ && options().isolate_independent_code) {
516    IndirectLoadConstant(dst, value);
517    return;
518  } else if (RelocInfo::IsCompressedEmbeddedObject(rmode)) {
519    EmbeddedObjectIndex index = AddEmbeddedObject(value);
520    DCHECK(is_uint32(index));
521    mov(dst, Operand(static_cast<int>(index), rmode));
522  } else {
523    DCHECK(RelocInfo::IsFullEmbeddedObject(rmode));
524    mov(dst, Operand(value.address(), rmode));
525  }
526}
527
528void TurboAssembler::Move(Register dst, ExternalReference reference) {
529  // TODO(jgruber,v8:8887): Also consider a root-relative load when generating
530  // non-isolate-independent code. In many cases it might be cheaper than
531  // embedding the relocatable value.
532  if (root_array_available_ && options().isolate_independent_code) {
533    IndirectLoadExternalReference(dst, reference);
534    return;
535  }
536  mov(dst, Operand(reference));
537}
538
539void TurboAssembler::Move(Register dst, Register src, Condition cond) {
540  if (dst != src) {
541    if (cond == al) {
542      mov(dst, src);
543    } else {
544      LoadOnConditionP(cond, dst, src);
545    }
546  }
547}
548
549void TurboAssembler::Move(DoubleRegister dst, DoubleRegister src) {
550  if (dst != src) {
551    ldr(dst, src);
552  }
553}
554
555void TurboAssembler::Move(Register dst, const MemOperand& src) {
556  LoadU64(dst, src);
557}
558
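// Note on the three SS-format wrappers below: the hardware encodes the
// length field as (byte count - 1), i.e. an encoded value of 0 operates on
// one byte. The wrappers subtract 1 so callers can pass the actual byte
// count, e.g. Operand(kSystemPointerSize) to move/compare/xor 8 bytes.
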
559// Wrapper around Assembler::mvc (SS-a format)
560void TurboAssembler::MoveChar(const MemOperand& opnd1, const MemOperand& opnd2,
561                              const Operand& length) {
562  mvc(opnd1, opnd2, Operand(static_cast<intptr_t>(length.immediate() - 1)));
563}
564
565// Wrapper around Assembler::clc (SS-a format)
566void TurboAssembler::CompareLogicalChar(const MemOperand& opnd1,
567                                        const MemOperand& opnd2,
568                                        const Operand& length) {
569  clc(opnd1, opnd2, Operand(static_cast<intptr_t>(length.immediate() - 1)));
570}
571
572// Wrapper around Assembler::xc (SS-a format)
573void TurboAssembler::ExclusiveOrChar(const MemOperand& opnd1,
574                                     const MemOperand& opnd2,
575                                     const Operand& length) {
576  xc(opnd1, opnd2, Operand(static_cast<intptr_t>(length.immediate() - 1)));
577}
578
579// Wrapper around Assembler::risbg(n) (RIE-f)
580void TurboAssembler::RotateInsertSelectBits(Register dst, Register src,
581                                            const Operand& startBit,
582                                            const Operand& endBit,
583                                            const Operand& shiftAmt,
584                                            bool zeroBits) {
585  if (zeroBits)
586    // High tag the top bit of I4/EndBit to zero out any unselected bits
587    risbg(dst, src, startBit,
588          Operand(static_cast<intptr_t>(endBit.immediate() | 0x80)), shiftAmt);
589  else
590    risbg(dst, src, startBit, endBit, shiftAmt);
591}
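
// Illustrative use (operands assumed for the example): calling
// RotateInsertSelectBits(dst, src, Operand(48), Operand(63), Operand(0), true)
// selects bits 48..63 of src (the low 16 bits) into dst and, because the
// 0x80 "zero remaining bits" flag is set on the end-bit field, clears all
// other bits of dst instead of preserving them.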
592
593void TurboAssembler::BranchRelativeOnIdxHighP(Register dst, Register inc,
594                                              Label* L) {
595#if V8_TARGET_ARCH_S390X
596  brxhg(dst, inc, L);
597#else
598  brxh(dst, inc, L);
599#endif  // V8_TARGET_ARCH_S390X
600}
601
602void TurboAssembler::PushArray(Register array, Register size, Register scratch,
603                               Register scratch2, PushArrayOrder order) {
604  Label loop, done;
605
606  if (order == kNormal) {
607    ShiftLeftU64(scratch, size, Operand(kSystemPointerSizeLog2));
608    lay(scratch, MemOperand(array, scratch));
609    bind(&loop);
610    CmpS64(array, scratch);
611    bge(&done);
612    lay(scratch, MemOperand(scratch, -kSystemPointerSize));
613    lay(sp, MemOperand(sp, -kSystemPointerSize));
614    MoveChar(MemOperand(sp), MemOperand(scratch), Operand(kSystemPointerSize));
615    b(&loop);
616    bind(&done);
617  } else {
618    DCHECK_NE(scratch2, r0);
619    ShiftLeftU64(scratch, size, Operand(kSystemPointerSizeLog2));
620    lay(scratch, MemOperand(array, scratch));
621    mov(scratch2, array);
622    bind(&loop);
623    CmpS64(scratch2, scratch);
624    bge(&done);
625    lay(sp, MemOperand(sp, -kSystemPointerSize));
626    MoveChar(MemOperand(sp), MemOperand(scratch2), Operand(kSystemPointerSize));
627    lay(scratch2, MemOperand(scratch2, kSystemPointerSize));
628    b(&loop);
629    bind(&done);
630  }
631}
632
633void TurboAssembler::MultiPush(RegList regs, Register location) {
634  int16_t num_to_push = regs.Count();
635  int16_t stack_offset = num_to_push * kSystemPointerSize;
636
637  SubS64(location, location, Operand(stack_offset));
638  for (int16_t i = Register::kNumRegisters - 1; i >= 0; i--) {
639    if ((regs.bits() & (1 << i)) != 0) {
640      stack_offset -= kSystemPointerSize;
641      StoreU64(ToRegister(i), MemOperand(location, stack_offset));
642    }
643  }
644}
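
// Note: MultiPush stores the lowest-numbered register of the list at the
// lowest address (offset 0) and higher-numbered registers at increasing
// offsets; MultiPop below reads the same layout upward from offset 0, so
// the two remain symmetric for any RegList.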
645
646void TurboAssembler::MultiPop(RegList regs, Register location) {
647  int16_t stack_offset = 0;
648
649  for (int16_t i = 0; i < Register::kNumRegisters; i++) {
650    if ((regs.bits() & (1 << i)) != 0) {
651      LoadU64(ToRegister(i), MemOperand(location, stack_offset));
652      stack_offset += kSystemPointerSize;
653    }
654  }
655  AddS64(location, location, Operand(stack_offset));
656}
657
658void TurboAssembler::MultiPushDoubles(DoubleRegList dregs, Register location) {
659  int16_t num_to_push = dregs.Count();
660  int16_t stack_offset = num_to_push * kDoubleSize;
661
662  SubS64(location, location, Operand(stack_offset));
663  for (int16_t i = DoubleRegister::kNumRegisters - 1; i >= 0; i--) {
664    if ((dregs.bits() & (1 << i)) != 0) {
665      DoubleRegister dreg = DoubleRegister::from_code(i);
666      stack_offset -= kDoubleSize;
667      StoreF64(dreg, MemOperand(location, stack_offset));
668    }
669  }
670}
671
672void TurboAssembler::MultiPushV128(DoubleRegList dregs, Register scratch,
673                                   Register location) {
674  int16_t num_to_push = dregs.Count();
675  int16_t stack_offset = num_to_push * kSimd128Size;
676
677  SubS64(location, location, Operand(stack_offset));
678  for (int16_t i = Simd128Register::kNumRegisters - 1; i >= 0; i--) {
679    if ((dregs.bits() & (1 << i)) != 0) {
680      Simd128Register dreg = Simd128Register::from_code(i);
681      stack_offset -= kSimd128Size;
682      StoreV128(dreg, MemOperand(location, stack_offset), scratch);
683    }
684  }
685}
686
687void TurboAssembler::MultiPopDoubles(DoubleRegList dregs, Register location) {
688  int16_t stack_offset = 0;
689
690  for (int16_t i = 0; i < DoubleRegister::kNumRegisters; i++) {
691    if ((dregs.bits() & (1 << i)) != 0) {
692      DoubleRegister dreg = DoubleRegister::from_code(i);
693      LoadF64(dreg, MemOperand(location, stack_offset));
694      stack_offset += kDoubleSize;
695    }
696  }
697  AddS64(location, location, Operand(stack_offset));
698}
699
700void TurboAssembler::MultiPopV128(DoubleRegList dregs, Register scratch,
701                                  Register location) {
702  int16_t stack_offset = 0;
703
704  for (int16_t i = 0; i < Simd128Register::kNumRegisters; i++) {
705    if ((dregs.bits() & (1 << i)) != 0) {
706      Simd128Register dreg = Simd128Register::from_code(i);
707      LoadV128(dreg, MemOperand(location, stack_offset), scratch);
708      stack_offset += kSimd128Size;
709    }
710  }
711  AddS64(location, location, Operand(stack_offset));
712}
713
714void TurboAssembler::MultiPushF64OrV128(DoubleRegList dregs, Register scratch,
715                                        Register location) {
716#if V8_ENABLE_WEBASSEMBLY
  bool generating_builtins =
      isolate() && isolate()->IsGeneratingEmbeddedBuiltins();
  if (generating_builtins) {
720    Label push_doubles, simd_pushed;
721    Move(r1, ExternalReference::supports_wasm_simd_128_address());
722    LoadU8(r1, MemOperand(r1));
723    LoadAndTestP(r1, r1);  // If > 0 then simd is available.
724    ble(&push_doubles, Label::kNear);
725    // Save vector registers, don't save double registers anymore.
726    MultiPushV128(dregs, scratch);
727    b(&simd_pushed);
728    bind(&push_doubles);
729    // Simd not supported, only save double registers.
730    MultiPushDoubles(dregs);
    // We still need to allocate empty space on the stack as if
    // Simd registers were saved (see kFixedFrameSizeFromFp).
733    lay(sp, MemOperand(sp, -(dregs.Count() * kDoubleSize)));
734    bind(&simd_pushed);
735  } else {
736    if (CpuFeatures::SupportsWasmSimd128()) {
737      MultiPushV128(dregs, scratch);
738    } else {
739      MultiPushDoubles(dregs);
740      lay(sp, MemOperand(sp, -(dregs.Count() * kDoubleSize)));
741    }
742  }
743#else
744  MultiPushDoubles(dregs);
745#endif
746}
747
748void TurboAssembler::MultiPopF64OrV128(DoubleRegList dregs, Register scratch,
749                                       Register location) {
750#if V8_ENABLE_WEBASSEMBLY
  bool generating_builtins =
      isolate() && isolate()->IsGeneratingEmbeddedBuiltins();
  if (generating_builtins) {
754    Label pop_doubles, simd_popped;
755    Move(r1, ExternalReference::supports_wasm_simd_128_address());
756    LoadU8(r1, MemOperand(r1));
757    LoadAndTestP(r1, r1);  // If > 0 then simd is available.
758    ble(&pop_doubles, Label::kNear);
759    // Pop vector registers, don't pop double registers anymore.
760    MultiPopV128(dregs, scratch);
761    b(&simd_popped);
762    bind(&pop_doubles);
763    // Simd not supported, only pop double registers.
764    lay(sp, MemOperand(sp, dregs.Count() * kDoubleSize));
765    MultiPopDoubles(dregs);
766    bind(&simd_popped);
767  } else {
768    if (CpuFeatures::SupportsWasmSimd128()) {
769      MultiPopV128(dregs, scratch);
770    } else {
771      lay(sp, MemOperand(sp, dregs.Count() * kDoubleSize));
772      MultiPopDoubles(dregs);
773    }
774  }
775#else
776  MultiPopDoubles(dregs);
777#endif
778}
779
780void TurboAssembler::LoadRoot(Register destination, RootIndex index,
781                              Condition) {
782  LoadU64(destination,
783          MemOperand(kRootRegister, RootRegisterOffsetForRootIndex(index)), r0);
784}
785
786void TurboAssembler::LoadTaggedPointerField(const Register& destination,
787                                            const MemOperand& field_operand,
788                                            const Register& scratch) {
789  if (COMPRESS_POINTERS_BOOL) {
790    DecompressTaggedPointer(destination, field_operand);
791  } else {
792    LoadU64(destination, field_operand, scratch);
793  }
794}
795
796void TurboAssembler::LoadAnyTaggedField(const Register& destination,
797                                        const MemOperand& field_operand,
798                                        const Register& scratch) {
799  if (COMPRESS_POINTERS_BOOL) {
800    DecompressAnyTagged(destination, field_operand);
801  } else {
802    LoadU64(destination, field_operand, scratch);
803  }
804}
805
806void TurboAssembler::SmiUntag(Register dst, const MemOperand& src) {
807  if (SmiValuesAre31Bits()) {
808    LoadS32(dst, src);
809  } else {
810    LoadU64(dst, src);
811  }
812  SmiUntag(dst);
813}
814
815void TurboAssembler::SmiUntagField(Register dst, const MemOperand& src) {
816  SmiUntag(dst, src);
817}
818
819void TurboAssembler::StoreTaggedField(const Register& value,
820                                      const MemOperand& dst_field_operand,
821                                      const Register& scratch) {
822  if (COMPRESS_POINTERS_BOOL) {
823    RecordComment("[ StoreTagged");
824    StoreU32(value, dst_field_operand);
825    RecordComment("]");
826  } else {
827    StoreU64(value, dst_field_operand, scratch);
828  }
829}
830
831void TurboAssembler::DecompressTaggedSigned(Register destination,
832                                            Register src) {
833  RecordComment("[ DecompressTaggedSigned");
834  llgfr(destination, src);
835  RecordComment("]");
836}
837
838void TurboAssembler::DecompressTaggedSigned(Register destination,
839                                            MemOperand field_operand) {
840  RecordComment("[ DecompressTaggedSigned");
841  llgf(destination, field_operand);
842  RecordComment("]");
843}
844
845void TurboAssembler::DecompressTaggedPointer(Register destination,
846                                             Register source) {
847  RecordComment("[ DecompressTaggedPointer");
848  llgfr(destination, source);
849  agr(destination, kRootRegister);
850  RecordComment("]");
851}
852
853void TurboAssembler::DecompressTaggedPointer(Register destination,
854                                             MemOperand field_operand) {
855  RecordComment("[ DecompressTaggedPointer");
856  llgf(destination, field_operand);
857  agr(destination, kRootRegister);
858  RecordComment("]");
859}
860
861void TurboAssembler::DecompressAnyTagged(Register destination,
862                                         MemOperand field_operand) {
863  RecordComment("[ DecompressAnyTagged");
864  llgf(destination, field_operand);
865  agr(destination, kRootRegister);
866  RecordComment("]");
867}
868
869void TurboAssembler::DecompressAnyTagged(Register destination,
870                                         Register source) {
871  RecordComment("[ DecompressAnyTagged");
872  llgfr(destination, source);
873  agr(destination, kRootRegister);
874  RecordComment("]");
875}
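
// Note: with pointer compression, a tagged field stores a 32-bit value.
// Decompressing a pointer is a zero-extending load (llgf/llgfr) followed by
// adding the cage base held in kRootRegister; a tagged Smi only needs the
// zero extension (see DecompressTaggedSigned above).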
876
877void TurboAssembler::LoadTaggedSignedField(Register destination,
878                                           MemOperand field_operand) {
879  if (COMPRESS_POINTERS_BOOL) {
880    DecompressTaggedSigned(destination, field_operand);
881  } else {
882    LoadU64(destination, field_operand);
883  }
884}
885
886void MacroAssembler::RecordWriteField(Register object, int offset,
887                                      Register value, Register slot_address,
888                                      LinkRegisterStatus lr_status,
889                                      SaveFPRegsMode save_fp,
890                                      RememberedSetAction remembered_set_action,
891                                      SmiCheck smi_check) {
892  // First, check if a write barrier is even needed. The tests below
893  // catch stores of Smis.
894  Label done;
895
896  // Skip barrier if writing a smi.
897  if (smi_check == SmiCheck::kInline) {
898    JumpIfSmi(value, &done);
899  }
900
  // Although the object register is tagged, the offset is relative to the
  // start of the object, so the offset must be a multiple of kTaggedSize.
903  DCHECK(IsAligned(offset, kTaggedSize));
904
905  lay(slot_address, MemOperand(object, offset - kHeapObjectTag));
906  if (FLAG_debug_code) {
907    Label ok;
908    AndP(r0, slot_address, Operand(kTaggedSize - 1));
909    beq(&ok, Label::kNear);
910    stop();
911    bind(&ok);
912  }
913
914  RecordWrite(object, slot_address, value, lr_status, save_fp,
915              remembered_set_action, SmiCheck::kOmit);
916
917  bind(&done);
918
919  // Clobber clobbered input registers when running with the debug-code flag
920  // turned on to provoke errors.
921  if (FLAG_debug_code) {
922    mov(value, Operand(bit_cast<intptr_t>(kZapValue + 4)));
923    mov(slot_address, Operand(bit_cast<intptr_t>(kZapValue + 8)));
924  }
925}
926
927void TurboAssembler::MaybeSaveRegisters(RegList registers) {
928  if (registers.is_empty()) return;
929  MultiPush(registers);
930}
931
932void TurboAssembler::MaybeRestoreRegisters(RegList registers) {
933  if (registers.is_empty()) return;
934  MultiPop(registers);
935}
936
937void TurboAssembler::CallEphemeronKeyBarrier(Register object,
938                                             Register slot_address,
939                                             SaveFPRegsMode fp_mode) {
940  DCHECK(!AreAliased(object, slot_address));
941  RegList registers =
942      WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
943  MaybeSaveRegisters(registers);
944
945  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
946  Register slot_address_parameter =
947      WriteBarrierDescriptor::SlotAddressRegister();
948
949  Push(object);
950  Push(slot_address);
951  Pop(slot_address_parameter);
952  Pop(object_parameter);
953
954  Call(isolate()->builtins()->code_handle(
955           Builtins::GetEphemeronKeyBarrierStub(fp_mode)),
956       RelocInfo::CODE_TARGET);
957  MaybeRestoreRegisters(registers);
958}
959
960void TurboAssembler::CallRecordWriteStubSaveRegisters(
961    Register object, Register slot_address,
962    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
963    StubCallMode mode) {
964  DCHECK(!AreAliased(object, slot_address));
965  RegList registers =
966      WriteBarrierDescriptor::ComputeSavedRegisters(object, slot_address);
967  MaybeSaveRegisters(registers);
968
969  Register object_parameter = WriteBarrierDescriptor::ObjectRegister();
970  Register slot_address_parameter =
971      WriteBarrierDescriptor::SlotAddressRegister();
972
973  Push(object);
974  Push(slot_address);
975  Pop(slot_address_parameter);
976  Pop(object_parameter);
977
978  CallRecordWriteStub(object_parameter, slot_address_parameter,
979                      remembered_set_action, fp_mode, mode);
980
981  MaybeRestoreRegisters(registers);
982}
983
984void TurboAssembler::CallRecordWriteStub(
985    Register object, Register slot_address,
986    RememberedSetAction remembered_set_action, SaveFPRegsMode fp_mode,
987    StubCallMode mode) {
988  // Use CallRecordWriteStubSaveRegisters if the object and slot registers
989  // need to be caller saved.
990  DCHECK_EQ(WriteBarrierDescriptor::ObjectRegister(), object);
991  DCHECK_EQ(WriteBarrierDescriptor::SlotAddressRegister(), slot_address);
992#if V8_ENABLE_WEBASSEMBLY
993  if (mode == StubCallMode::kCallWasmRuntimeStub) {
994    auto wasm_target =
995        wasm::WasmCode::GetRecordWriteStub(remembered_set_action, fp_mode);
996    Call(wasm_target, RelocInfo::WASM_STUB_CALL);
997#else
998  if (false) {
999#endif
1000  } else {
1001    auto builtin_index =
1002        Builtins::GetRecordWriteStub(remembered_set_action, fp_mode);
1003    if (options().inline_offheap_trampolines) {
1004      RecordCommentForOffHeapTrampoline(builtin_index);
1005      mov(ip, Operand(BuiltinEntry(builtin_index), RelocInfo::OFF_HEAP_TARGET));
1006      Call(ip);
1007    } else {
1008      Handle<Code> code_target =
1009          isolate()->builtins()->code_handle(builtin_index);
1010      Call(code_target, RelocInfo::CODE_TARGET);
1011    }
1012  }
1013}
1014
// Will clobber 4 registers: object, slot_address, value, ip.  The
// register 'object' contains a heap object pointer.  The heap object
// tag is shifted away.
1018void MacroAssembler::RecordWrite(Register object, Register slot_address,
1019                                 Register value, LinkRegisterStatus lr_status,
1020                                 SaveFPRegsMode fp_mode,
1021                                 RememberedSetAction remembered_set_action,
1022                                 SmiCheck smi_check) {
1023  DCHECK(!AreAliased(object, slot_address, value));
1024  if (FLAG_debug_code) {
1025    LoadTaggedPointerField(r0, MemOperand(slot_address));
1026    CmpS64(value, r0);
1027    Check(eq, AbortReason::kWrongAddressOrValuePassedToRecordWrite);
1028  }
1029
1030  if ((remembered_set_action == RememberedSetAction::kOmit &&
1031       !FLAG_incremental_marking) ||
1032      FLAG_disable_write_barriers) {
1033    return;
1034  }
1035  // First, check if a write barrier is even needed. The tests below
1036  // catch stores of smis and stores into the young generation.
1037  Label done;
1038
1039  if (smi_check == SmiCheck::kInline) {
1040    JumpIfSmi(value, &done);
1041  }
1042
1043  CheckPageFlag(value,
1044                value,  // Used as scratch.
1045                MemoryChunk::kPointersToHereAreInterestingMask, eq, &done);
1046  CheckPageFlag(object,
1047                value,  // Used as scratch.
1048                MemoryChunk::kPointersFromHereAreInterestingMask, eq, &done);
1049
1050  // Record the actual write.
1051  if (lr_status == kLRHasNotBeenSaved) {
1052    push(r14);
1053  }
1054  CallRecordWriteStubSaveRegisters(object, slot_address, remembered_set_action,
1055                                   fp_mode);
1056  if (lr_status == kLRHasNotBeenSaved) {
1057    pop(r14);
1058  }
1059
1060  if (FLAG_debug_code) mov(slot_address, Operand(kZapValue));
1061
1062  bind(&done);
1063
1064  // Clobber clobbered registers when running with the debug-code flag
1065  // turned on to provoke errors.
1066  if (FLAG_debug_code) {
1067    mov(slot_address, Operand(bit_cast<intptr_t>(kZapValue + 12)));
1068    mov(value, Operand(bit_cast<intptr_t>(kZapValue + 16)));
1069  }
1070}
1071
1072void TurboAssembler::PushCommonFrame(Register marker_reg) {
1073  ASM_CODE_COMMENT(this);
1074  int fp_delta = 0;
1075  CleanseP(r14);
1076  if (marker_reg.is_valid()) {
1077    Push(r14, fp, marker_reg);
1078    fp_delta = 1;
1079  } else {
1080    Push(r14, fp);
1081    fp_delta = 0;
1082  }
1083  la(fp, MemOperand(sp, fp_delta * kSystemPointerSize));
1084}
1085
1086void TurboAssembler::PopCommonFrame(Register marker_reg) {
1087  if (marker_reg.is_valid()) {
1088    Pop(r14, fp, marker_reg);
1089  } else {
1090    Pop(r14, fp);
1091  }
1092}
1093
1094void TurboAssembler::PushStandardFrame(Register function_reg) {
1095  int fp_delta = 0;
1096  CleanseP(r14);
1097  if (function_reg.is_valid()) {
1098    Push(r14, fp, cp, function_reg);
1099    fp_delta = 2;
1100  } else {
1101    Push(r14, fp, cp);
1102    fp_delta = 1;
1103  }
1104  la(fp, MemOperand(sp, fp_delta * kSystemPointerSize));
1105  Push(kJavaScriptCallArgCountRegister);
1106}
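
// Rough sketch of the frame laid out by PushStandardFrame when function_reg
// is valid (64-bit build, offsets illustrative only):
//   [fp + 8]   saved r14 (return address)
//   [fp + 0]   caller fp              <- new fp
//   [fp - 8]   cp (context)
//   [fp - 16]  function
//   [fp - 24]  argument count         <- sp after the final Push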
1107
1108void TurboAssembler::RestoreFrameStateForTailCall() {
1109  // if (FLAG_enable_embedded_constant_pool) {
1110  //   LoadU64(kConstantPoolRegister,
1111  //         MemOperand(fp, StandardFrameConstants::kConstantPoolOffset));
1112  //   set_constant_pool_available(false);
1113  // }
1114  DCHECK(!FLAG_enable_embedded_constant_pool);
1115  LoadU64(r14, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
1116  LoadU64(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
1117}
1118
1119void TurboAssembler::CanonicalizeNaN(const DoubleRegister dst,
1120                                     const DoubleRegister src) {
1121  // Turn potential sNaN into qNaN
1122  if (dst != src) ldr(dst, src);
1123  lzdr(kDoubleRegZero);
1124  sdbr(dst, kDoubleRegZero);
1125}
1126
1127void TurboAssembler::ConvertIntToDouble(DoubleRegister dst, Register src) {
1128  cdfbr(dst, src);
1129}
1130
1131void TurboAssembler::ConvertUnsignedIntToDouble(DoubleRegister dst,
1132                                                Register src) {
1133  if (CpuFeatures::IsSupported(FLOATING_POINT_EXT)) {
1134    cdlfbr(Condition(5), Condition(0), dst, src);
1135  } else {
1136    // zero-extend src
1137    llgfr(src, src);
1138    // convert to double
1139    cdgbr(dst, src);
1140  }
1141}
1142
1143void TurboAssembler::ConvertIntToFloat(DoubleRegister dst, Register src) {
1144  cefbra(Condition(4), dst, src);
1145}
1146
1147void TurboAssembler::ConvertUnsignedIntToFloat(DoubleRegister dst,
1148                                               Register src) {
1149  celfbr(Condition(4), Condition(0), dst, src);
1150}
1151
1152void TurboAssembler::ConvertInt64ToFloat(DoubleRegister double_dst,
1153                                         Register src) {
1154  cegbr(double_dst, src);
1155}
1156
1157void TurboAssembler::ConvertInt64ToDouble(DoubleRegister double_dst,
1158                                          Register src) {
1159  cdgbr(double_dst, src);
1160}
1161
1162void TurboAssembler::ConvertUnsignedInt64ToFloat(DoubleRegister double_dst,
1163                                                 Register src) {
1164  celgbr(Condition(0), Condition(0), double_dst, src);
1165}
1166
1167void TurboAssembler::ConvertUnsignedInt64ToDouble(DoubleRegister double_dst,
1168                                                  Register src) {
1169  cdlgbr(Condition(0), Condition(0), double_dst, src);
1170}
1171
1172void TurboAssembler::ConvertFloat32ToInt64(const Register dst,
1173                                           const DoubleRegister double_input,
1174                                           FPRoundingMode rounding_mode) {
1175  Condition m = Condition(0);
1176  switch (rounding_mode) {
1177    case kRoundToZero:
1178      m = Condition(5);
1179      break;
1180    case kRoundToNearest:
1181      UNIMPLEMENTED();
1182    case kRoundToPlusInf:
1183      m = Condition(6);
1184      break;
1185    case kRoundToMinusInf:
1186      m = Condition(7);
1187      break;
1188    default:
1189      UNIMPLEMENTED();
1190  }
1191  cgebr(m, dst, double_input);
1192}
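
// Note on the rounding-mode masks used by the conversion helpers in this
// file (z/Architecture M3 values, stated here as an informational aside):
//   4 = round to nearest, ties to even   5 = round toward zero
//   6 = round toward +infinity           7 = round toward -infinity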
1193
1194void TurboAssembler::ConvertDoubleToInt64(const Register dst,
1195                                          const DoubleRegister double_input,
1196                                          FPRoundingMode rounding_mode) {
1197  Condition m = Condition(0);
1198  switch (rounding_mode) {
1199    case kRoundToZero:
1200      m = Condition(5);
1201      break;
1202    case kRoundToNearest:
1203      UNIMPLEMENTED();
1204    case kRoundToPlusInf:
1205      m = Condition(6);
1206      break;
1207    case kRoundToMinusInf:
1208      m = Condition(7);
1209      break;
1210    default:
1211      UNIMPLEMENTED();
1212  }
1213  cgdbr(m, dst, double_input);
1214}
1215
1216void TurboAssembler::ConvertDoubleToInt32(const Register dst,
1217                                          const DoubleRegister double_input,
1218                                          FPRoundingMode rounding_mode) {
1219  Condition m = Condition(0);
1220  switch (rounding_mode) {
1221    case kRoundToZero:
1222      m = Condition(5);
1223      break;
1224    case kRoundToNearest:
1225      m = Condition(4);
1226      break;
1227    case kRoundToPlusInf:
1228      m = Condition(6);
1229      break;
1230    case kRoundToMinusInf:
1231      m = Condition(7);
1232      break;
1233    default:
1234      UNIMPLEMENTED();
1235  }
1236#ifdef V8_TARGET_ARCH_S390X
1237  lghi(dst, Operand::Zero());
1238#endif
1239  cfdbr(m, dst, double_input);
1240}
1241
1242void TurboAssembler::ConvertFloat32ToInt32(const Register result,
1243                                           const DoubleRegister double_input,
1244                                           FPRoundingMode rounding_mode) {
1245  Condition m = Condition(0);
1246  switch (rounding_mode) {
1247    case kRoundToZero:
1248      m = Condition(5);
1249      break;
1250    case kRoundToNearest:
1251      m = Condition(4);
1252      break;
1253    case kRoundToPlusInf:
1254      m = Condition(6);
1255      break;
1256    case kRoundToMinusInf:
1257      m = Condition(7);
1258      break;
1259    default:
1260      UNIMPLEMENTED();
1261  }
1262#ifdef V8_TARGET_ARCH_S390X
1263  lghi(result, Operand::Zero());
1264#endif
1265  cfebr(m, result, double_input);
1266}
1267
1268void TurboAssembler::ConvertFloat32ToUnsignedInt32(
1269    const Register result, const DoubleRegister double_input,
1270    FPRoundingMode rounding_mode) {
1271  Condition m = Condition(0);
1272  switch (rounding_mode) {
1273    case kRoundToZero:
1274      m = Condition(5);
1275      break;
1276    case kRoundToNearest:
1277      UNIMPLEMENTED();
1278    case kRoundToPlusInf:
1279      m = Condition(6);
1280      break;
1281    case kRoundToMinusInf:
1282      m = Condition(7);
1283      break;
1284    default:
1285      UNIMPLEMENTED();
1286  }
1287#ifdef V8_TARGET_ARCH_S390X
1288  lghi(result, Operand::Zero());
1289#endif
1290  clfebr(m, Condition(0), result, double_input);
1291}
1292
1293void TurboAssembler::ConvertFloat32ToUnsignedInt64(
1294    const Register result, const DoubleRegister double_input,
1295    FPRoundingMode rounding_mode) {
1296  Condition m = Condition(0);
1297  switch (rounding_mode) {
1298    case kRoundToZero:
1299      m = Condition(5);
1300      break;
1301    case kRoundToNearest:
1302      UNIMPLEMENTED();
1303    case kRoundToPlusInf:
1304      m = Condition(6);
1305      break;
1306    case kRoundToMinusInf:
1307      m = Condition(7);
1308      break;
1309    default:
1310      UNIMPLEMENTED();
1311  }
1312  clgebr(m, Condition(0), result, double_input);
1313}
1314
1315void TurboAssembler::ConvertDoubleToUnsignedInt64(
1316    const Register dst, const DoubleRegister double_input,
1317    FPRoundingMode rounding_mode) {
1318  Condition m = Condition(0);
1319  switch (rounding_mode) {
1320    case kRoundToZero:
1321      m = Condition(5);
1322      break;
1323    case kRoundToNearest:
1324      UNIMPLEMENTED();
1325    case kRoundToPlusInf:
1326      m = Condition(6);
1327      break;
1328    case kRoundToMinusInf:
1329      m = Condition(7);
1330      break;
1331    default:
1332      UNIMPLEMENTED();
1333  }
1334  clgdbr(m, Condition(0), dst, double_input);
1335}
1336
1337void TurboAssembler::ConvertDoubleToUnsignedInt32(
1338    const Register dst, const DoubleRegister double_input,
1339    FPRoundingMode rounding_mode) {
1340  Condition m = Condition(0);
1341  switch (rounding_mode) {
1342    case kRoundToZero:
1343      m = Condition(5);
1344      break;
1345    case kRoundToNearest:
1346      UNIMPLEMENTED();
1347    case kRoundToPlusInf:
1348      m = Condition(6);
1349      break;
1350    case kRoundToMinusInf:
1351      m = Condition(7);
1352      break;
1353    default:
1354      UNIMPLEMENTED();
1355  }
1356#ifdef V8_TARGET_ARCH_S390X
1357  lghi(dst, Operand::Zero());
1358#endif
1359  clfdbr(m, Condition(0), dst, double_input);
1360}
1361
1362void TurboAssembler::MovDoubleToInt64(Register dst, DoubleRegister src) {
1363  lgdr(dst, src);
1364}
1365
1366void TurboAssembler::MovInt64ToDouble(DoubleRegister dst, Register src) {
1367  ldgr(dst, src);
1368}
1369
1370void TurboAssembler::StubPrologue(StackFrame::Type type, Register base,
1371                                  int prologue_offset) {
1372  {
1373    ConstantPoolUnavailableScope constant_pool_unavailable(this);
1374    mov(r1, Operand(StackFrame::TypeToMarker(type)));
1375    PushCommonFrame(r1);
1376  }
1377}
1378
1379void TurboAssembler::Prologue(Register base, int prologue_offset) {
1380  DCHECK(base != no_reg);
1381  PushStandardFrame(r3);
1382}
1383
1384void TurboAssembler::DropArguments(Register count, ArgumentsCountType type,
1385                                   ArgumentsCountMode mode) {
1386  int receiver_bytes =
1387      (mode == kCountExcludesReceiver) ? kSystemPointerSize : 0;
1388  switch (type) {
1389    case kCountIsInteger: {
1390      ShiftLeftU64(ip, count, Operand(kSystemPointerSizeLog2));
1391      lay(sp, MemOperand(sp, ip));
1392      break;
1393    }
1394    case kCountIsSmi: {
1395      STATIC_ASSERT(kSmiTagSize == 1 && kSmiTag == 0);
1396      SmiToPtrArrayOffset(count, count);
1397      AddS64(sp, sp, count);
1398      break;
1399    }
1400    case kCountIsBytes: {
1401      AddS64(sp, sp, count);
1402      break;
1403    }
1404  }
1405  if (receiver_bytes != 0) {
1406    AddS64(sp, sp, Operand(receiver_bytes));
1407  }
1408}
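
// Illustrative: DropArguments(count, kCountIsInteger, kCountExcludesReceiver)
// with count == 2 releases 2 * kSystemPointerSize bytes for the arguments
// plus one receiver slot, i.e. 24 bytes on a 64-bit build.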
1409
1410void TurboAssembler::DropArgumentsAndPushNewReceiver(Register argc,
1411                                                     Register receiver,
1412                                                     ArgumentsCountType type,
1413                                                     ArgumentsCountMode mode) {
1414  DCHECK(!AreAliased(argc, receiver));
1415  if (mode == kCountExcludesReceiver) {
1416    // Drop arguments without receiver and override old receiver.
1417    DropArguments(argc, type, kCountIncludesReceiver);
1418    StoreU64(receiver, MemOperand(sp));
1419  } else {
1420    DropArguments(argc, type, mode);
1421    push(receiver);
1422  }
1423}
1424
1425void TurboAssembler::EnterFrame(StackFrame::Type type,
1426                                bool load_constant_pool_pointer_reg) {
1427  ASM_CODE_COMMENT(this);
1428  // We create a stack frame with:
1429  //    Return Addr <-- old sp
1430  //    Old FP      <-- new fp
1431  //    CP
1432  //    type
1433  //    CodeObject  <-- new sp
1434
1435  Register scratch = no_reg;
1436  if (!StackFrame::IsJavaScript(type)) {
1437    scratch = ip;
1438    mov(scratch, Operand(StackFrame::TypeToMarker(type)));
1439  }
1440  PushCommonFrame(scratch);
1441#if V8_ENABLE_WEBASSEMBLY
1442  if (type == StackFrame::WASM) Push(kWasmInstanceRegister);
1443#endif  // V8_ENABLE_WEBASSEMBLY
1444}
1445
1446int TurboAssembler::LeaveFrame(StackFrame::Type type, int stack_adjustment) {
1447  ASM_CODE_COMMENT(this);
1448  // Drop the execution stack down to the frame pointer and restore
1449  // the caller frame pointer, return address and constant pool pointer.
1450  LoadU64(r14, MemOperand(fp, StandardFrameConstants::kCallerPCOffset));
1451  if (is_int20(StandardFrameConstants::kCallerSPOffset + stack_adjustment)) {
1452    lay(r1, MemOperand(fp, StandardFrameConstants::kCallerSPOffset +
1453                               stack_adjustment));
1454  } else {
1455    AddS64(r1, fp,
1456           Operand(StandardFrameConstants::kCallerSPOffset + stack_adjustment));
1457  }
1458  LoadU64(fp, MemOperand(fp, StandardFrameConstants::kCallerFPOffset));
1459  mov(sp, r1);
1460  int frame_ends = pc_offset();
1461  return frame_ends;
1462}
1463
// ExitFrame layout (approximate; may need updating)
1465//
1466//  SP -> previousSP
1467//        LK reserved
1468//        sp_on_exit (for debug?)
1469// oldSP->prev SP
1470//        LK
1471//        <parameters on stack>
1472
// Prior to calling EnterExitFrame, a number of parameters have been pushed
// on the stack that need to be wrapped in a real frame. First we reserve a
// slot for the link register (LK) and push the previous SP, which is held
// in the fp register (r11). Then we allocate the new frame.
1478
1479// r14
1480// oldFP <- newFP
1481// SP
1482// Floats
1483// gaps
1484// Args
1485// ABIRes <- newSP
1486void MacroAssembler::EnterExitFrame(bool save_doubles, int stack_space,
1487                                    StackFrame::Type frame_type) {
1488  DCHECK(frame_type == StackFrame::EXIT ||
1489         frame_type == StackFrame::BUILTIN_EXIT);
1490  // Set up the frame structure on the stack.
1491  DCHECK_EQ(2 * kSystemPointerSize, ExitFrameConstants::kCallerSPDisplacement);
1492  DCHECK_EQ(1 * kSystemPointerSize, ExitFrameConstants::kCallerPCOffset);
1493  DCHECK_EQ(0 * kSystemPointerSize, ExitFrameConstants::kCallerFPOffset);
1494  DCHECK_GT(stack_space, 0);
1495
1496  // This is an opportunity to build a frame to wrap
1497  // all of the pushes that have happened inside of V8
1498  // since we were called from C code
1499  CleanseP(r14);
1500  mov(r1, Operand(StackFrame::TypeToMarker(frame_type)));
1501  PushCommonFrame(r1);
1502  // Reserve room for saved entry sp.
1503  lay(sp, MemOperand(fp, -ExitFrameConstants::kFixedFrameSizeFromFp));
1504
1505  if (FLAG_debug_code) {
1506    StoreU64(MemOperand(fp, ExitFrameConstants::kSPOffset), Operand::Zero(),
1507             r1);
1508  }
1509
1510  // Save the frame pointer and the context in top.
1511  Move(r1, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
1512                                     isolate()));
1513  StoreU64(fp, MemOperand(r1));
1514  Move(r1,
1515       ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
1516  StoreU64(cp, MemOperand(r1));
1517
1518  // Optionally save all volatile double registers.
1519  if (save_doubles) {
1520    MultiPushDoubles(kCallerSavedDoubles);
1521    // Note that d0 will be accessible at
1522    //   fp - ExitFrameConstants::kFrameSize -
1523    //   kNumCallerSavedDoubles * kDoubleSize,
1524    // since the sp slot and code slot were pushed after the fp.
1525  }
1526
1527  lay(sp, MemOperand(sp, -stack_space * kSystemPointerSize));
1528
1529  // Allocate and align the frame preparing for calling the runtime
1530  // function.
1531  const int frame_alignment = TurboAssembler::ActivationFrameAlignment();
1532  if (frame_alignment > 0) {
1533    DCHECK_EQ(frame_alignment, 8);
1534    ClearRightImm(sp, sp, Operand(3));  // equivalent to &= -8
1535  }
1536
1537  lay(sp, MemOperand(sp, -kNumRequiredStackFrameSlots * kSystemPointerSize));
1538  StoreU64(MemOperand(sp), Operand::Zero(), r0);
1539  // Set the exit frame sp value to point just before the return address
1540  // location.
1541  lay(r1, MemOperand(sp, kStackFrameSPSlot * kSystemPointerSize));
1542  StoreU64(r1, MemOperand(fp, ExitFrameConstants::kSPOffset));
1543}
1544
1545int TurboAssembler::ActivationFrameAlignment() {
1546#if !defined(USE_SIMULATOR)
1547  // Running on the real platform. Use the alignment as mandated by the local
1548  // environment.
1549  // Note: This will break if we ever start generating snapshots on one S390
1550  // platform for another S390 platform with a different alignment.
1551  return base::OS::ActivationFrameAlignment();
1552#else  // Simulated
1553  // If we are using the simulator then we should always align to the expected
1554  // alignment. As the simulator is used to generate snapshots we do not know
1555  // if the target platform will need alignment, so this is controlled from a
1556  // flag.
1557  return FLAG_sim_stack_alignment;
1558#endif
1559}
1560
1561void MacroAssembler::LeaveExitFrame(bool save_doubles, Register argument_count,
1562                                    bool argument_count_is_length) {
1563  // Optionally restore all double registers.
1564  if (save_doubles) {
1565    // Calculate the stack location of the saved doubles and restore them.
1566    const int kNumRegs = kNumCallerSavedDoubles;
1567    lay(r5, MemOperand(fp, -(ExitFrameConstants::kFixedFrameSizeFromFp +
1568                             kNumRegs * kDoubleSize)));
1569    MultiPopDoubles(kCallerSavedDoubles, r5);
1570  }
1571
1572  // Clear top frame.
1573  Move(ip, ExternalReference::Create(IsolateAddressId::kCEntryFPAddress,
1574                                     isolate()));
1575  StoreU64(MemOperand(ip), Operand(0, RelocInfo::NO_INFO), r0);
1576
1577  // Restore current context from top and clear it in debug mode.
1578  Move(ip,
1579       ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
1580  LoadU64(cp, MemOperand(ip));
1581
1582#ifdef DEBUG
1583  mov(r1, Operand(Context::kInvalidContext));
1584  Move(ip,
1585       ExternalReference::Create(IsolateAddressId::kContextAddress, isolate()));
1586  StoreU64(r1, MemOperand(ip));
1587#endif
1588
1589  // Tear down the exit frame, pop the arguments, and return.
1590  LeaveFrame(StackFrame::EXIT);
1591
1592  if (argument_count.is_valid()) {
1593    if (!argument_count_is_length) {
1594      ShiftLeftU64(argument_count, argument_count,
1595                   Operand(kSystemPointerSizeLog2));
1596    }
1597    la(sp, MemOperand(sp, argument_count));
1598  }
1599}
1600
1601void TurboAssembler::MovFromFloatResult(const DoubleRegister dst) {
1602  Move(dst, d0);
1603}
1604
1605void TurboAssembler::MovFromFloatParameter(const DoubleRegister dst) {
1606  Move(dst, d0);
1607}
1608
1609MemOperand MacroAssembler::StackLimitAsMemOperand(StackLimitKind kind) {
1610  DCHECK(root_array_available());
1611  Isolate* isolate = this->isolate();
1612  ExternalReference limit =
1613      kind == StackLimitKind::kRealStackLimit
1614          ? ExternalReference::address_of_real_jslimit(isolate)
1615          : ExternalReference::address_of_jslimit(isolate);
1616  DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
1617
1618  intptr_t offset =
1619      TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
1620  CHECK(is_int32(offset));
1621  return MemOperand(kRootRegister, offset);
1622}
1623
1624void MacroAssembler::StackOverflowCheck(Register num_args, Register scratch,
1625                                        Label* stack_overflow) {
1626  // Check the stack for overflow. We are not trying to catch
1627  // interruptions (e.g. debug break and preemption) here, so the "real stack
1628  // limit" is checked.
1629  LoadU64(scratch, StackLimitAsMemOperand(StackLimitKind::kRealStackLimit));
1630  // Make scratch the space we have left. The stack might already be overflowed
1631  // here which will cause scratch to become negative.
1632  SubS64(scratch, sp, scratch);
1633  // Check if the arguments will overflow the stack.
1634  ShiftLeftU64(r0, num_args, Operand(kSystemPointerSizeLog2));
1635  CmpS64(scratch, r0);
1636  ble(stack_overflow);  // Signed comparison.
1637}
1638
1639void MacroAssembler::InvokePrologue(Register expected_parameter_count,
1640                                    Register actual_parameter_count,
1641                                    Label* done, InvokeType type) {
1642  Label regular_invoke;
1643
1644  //  r2: actual arguments count
1645  //  r3: function (passed through to callee)
1646  //  r4: expected arguments count
1647
1648  DCHECK_EQ(actual_parameter_count, r2);
1649  DCHECK_EQ(expected_parameter_count, r4);
1650
  // If the expected parameter count is equal to the adaptor sentinel, no need
  // to push undefined values as arguments.
1653  if (kDontAdaptArgumentsSentinel != 0) {
1654    CmpS64(expected_parameter_count, Operand(kDontAdaptArgumentsSentinel));
1655    beq(&regular_invoke);
1656  }
1657
1658  // If overapplication or if the actual argument count is equal to the
1659  // formal parameter count, no need to push extra undefined values.
1660  SubS64(expected_parameter_count, expected_parameter_count,
1661         actual_parameter_count);
1662  ble(&regular_invoke);
1663
1664  Label stack_overflow;
1665  Register scratch = r6;
1666  StackOverflowCheck(expected_parameter_count, scratch, &stack_overflow);
1667
1668  // Underapplication. Move the arguments already in the stack, including the
1669  // receiver and the return address.
1670  {
1671    Label copy, check;
1672    Register num = r7, src = r8, dest = ip;  // r7 and r8 are context and root.
1673    mov(src, sp);
1674    // Update stack pointer.
1675    ShiftLeftU64(scratch, expected_parameter_count,
1676                 Operand(kSystemPointerSizeLog2));
1677    SubS64(sp, sp, scratch);
1678    mov(dest, sp);
1679    ltgr(num, actual_parameter_count);
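    // ltgr copies the count and also sets the condition code, so the initial
    // jump to 'check' can reuse it and skip the copy loop entirely when the
    // actual argument count is zero.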
1680    b(&check);
1681    bind(&copy);
1682    LoadU64(r0, MemOperand(src));
1683    lay(src, MemOperand(src, kSystemPointerSize));
1684    StoreU64(r0, MemOperand(dest));
1685    lay(dest, MemOperand(dest, kSystemPointerSize));
1686    SubS64(num, num, Operand(1));
1687    bind(&check);
1688    b(gt, &copy);
1689  }
1690
1691  // Fill remaining expected arguments with undefined values.
1692  LoadRoot(scratch, RootIndex::kUndefinedValue);
1693  {
1694    Label loop;
1695    bind(&loop);
1696    StoreU64(scratch, MemOperand(ip));
1697    lay(ip, MemOperand(ip, kSystemPointerSize));
1698    SubS64(expected_parameter_count, expected_parameter_count, Operand(1));
1699    bgt(&loop);
1700  }
1701  b(&regular_invoke);
1702
1703  bind(&stack_overflow);
1704  {
1705    FrameScope frame(
1706        this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
1707    CallRuntime(Runtime::kThrowStackOverflow);
1708    bkpt(0);
1709  }
1710
1711  bind(&regular_invoke);
1712}
1713
1714void MacroAssembler::CheckDebugHook(Register fun, Register new_target,
1715                                    Register expected_parameter_count,
1716                                    Register actual_parameter_count) {
1717  Label skip_hook;
1718
1719  ExternalReference debug_hook_active =
1720      ExternalReference::debug_hook_on_function_call_address(isolate());
1721  Move(r6, debug_hook_active);
1722  tm(MemOperand(r6), Operand(0xFF));
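  // Test-under-mask sets condition code 0 when the hook byte is zero, so beq
  // skips the debug hook when it is not armed.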
1723  beq(&skip_hook);
1724
1725  {
1726    // Load receiver to pass it later to DebugOnFunctionCall hook.
1727    LoadReceiver(r6, actual_parameter_count);
1728    FrameScope frame(
1729        this, has_frame() ? StackFrame::NO_FRAME_TYPE : StackFrame::INTERNAL);
1730
1731    SmiTag(expected_parameter_count);
1732    Push(expected_parameter_count);
1733
1734    SmiTag(actual_parameter_count);
1735    Push(actual_parameter_count);
1736
1737    if (new_target.is_valid()) {
1738      Push(new_target);
1739    }
1740    Push(fun, fun, r6);
1741    CallRuntime(Runtime::kDebugOnFunctionCall);
1742    Pop(fun);
1743    if (new_target.is_valid()) {
1744      Pop(new_target);
1745    }
1746
1747    Pop(actual_parameter_count);
1748    SmiUntag(actual_parameter_count);
1749
1750    Pop(expected_parameter_count);
1751    SmiUntag(expected_parameter_count);
1752  }
1753  bind(&skip_hook);
1754}
1755
1756void MacroAssembler::InvokeFunctionCode(Register function, Register new_target,
1757                                        Register expected_parameter_count,
1758                                        Register actual_parameter_count,
1759                                        InvokeType type) {
1760  // You can't call a function without a valid frame.
1761  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
1762  DCHECK_EQ(function, r3);
1763  DCHECK_IMPLIES(new_target.is_valid(), new_target == r5);
1764
1765  // On function call, call into the debugger if necessary.
1766  CheckDebugHook(function, new_target, expected_parameter_count,
1767                 actual_parameter_count);
1768
1769  // Clear the new.target register if not given.
1770  if (!new_target.is_valid()) {
1771    LoadRoot(r5, RootIndex::kUndefinedValue);
1772  }
1773
1774  Label done;
1775  InvokePrologue(expected_parameter_count, actual_parameter_count, &done, type);
1776  // We call indirectly through the code field in the function to
1777  // allow recompilation to take effect without changing any of the
1778  // call sites.
1779  Register code = kJavaScriptCallCodeStartRegister;
1780  LoadTaggedPointerField(code,
1781                         FieldMemOperand(function, JSFunction::kCodeOffset));
1782  switch (type) {
1783    case InvokeType::kCall:
1784      CallCodeObject(code);
1785      break;
1786    case InvokeType::kJump:
1787      JumpCodeObject(code);
1788      break;
1789  }
1790  // Continue here if InvokePrologue does handle the invocation due to
1791  // mismatched parameter counts.
1792  bind(&done);
1793}
1794
1795void MacroAssembler::InvokeFunctionWithNewTarget(
1796    Register fun, Register new_target, Register actual_parameter_count,
1797    InvokeType type) {
1798  // You can't call a function without a valid frame.
1799  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
1800
1801  // Contract with called JS functions requires that function is passed in r3.
1802  DCHECK_EQ(fun, r3);
1803
1804  Register expected_reg = r4;
1805  Register temp_reg = r6;
1806  LoadTaggedPointerField(cp, FieldMemOperand(fun, JSFunction::kContextOffset));
1807  LoadTaggedPointerField(
1808      temp_reg, FieldMemOperand(fun, JSFunction::kSharedFunctionInfoOffset));
1809  LoadU16(
1810      expected_reg,
1811      FieldMemOperand(temp_reg,
1812                      SharedFunctionInfo::kFormalParameterCountOffset));
1813
1814  InvokeFunctionCode(fun, new_target, expected_reg, actual_parameter_count,
1815                     type);
1816}
1817
1818void MacroAssembler::InvokeFunction(Register function,
1819                                    Register expected_parameter_count,
1820                                    Register actual_parameter_count,
1821                                    InvokeType type) {
1822  // You can't call a function without a valid frame.
1823  DCHECK_IMPLIES(type == InvokeType::kCall, has_frame());
1824
1825  // Contract with called JS functions requires that function is passed in r3.
1826  DCHECK_EQ(function, r3);
1827
1828  // Get the function and setup the context.
1829  LoadTaggedPointerField(cp,
1830                         FieldMemOperand(function, JSFunction::kContextOffset));
1831
1832  InvokeFunctionCode(r3, no_reg, expected_parameter_count,
1833                     actual_parameter_count, type);
1834}
1835
1836void MacroAssembler::PushStackHandler() {
  // Adjust this code if the stack handler layout asserted below changes.
1838  STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize);
1839  STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0 * kSystemPointerSize);
1840
1841  // Link the current handler as the next handler.
1842  Move(r7,
1843       ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
1844
  // Allocate the stack space for the handler (StackHandlerConstants::kSize).
1846  lay(sp, MemOperand(sp, -StackHandlerConstants::kSize));
1847
1848  // Store padding.
1849  lghi(r0, Operand::Zero());
1850  StoreU64(r0, MemOperand(sp));  // Padding.
1851
1852  // Copy the old handler into the next handler slot.
1853  MoveChar(MemOperand(sp, StackHandlerConstants::kNextOffset), MemOperand(r7),
1854           Operand(kSystemPointerSize));
1855  // Set this new handler as the current one.
1856  StoreU64(sp, MemOperand(r7));
1857}
1858
1859void MacroAssembler::PopStackHandler() {
1860  STATIC_ASSERT(StackHandlerConstants::kSize == 2 * kSystemPointerSize);
1861  STATIC_ASSERT(StackHandlerConstants::kNextOffset == 0);
1862
1863  // Pop the Next Handler into r3 and store it into Handler Address reference.
1864  Pop(r3);
1865  Move(ip,
1866       ExternalReference::Create(IsolateAddressId::kHandlerAddress, isolate()));
1867  StoreU64(r3, MemOperand(ip));
1868
1869  Drop(1);  // Drop padding.
1870}
1871
1872void MacroAssembler::CompareObjectType(Register object, Register map,
1873                                       Register type_reg, InstanceType type) {
1874  const Register temp = type_reg == no_reg ? r0 : type_reg;
1875
1876  LoadMap(map, object);
1877  CompareInstanceType(map, temp, type);
1878}
1879
1880void MacroAssembler::CompareInstanceType(Register map, Register type_reg,
1881                                         InstanceType type) {
1882  STATIC_ASSERT(Map::kInstanceTypeOffset < 4096);
1883  STATIC_ASSERT(LAST_TYPE <= 0xFFFF);
1884  LoadS16(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
1885  CmpS64(type_reg, Operand(type));
1886}
1887
1888void MacroAssembler::CompareRange(Register value, unsigned lower_limit,
1889                                  unsigned higher_limit) {
1890  ASM_CODE_COMMENT(this);
1891  DCHECK_LT(lower_limit, higher_limit);
1892  if (lower_limit != 0) {
1893    UseScratchRegisterScope temps(this);
1894    Register scratch = temps.Acquire();
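    // Single-compare range check: subtract lower_limit so that values below
    // the range wrap around to large unsigned numbers, then one unsigned
    // comparison against (higher_limit - lower_limit) covers both bounds.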
1895    mov(scratch, value);
1896    slgfi(scratch, Operand(lower_limit));
1897    CmpU64(scratch, Operand(higher_limit - lower_limit));
1898  } else {
1899    CmpU64(value, Operand(higher_limit));
1900  }
1901}
1902
1903void MacroAssembler::CompareInstanceTypeRange(Register map, Register type_reg,
1904                                              InstanceType lower_limit,
1905                                              InstanceType higher_limit) {
1906  DCHECK_LT(lower_limit, higher_limit);
1907  LoadU16(type_reg, FieldMemOperand(map, Map::kInstanceTypeOffset));
1908  CompareRange(type_reg, lower_limit, higher_limit);
1909}
1910
1911void MacroAssembler::CompareRoot(Register obj, RootIndex index) {
1912  int32_t offset = RootRegisterOffsetForRootIndex(index);
1913#ifdef V8_TARGET_BIG_ENDIAN
1914  offset += (COMPRESS_POINTERS_BOOL ? kTaggedSize : 0);
1915#endif
1916  CompareTagged(obj, MemOperand(kRootRegister, offset));
1917}
1918
1919void MacroAssembler::JumpIfIsInRange(Register value, unsigned lower_limit,
1920                                     unsigned higher_limit,
1921                                     Label* on_in_range) {
1922  CompareRange(value, lower_limit, higher_limit);
1923  ble(on_in_range);
1924}
1925
1926void TurboAssembler::TruncateDoubleToI(Isolate* isolate, Zone* zone,
1927                                       Register result,
1928                                       DoubleRegister double_input,
1929                                       StubCallMode stub_mode) {
1930  Label done;
1931
1932  TryInlineTruncateDoubleToI(result, double_input, &done);
1933
  // If we fell through, the inline version didn't succeed, so call the stub
  // instead.
1935  push(r14);
1936  // Put input on stack.
1937  lay(sp, MemOperand(sp, -kDoubleSize));
1938  StoreF64(double_input, MemOperand(sp));
1939
1940#if V8_ENABLE_WEBASSEMBLY
1941  if (stub_mode == StubCallMode::kCallWasmRuntimeStub) {
1942    Call(wasm::WasmCode::kDoubleToI, RelocInfo::WASM_STUB_CALL);
1943#else
1944  // For balance.
1945  if (false) {
1946#endif  // V8_ENABLE_WEBASSEMBLY
1947  } else {
1948    Call(BUILTIN_CODE(isolate, DoubleToI), RelocInfo::CODE_TARGET);
1949  }
1950
1951  LoadU64(result, MemOperand(sp, 0));
1952  la(sp, MemOperand(sp, kDoubleSize));
1953  pop(r14);
1954
1955  bind(&done);
1956}
1957
1958void TurboAssembler::TryInlineTruncateDoubleToI(Register result,
1959                                                DoubleRegister double_input,
1960                                                Label* done) {
1961  ConvertDoubleToInt64(result, double_input);
1962
1963  // Test for overflow
1964  TestIfInt32(result);
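  // If the 64-bit result fits in 32 bits the inline truncation succeeded;
  // otherwise fall through so the caller emits a call to the DoubleToI stub.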
1965  beq(done);
1966}
1967
1968void MacroAssembler::CallRuntime(const Runtime::Function* f, int num_arguments,
1969                                 SaveFPRegsMode save_doubles) {
1970  // All parameters are on the stack.  r2 has the return value after call.
1971
1972  // If the expected number of arguments of the runtime function is
1973  // constant, we check that the actual number of arguments match the
1974  // expectation.
1975  CHECK(f->nargs < 0 || f->nargs == num_arguments);
1976
1977  // TODO(1236192): Most runtime routines don't need the number of
1978  // arguments passed in because it is constant. At some point we
1979  // should remove this need and make the runtime routine entry code
1980  // smarter.
1981  mov(r2, Operand(num_arguments));
1982  Move(r3, ExternalReference::Create(f));
1983#if V8_TARGET_ARCH_S390X
1984  Handle<Code> code =
1985      CodeFactory::CEntry(isolate(), f->result_size, save_doubles);
1986#else
1987  Handle<Code> code = CodeFactory::CEntry(isolate(), 1, save_doubles);
1988#endif
1989
1990  Call(code, RelocInfo::CODE_TARGET);
1991}
1992
1993void MacroAssembler::TailCallRuntime(Runtime::FunctionId fid) {
1994  const Runtime::Function* function = Runtime::FunctionForId(fid);
1995  DCHECK_EQ(1, function->result_size);
1996  if (function->nargs >= 0) {
1997    mov(r2, Operand(function->nargs));
1998  }
1999  JumpToExternalReference(ExternalReference::Create(fid));
2000}
2001
2002void MacroAssembler::JumpToExternalReference(const ExternalReference& builtin,
2003                                             bool builtin_exit_frame) {
2004  Move(r3, builtin);
2005  Handle<Code> code = CodeFactory::CEntry(isolate(), 1, SaveFPRegsMode::kIgnore,
2006                                          ArgvMode::kStack, builtin_exit_frame);
2007  Jump(code, RelocInfo::CODE_TARGET);
2008}
2009
2010void MacroAssembler::JumpToOffHeapInstructionStream(Address entry) {
2011  mov(kOffHeapTrampolineRegister, Operand(entry, RelocInfo::OFF_HEAP_TARGET));
2012  Jump(kOffHeapTrampolineRegister);
2013}
2014
2015void MacroAssembler::LoadWeakValue(Register out, Register in,
2016                                   Label* target_if_cleared) {
2017  CmpS32(in, Operand(kClearedWeakHeapObjectLower32));
2018  beq(target_if_cleared);
2019
2020  AndP(out, in, Operand(~kWeakHeapObjectMask));
2021}
2022
2023void MacroAssembler::EmitIncrementCounter(StatsCounter* counter, int value,
2024                                          Register scratch1,
2025                                          Register scratch2) {
2026  DCHECK(value > 0 && is_int8(value));
2027  if (FLAG_native_code_counters && counter->Enabled()) {
2028    Move(scratch2, ExternalReference::Create(counter));
2029    // @TODO(john.yan): can be optimized by asi()
2030    LoadS32(scratch1, MemOperand(scratch2));
2031    AddS64(scratch1, Operand(value));
2032    StoreU32(scratch1, MemOperand(scratch2));
2033  }
2034}
2035
2036void MacroAssembler::EmitDecrementCounter(StatsCounter* counter, int value,
2037                                          Register scratch1,
2038                                          Register scratch2) {
2039  DCHECK(value > 0 && is_int8(value));
2040  if (FLAG_native_code_counters && counter->Enabled()) {
2041    Move(scratch2, ExternalReference::Create(counter));
2042    // @TODO(john.yan): can be optimized by asi()
2043    LoadS32(scratch1, MemOperand(scratch2));
2044    AddS64(scratch1, Operand(-value));
2045    StoreU32(scratch1, MemOperand(scratch2));
2046  }
2047}
2048
2049void TurboAssembler::Assert(Condition cond, AbortReason reason, CRegister cr) {
2050  if (FLAG_debug_code) Check(cond, reason, cr);
2051}
2052
2053void TurboAssembler::AssertUnreachable(AbortReason reason) {
2054  if (FLAG_debug_code) Abort(reason);
2055}
2056
2057void TurboAssembler::Check(Condition cond, AbortReason reason, CRegister cr) {
2058  Label L;
2059  b(cond, &L);
2060  Abort(reason);
2061  // will not return here
2062  bind(&L);
2063}
2064
2065void TurboAssembler::Abort(AbortReason reason) {
2066  Label abort_start;
2067  bind(&abort_start);
2068  if (FLAG_code_comments) {
2069    const char* msg = GetAbortReason(reason);
2070    RecordComment("Abort message: ");
2071    RecordComment(msg);
2072  }
2073
2074  // Avoid emitting call to builtin if requested.
2075  if (trap_on_abort()) {
2076    stop();
2077    return;
2078  }
2079
2080  if (should_abort_hard()) {
2081    // We don't care if we constructed a frame. Just pretend we did.
2082    FrameScope assume_frame(this, StackFrame::NO_FRAME_TYPE);
2083    lgfi(r2, Operand(static_cast<int>(reason)));
2084    PrepareCallCFunction(1, 0, r3);
2085    Move(r3, ExternalReference::abort_with_reason());
2086    // Use Call directly to avoid any unneeded overhead. The function won't
2087    // return anyway.
2088    Call(r3);
2089    return;
2090  }
2091
2092  LoadSmiLiteral(r3, Smi::FromInt(static_cast<int>(reason)));
2093
2094  // Disable stub call restrictions to always allow calls to abort.
2095  if (!has_frame_) {
2096    // We don't actually want to generate a pile of code for this, so just
2097    // claim there is a stack frame, without generating one.
2098    FrameScope scope(this, StackFrame::NO_FRAME_TYPE);
2099    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
2100  } else {
2101    Call(BUILTIN_CODE(isolate(), Abort), RelocInfo::CODE_TARGET);
2102  }
2103  // will not return here
2104}
2105
2106void TurboAssembler::LoadMap(Register destination, Register object) {
2107  LoadTaggedPointerField(destination,
2108                         FieldMemOperand(object, HeapObject::kMapOffset));
2109}
2110
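// The native context is reached through the current context's map: for
// context maps the "constructor or back pointer" field holds the native
// context, from which the requested slot is then loaded.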
2111void MacroAssembler::LoadNativeContextSlot(Register dst, int index) {
2112  LoadMap(dst, cp);
2113  LoadTaggedPointerField(
2114      dst, FieldMemOperand(
2115               dst, Map::kConstructorOrBackPointerOrNativeContextOffset));
2116  LoadTaggedPointerField(dst, MemOperand(dst, Context::SlotOffset(index)));
2117}
2118
2119void TurboAssembler::AssertNotSmi(Register object) {
2120  if (FLAG_debug_code) {
2121    STATIC_ASSERT(kSmiTag == 0);
2122    TestIfSmi(object);
2123    Check(ne, AbortReason::kOperandIsASmi, cr0);
2124  }
2125}
2126
2127void TurboAssembler::AssertSmi(Register object) {
2128  if (FLAG_debug_code) {
2129    STATIC_ASSERT(kSmiTag == 0);
2130    TestIfSmi(object);
2131    Check(eq, AbortReason::kOperandIsNotASmi, cr0);
2132  }
2133}
2134
2135void MacroAssembler::AssertConstructor(Register object, Register scratch) {
2136  if (FLAG_debug_code) {
2137    STATIC_ASSERT(kSmiTag == 0);
2138    TestIfSmi(object);
2139    Check(ne, AbortReason::kOperandIsASmiAndNotAConstructor);
2140    LoadMap(scratch, object);
2141    tm(FieldMemOperand(scratch, Map::kBitFieldOffset),
2142       Operand(Map::Bits1::IsConstructorBit::kMask));
2143    Check(ne, AbortReason::kOperandIsNotAConstructor);
2144  }
2145}
2146
2147void MacroAssembler::AssertFunction(Register object) {
2148  if (FLAG_debug_code) {
2149    STATIC_ASSERT(kSmiTag == 0);
2150    TestIfSmi(object);
2151    Check(ne, AbortReason::kOperandIsASmiAndNotAFunction, cr0);
2152    push(object);
2153    LoadMap(object, object);
2154    CompareInstanceTypeRange(object, object, FIRST_JS_FUNCTION_TYPE,
2155                             LAST_JS_FUNCTION_TYPE);
2156    pop(object);
2157    Check(le, AbortReason::kOperandIsNotAFunction);
2158  }
2159}
2160
2161void MacroAssembler::AssertCallableFunction(Register object) {
2162  if (!FLAG_debug_code) return;
2163  ASM_CODE_COMMENT(this);
2164  STATIC_ASSERT(kSmiTag == 0);
2165  TestIfSmi(object);
2166  Check(ne, AbortReason::kOperandIsASmiAndNotAFunction);
2167  push(object);
2168  LoadMap(object, object);
2169  CompareInstanceTypeRange(object, object, FIRST_CALLABLE_JS_FUNCTION_TYPE,
2170                           LAST_CALLABLE_JS_FUNCTION_TYPE);
2171  pop(object);
2172  Check(le, AbortReason::kOperandIsNotACallableFunction);
2173}
2174
2175void MacroAssembler::AssertBoundFunction(Register object) {
2176  if (FLAG_debug_code) {
2177    STATIC_ASSERT(kSmiTag == 0);
2178    TestIfSmi(object);
2179    Check(ne, AbortReason::kOperandIsASmiAndNotABoundFunction, cr0);
2180    push(object);
2181    CompareObjectType(object, object, object, JS_BOUND_FUNCTION_TYPE);
2182    pop(object);
2183    Check(eq, AbortReason::kOperandIsNotABoundFunction);
2184  }
2185}
2186
2187void MacroAssembler::AssertGeneratorObject(Register object) {
2188  if (!FLAG_debug_code) return;
2189  TestIfSmi(object);
2190  Check(ne, AbortReason::kOperandIsASmiAndNotAGeneratorObject, cr0);
2191
2192  // Load map
2193  Register map = object;
2194  push(object);
2195  LoadMap(map, object);
2196
2197  // Check if JSGeneratorObject
2198  Label do_check;
2199  Register instance_type = object;
2200  CompareInstanceType(map, instance_type, JS_GENERATOR_OBJECT_TYPE);
2201  beq(&do_check);
2202
2203  // Check if JSAsyncFunctionObject (See MacroAssembler::CompareInstanceType)
2204  CmpS64(instance_type, Operand(JS_ASYNC_FUNCTION_OBJECT_TYPE));
2205  beq(&do_check);
2206
2207  // Check if JSAsyncGeneratorObject (See MacroAssembler::CompareInstanceType)
2208  CmpS64(instance_type, Operand(JS_ASYNC_GENERATOR_OBJECT_TYPE));
2209
2210  bind(&do_check);
2211  // Restore generator object to register and perform assertion
2212  pop(object);
2213  Check(eq, AbortReason::kOperandIsNotAGeneratorObject);
2214}
2215
2216void MacroAssembler::AssertUndefinedOrAllocationSite(Register object,
2217                                                     Register scratch) {
2218  if (FLAG_debug_code) {
2219    Label done_checking;
2220    AssertNotSmi(object);
2221    CompareRoot(object, RootIndex::kUndefinedValue);
2222    beq(&done_checking, Label::kNear);
2223    LoadMap(scratch, object);
2224    CompareInstanceType(scratch, scratch, ALLOCATION_SITE_TYPE);
2225    Assert(eq, AbortReason::kExpectedUndefinedOrCell);
2226    bind(&done_checking);
2227  }
2228}
2229
2230static const int kRegisterPassedArguments = 5;
2231
2232int TurboAssembler::CalculateStackPassedWords(int num_reg_arguments,
2233                                              int num_double_arguments) {
2234  int stack_passed_words = 0;
2235  if (num_double_arguments > DoubleRegister::kNumRegisters) {
2236    stack_passed_words +=
2237        2 * (num_double_arguments - DoubleRegister::kNumRegisters);
2238  }
2239  // Up to five simple arguments are passed in registers r2..r6
2240  if (num_reg_arguments > kRegisterPassedArguments) {
2241    stack_passed_words += num_reg_arguments - kRegisterPassedArguments;
2242  }
2243  return stack_passed_words;
2244}
2245
2246void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
2247                                          int num_double_arguments,
2248                                          Register scratch) {
2249  int frame_alignment = ActivationFrameAlignment();
2250  int stack_passed_arguments =
2251      CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
2252  int stack_space = kNumRequiredStackFrameSlots;
2253  if (frame_alignment > kSystemPointerSize) {
2254    // Make stack end at alignment and make room for stack arguments
2255    // -- preserving original value of sp.
2256    mov(scratch, sp);
2257    lay(sp, MemOperand(sp, -(stack_passed_arguments + 1) * kSystemPointerSize));
2258    DCHECK(base::bits::IsPowerOfTwo(frame_alignment));
2259    ClearRightImm(sp, sp,
2260                  Operand(base::bits::WhichPowerOfTwo(frame_alignment)));
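    // Save the pre-alignment sp just above the outgoing stack arguments;
    // CallCFunctionHelper reloads it from there to undo the alignment.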
2261    StoreU64(scratch,
2262             MemOperand(sp, (stack_passed_arguments)*kSystemPointerSize));
2263  } else {
2264    stack_space += stack_passed_arguments;
2265  }
2266  lay(sp, MemOperand(sp, (-stack_space) * kSystemPointerSize));
2267}
2268
2269void TurboAssembler::PrepareCallCFunction(int num_reg_arguments,
2270                                          Register scratch) {
2271  PrepareCallCFunction(num_reg_arguments, 0, scratch);
2272}
2273
2274void TurboAssembler::MovToFloatParameter(DoubleRegister src) { Move(d0, src); }
2275
2276void TurboAssembler::MovToFloatResult(DoubleRegister src) { Move(d0, src); }
2277
2278void TurboAssembler::MovToFloatParameters(DoubleRegister src1,
2279                                          DoubleRegister src2) {
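  // If src2 is already in d0, move it to d2 first so that writing src1 into
  // d0 does not clobber it.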
2280  if (src2 == d0) {
2281    DCHECK(src1 != d2);
2282    Move(d2, src2);
2283    Move(d0, src1);
2284  } else {
2285    Move(d0, src1);
2286    Move(d2, src2);
2287  }
2288}
2289
2290void TurboAssembler::CallCFunction(ExternalReference function,
2291                                   int num_reg_arguments,
2292                                   int num_double_arguments) {
2293  Move(ip, function);
2294  CallCFunctionHelper(ip, num_reg_arguments, num_double_arguments);
2295}
2296
2297void TurboAssembler::CallCFunction(Register function, int num_reg_arguments,
2298                                   int num_double_arguments) {
2299  CallCFunctionHelper(function, num_reg_arguments, num_double_arguments);
2300}
2301
2302void TurboAssembler::CallCFunction(ExternalReference function,
2303                                   int num_arguments) {
2304  CallCFunction(function, num_arguments, 0);
2305}
2306
2307void TurboAssembler::CallCFunction(Register function, int num_arguments) {
2308  CallCFunction(function, num_arguments, 0);
2309}
2310
2311void TurboAssembler::CallCFunctionHelper(Register function,
2312                                         int num_reg_arguments,
2313                                         int num_double_arguments) {
2314  DCHECK_LE(num_reg_arguments + num_double_arguments, kMaxCParameters);
2315  DCHECK(has_frame());
2316
2317  // Save the frame pointer and PC so that the stack layout remains iterable,
2318  // even without an ExitFrame which normally exists between JS and C frames.
2319  Register addr_scratch = r1;
2320  // See x64 code for reasoning about how to address the isolate data fields.
2321  if (root_array_available()) {
2322    LoadPC(r0);
2323    StoreU64(r0, MemOperand(kRootRegister,
2324                            IsolateData::fast_c_call_caller_pc_offset()));
2325    StoreU64(fp, MemOperand(kRootRegister,
2326                            IsolateData::fast_c_call_caller_fp_offset()));
2327  } else {
2328    DCHECK_NOT_NULL(isolate());
2329
2330    Move(addr_scratch,
2331         ExternalReference::fast_c_call_caller_pc_address(isolate()));
2332    LoadPC(r0);
2333    StoreU64(r0, MemOperand(addr_scratch));
2334    Move(addr_scratch,
2335         ExternalReference::fast_c_call_caller_fp_address(isolate()));
2336    StoreU64(fp, MemOperand(addr_scratch));
2337  }
2338
2339  // Just call directly. The function called cannot cause a GC, or
2340  // allow preemption, so the return address in the link register
2341  // stays correct.
2342  Register dest = function;
2343  if (ABI_CALL_VIA_IP) {
2344    Move(ip, function);
2345    dest = ip;
2346  }
2347
2348  Call(dest);
2349
2350  // We don't unset the PC; the FP is the source of truth.
2351  Register zero_scratch = r0;
2352  lghi(zero_scratch, Operand::Zero());
2353
2354  if (root_array_available()) {
2355    StoreU64(
2356        zero_scratch,
2357        MemOperand(kRootRegister, IsolateData::fast_c_call_caller_fp_offset()));
2358  } else {
2359    DCHECK_NOT_NULL(isolate());
2360    Move(addr_scratch,
2361         ExternalReference::fast_c_call_caller_fp_address(isolate()));
2362    StoreU64(zero_scratch, MemOperand(addr_scratch));
2363  }
2364
2365  int stack_passed_arguments =
2366      CalculateStackPassedWords(num_reg_arguments, num_double_arguments);
2367  int stack_space = kNumRequiredStackFrameSlots + stack_passed_arguments;
2368  if (ActivationFrameAlignment() > kSystemPointerSize) {
2369    // Load the original stack pointer (pre-alignment) from the stack
2370    LoadU64(sp, MemOperand(sp, stack_space * kSystemPointerSize));
2371  } else {
2372    la(sp, MemOperand(sp, stack_space * kSystemPointerSize));
2373  }
2374}
2375
2376void TurboAssembler::CheckPageFlag(
2377    Register object,
2378    Register scratch,  // scratch may be same register as object
2379    int mask, Condition cc, Label* condition_met) {
2380  DCHECK(cc == ne || cc == eq);
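  // Clearing the low kPageSizeBits bits yields the start of the
  // BasicMemoryChunk containing the object; its header holds the flags word
  // tested below.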
2381  ClearRightImm(scratch, object, Operand(kPageSizeBits));
2382
2383  if (base::bits::IsPowerOfTwo(mask)) {
2384    // If it's a power of two, we can use Test-Under-Mask Memory-Imm form
2385    // which allows testing of a single byte in memory.
2386    int32_t byte_offset = 4;
2387    uint32_t shifted_mask = mask;
2388    // Determine the byte offset to be tested
2389    if (mask <= 0x80) {
2390      byte_offset = kSystemPointerSize - 1;
2391    } else if (mask < 0x8000) {
2392      byte_offset = kSystemPointerSize - 2;
2393      shifted_mask = mask >> 8;
2394    } else if (mask < 0x800000) {
2395      byte_offset = kSystemPointerSize - 3;
2396      shifted_mask = mask >> 16;
2397    } else {
2398      byte_offset = kSystemPointerSize - 4;
2399      shifted_mask = mask >> 24;
2400    }
2401#if V8_TARGET_LITTLE_ENDIAN
2402    // Reverse the byte_offset if emulating on little endian platform
2403    byte_offset = kSystemPointerSize - byte_offset - 1;
2404#endif
2405    tm(MemOperand(scratch, BasicMemoryChunk::kFlagsOffset + byte_offset),
2406       Operand(shifted_mask));
2407  } else {
2408    LoadU64(scratch, MemOperand(scratch, BasicMemoryChunk::kFlagsOffset));
2409    AndP(r0, scratch, Operand(mask));
2410  }
2412
2413  if (cc == ne) {
2414    bne(condition_met);
2415  }
2416  if (cc == eq) {
2417    beq(condition_met);
2418  }
2419}
2420
2421Register GetRegisterThatIsNotOneOf(Register reg1, Register reg2, Register reg3,
2422                                   Register reg4, Register reg5,
2423                                   Register reg6) {
2424  RegList regs = {reg1, reg2, reg3, reg4, reg5, reg6};
2425
2426  const RegisterConfiguration* config = RegisterConfiguration::Default();
2427  for (int i = 0; i < config->num_allocatable_general_registers(); ++i) {
2428    int code = config->GetAllocatableGeneralCode(i);
2429    Register candidate = Register::from_code(code);
2430    if (regs.has(candidate)) continue;
2431    return candidate;
2432  }
2433  UNREACHABLE();
2434}
2435
2436void TurboAssembler::mov(Register dst, Register src) { lgr(dst, src); }
2437
2438void TurboAssembler::mov(Register dst, const Operand& src) {
2439  int64_t value = 0;
2440
2441  if (src.is_heap_object_request()) {
2442    RequestHeapObject(src.heap_object_request());
2443  } else {
2444    value = src.immediate();
2445  }
2446
2447  if (src.rmode() != RelocInfo::NO_INFO) {
2448    // some form of relocation needed
2449    RecordRelocInfo(src.rmode(), value);
2450  }
2451
2452  int32_t hi_32 = static_cast<int32_t>(value >> 32);
2453  int32_t lo_32 = static_cast<int32_t>(value);
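  // For plain immediates, pick the shortest encoding: LLILL/LLILF or
  // LLIHL/LLIHF when only one 32-bit half is non-zero, LGHI/LGFI for small
  // signed values, and an IIHF/IILF pair as the general fallback.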
2454
2455  if (src.rmode() == RelocInfo::NO_INFO) {
2456    if (hi_32 == 0) {
2457      if (is_uint16(lo_32)) {
2458        llill(dst, Operand(lo_32));
2459        return;
2460      }
2461      llilf(dst, Operand(lo_32));
2462      return;
2463    } else if (lo_32 == 0) {
2464      if (is_uint16(hi_32)) {
2465        llihl(dst, Operand(hi_32));
2466        return;
2467      }
2468      llihf(dst, Operand(hi_32));
2469      return;
2470    } else if (is_int16(value)) {
2471      lghi(dst, Operand(value));
2472      return;
2473    } else if (is_int32(value)) {
2474      lgfi(dst, Operand(value));
2475      return;
2476    }
2477  }
2478
2479  iihf(dst, Operand(hi_32));
2480  iilf(dst, Operand(lo_32));
2481}
2482
2483void TurboAssembler::MulS32(Register dst, const MemOperand& src1) {
2484  if (is_uint12(src1.offset())) {
2485    ms(dst, src1);
2486  } else if (is_int20(src1.offset())) {
2487    msy(dst, src1);
2488  } else {
2489    UNIMPLEMENTED();
2490  }
2491}
2492
2493void TurboAssembler::MulS32(Register dst, Register src1) { msr(dst, src1); }
2494
2495void TurboAssembler::MulS32(Register dst, const Operand& src1) {
2496  msfi(dst, src1);
2497}
2498
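// High 32 bits of a signed 32 x 32 -> 64 multiply: sign-extend src1 to 64
// bits, multiply by the sign-extended 32-bit second operand, then shift the
// 64-bit product right by 32.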
2499#define Generate_MulHigh32(instr) \
2500  {                               \
2501    lgfr(dst, src1);              \
2502    instr(dst, src2);             \
2503    srlg(dst, dst, Operand(32));  \
2504  }
2505
2506void TurboAssembler::MulHighS32(Register dst, Register src1,
2507                                const MemOperand& src2) {
2508  Generate_MulHigh32(msgf);
2509}
2510
2511void TurboAssembler::MulHighS32(Register dst, Register src1, Register src2) {
2512  if (dst == src2) {
2513    std::swap(src1, src2);
2514  }
2515  Generate_MulHigh32(msgfr);
2516}
2517
2518void TurboAssembler::MulHighS32(Register dst, Register src1,
2519                                const Operand& src2) {
2520  Generate_MulHigh32(msgfi);
2521}
2522
2523#undef Generate_MulHigh32
2524
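// Unsigned 32 x 32 -> 64 multiply via the implicit r0:r1 pair: ML/MLR leave
// the high half of the product in r0, which is then copied to dst.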
2525#define Generate_MulHighU32(instr) \
2526  {                                \
2527    lr(r1, src1);                  \
2528    instr(r0, src2);               \
2529    LoadU32(dst, r0);               \
2530  }
2531
2532void TurboAssembler::MulHighU32(Register dst, Register src1,
2533                                const MemOperand& src2) {
2534  Generate_MulHighU32(ml);
2535}
2536
2537void TurboAssembler::MulHighU32(Register dst, Register src1, Register src2) {
2538  Generate_MulHighU32(mlr);
2539}
2540
2541void TurboAssembler::MulHighU32(Register dst, Register src1,
2542                                const Operand& src2) {
2543  USE(dst);
2544  USE(src1);
2545  USE(src2);
2546  UNREACHABLE();
2547}
2548
2549#undef Generate_MulHighU32
2550
2551#define Generate_Mul32WithOverflowIfCCUnequal(instr) \
2552  {                                                  \
2553    lgfr(dst, src1);                                 \
2554    instr(dst, src2);                                \
2555    cgfr(dst, dst);                                  \
2556  }
2557
2558void TurboAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
2559                                                  const MemOperand& src2) {
2560  Register result = dst;
2561  if (src2.rx() == dst || src2.rb() == dst) dst = r0;
2562  Generate_Mul32WithOverflowIfCCUnequal(msgf);
2563  if (result != dst) llgfr(result, dst);
2564}
2565
2566void TurboAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
2567                                                  Register src2) {
2568  if (dst == src2) {
2569    std::swap(src1, src2);
2570  }
2571  Generate_Mul32WithOverflowIfCCUnequal(msgfr);
2572}
2573
2574void TurboAssembler::Mul32WithOverflowIfCCUnequal(Register dst, Register src1,
2575                                                  const Operand& src2) {
2576  Generate_Mul32WithOverflowIfCCUnequal(msgfi);
2577}
2578
2579#undef Generate_Mul32WithOverflowIfCCUnequal
2580
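// Signed 32-bit division via DSGF/DSGFR: the sign-extended dividend goes in
// r1, the quotient comes back in r1 (copied to dst) and the remainder in r0
// (used by Generate_Mod32 below).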
2581#define Generate_Div32(instr) \
2582  {                           \
2583    lgfr(r1, src1);           \
2584    instr(r0, src2);          \
2585    LoadU32(dst, r1);          \
2586  }
2587
2588void TurboAssembler::DivS32(Register dst, Register src1,
2589                            const MemOperand& src2) {
2590  Generate_Div32(dsgf);
2591}
2592
2593void TurboAssembler::DivS32(Register dst, Register src1, Register src2) {
2594  Generate_Div32(dsgfr);
2595}
2596
2597#undef Generate_Div32
2598
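// Unsigned 32-bit division: SRDL shifts the r0:r1 pair right by 32, placing
// the dividend in r1 with a zero high word in r0, as DL/DLR expect; the
// quotient ends up in r1 and the remainder in r0.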
2599#define Generate_DivU32(instr) \
2600  {                            \
2601    lr(r0, src1);              \
2602    srdl(r0, Operand(32));     \
2603    instr(r0, src2);           \
2604    LoadU32(dst, r1);           \
2605  }
2606
2607void TurboAssembler::DivU32(Register dst, Register src1,
2608                            const MemOperand& src2) {
2609  Generate_DivU32(dl);
2610}
2611
2612void TurboAssembler::DivU32(Register dst, Register src1, Register src2) {
2613  Generate_DivU32(dlr);
2614}
2615
2616#undef Generate_DivU32
2617
2618#define Generate_Div64(instr) \
2619  {                           \
2620    lgr(r1, src1);            \
2621    instr(r0, src2);          \
2622    lgr(dst, r1);             \
2623  }
2624
2625void TurboAssembler::DivS64(Register dst, Register src1,
2626                            const MemOperand& src2) {
2627  Generate_Div64(dsg);
2628}
2629
2630void TurboAssembler::DivS64(Register dst, Register src1, Register src2) {
2631  Generate_Div64(dsgr);
2632}
2633
2634#undef Generate_Div64
2635
2636#define Generate_DivU64(instr) \
2637  {                            \
2638    lgr(r1, src1);             \
2639    lghi(r0, Operand::Zero()); \
2640    instr(r0, src2);           \
2641    lgr(dst, r1);              \
2642  }
2643
2644void TurboAssembler::DivU64(Register dst, Register src1,
2645                            const MemOperand& src2) {
2646  Generate_DivU64(dlg);
2647}
2648
2649void TurboAssembler::DivU64(Register dst, Register src1, Register src2) {
2650  Generate_DivU64(dlgr);
2651}
2652
2653#undef Generate_DivU64
2654
2655#define Generate_Mod32(instr) \
2656  {                           \
2657    lgfr(r1, src1);           \
2658    instr(r0, src2);          \
2659    LoadU32(dst, r0);          \
2660  }
2661
2662void TurboAssembler::ModS32(Register dst, Register src1,
2663                            const MemOperand& src2) {
2664  Generate_Mod32(dsgf);
2665}
2666
2667void TurboAssembler::ModS32(Register dst, Register src1, Register src2) {
2668  Generate_Mod32(dsgfr);
2669}
2670
2671#undef Generate_Mod32
2672
2673#define Generate_ModU32(instr) \
2674  {                            \
2675    lr(r0, src1);              \
2676    srdl(r0, Operand(32));     \
2677    instr(r0, src2);           \
2678    LoadU32(dst, r0);           \
2679  }
2680
2681void TurboAssembler::ModU32(Register dst, Register src1,
2682                            const MemOperand& src2) {
2683  Generate_ModU32(dl);
2684}
2685
2686void TurboAssembler::ModU32(Register dst, Register src1, Register src2) {
2687  Generate_ModU32(dlr);
2688}
2689
2690#undef Generate_ModU32
2691
2692#define Generate_Mod64(instr) \
2693  {                           \
2694    lgr(r1, src1);            \
2695    instr(r0, src2);          \
2696    lgr(dst, r0);             \
2697  }
2698
2699void TurboAssembler::ModS64(Register dst, Register src1,
2700                            const MemOperand& src2) {
2701  Generate_Mod64(dsg);
2702}
2703
2704void TurboAssembler::ModS64(Register dst, Register src1, Register src2) {
2705  Generate_Mod64(dsgr);
2706}
2707
2708#undef Generate_Mod64
2709
2710#define Generate_ModU64(instr) \
2711  {                            \
2712    lgr(r1, src1);             \
2713    lghi(r0, Operand::Zero()); \
2714    instr(r0, src2);           \
2715    lgr(dst, r0);              \
2716  }
2717
2718void TurboAssembler::ModU64(Register dst, Register src1,
2719                            const MemOperand& src2) {
2720  Generate_ModU64(dlg);
2721}
2722
2723void TurboAssembler::ModU64(Register dst, Register src1, Register src2) {
2724  Generate_ModU64(dlgr);
2725}
2726
2727#undef Generate_ModU64
2728
2729void TurboAssembler::MulS64(Register dst, const Operand& opnd) {
2730  msgfi(dst, opnd);
2731}
2732
2733void TurboAssembler::MulS64(Register dst, Register src) { msgr(dst, src); }
2734
2735void TurboAssembler::MulS64(Register dst, const MemOperand& opnd) {
2736  msg(dst, opnd);
2737}
2738
2739void TurboAssembler::Sqrt(DoubleRegister result, DoubleRegister input) {
2740  sqdbr(result, input);
2741}
2742void TurboAssembler::Sqrt(DoubleRegister result, const MemOperand& input) {
2743  if (is_uint12(input.offset())) {
2744    sqdb(result, input);
2745  } else {
2746    ldy(result, input);
2747    sqdbr(result, result);
2748  }
2749}
2750//----------------------------------------------------------------------------
2751//  Add Instructions
2752//----------------------------------------------------------------------------
2753
2754// Add 32-bit (Register dst = Register dst + Immediate opnd)
2755void TurboAssembler::AddS32(Register dst, const Operand& opnd) {
2756  if (is_int16(opnd.immediate()))
2757    ahi(dst, opnd);
2758  else
2759    afi(dst, opnd);
2760}
2761
2762// Add Pointer Size (Register dst = Register dst + Immediate opnd)
2763void TurboAssembler::AddS64(Register dst, const Operand& opnd) {
2764  if (is_int16(opnd.immediate()))
2765    aghi(dst, opnd);
2766  else
2767    agfi(dst, opnd);
2768}
2769
2770void TurboAssembler::AddS32(Register dst, Register src, int32_t opnd) {
2771  AddS32(dst, src, Operand(opnd));
2772}
2773
2774// Add 32-bit (Register dst = Register src + Immediate opnd)
2775void TurboAssembler::AddS32(Register dst, Register src, const Operand& opnd) {
2776  if (dst != src) {
2777    if (CpuFeatures::IsSupported(DISTINCT_OPS) && is_int16(opnd.immediate())) {
2778      ahik(dst, src, opnd);
2779      return;
2780    }
2781    lr(dst, src);
2782  }
2783  AddS32(dst, opnd);
2784}
2785
2786void TurboAssembler::AddS64(Register dst, Register src, int32_t opnd) {
2787  AddS64(dst, src, Operand(opnd));
2788}
2789
2790// Add Pointer Size (Register dst = Register src + Immediate opnd)
2791void TurboAssembler::AddS64(Register dst, Register src, const Operand& opnd) {
2792  if (dst != src) {
2793    if (CpuFeatures::IsSupported(DISTINCT_OPS) && is_int16(opnd.immediate())) {
2794      aghik(dst, src, opnd);
2795      return;
2796    }
2797    mov(dst, src);
2798  }
2799  AddS64(dst, opnd);
2800}
2801
2802// Add 32-bit (Register dst = Register dst + Register src)
2803void TurboAssembler::AddS32(Register dst, Register src) { ar(dst, src); }
2804
2805// Add Pointer Size (Register dst = Register dst + Register src)
2806void TurboAssembler::AddS64(Register dst, Register src) { agr(dst, src); }
2807
2808// Add 32-bit (Register dst = Register src1 + Register src2)
2809void TurboAssembler::AddS32(Register dst, Register src1, Register src2) {
2810  if (dst != src1 && dst != src2) {
    // We prefer to generate AR/AGR over the non-clobbering ARK/AGRK
    // as AR is a smaller instruction
2813    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2814      ark(dst, src1, src2);
2815      return;
2816    } else {
2817      lr(dst, src1);
2818    }
2819  } else if (dst == src2) {
2820    src2 = src1;
2821  }
2822  ar(dst, src2);
2823}
2824
2825// Add Pointer Size (Register dst = Register src1 + Register src2)
2826void TurboAssembler::AddS64(Register dst, Register src1, Register src2) {
2827  if (dst != src1 && dst != src2) {
    // We prefer to generate AR/AGR over the non-clobbering ARK/AGRK
    // as AR is a smaller instruction
2830    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2831      agrk(dst, src1, src2);
2832      return;
2833    } else {
2834      mov(dst, src1);
2835    }
2836  } else if (dst == src2) {
2837    src2 = src1;
2838  }
2839  agr(dst, src2);
2840}
2841
2842// Add 32-bit (Register-Memory)
2843void TurboAssembler::AddS32(Register dst, const MemOperand& opnd) {
2844  DCHECK(is_int20(opnd.offset()));
2845  if (is_uint12(opnd.offset()))
2846    a(dst, opnd);
2847  else
2848    ay(dst, opnd);
2849}
2850
2851// Add Pointer Size (Register-Memory)
2852void TurboAssembler::AddS64(Register dst, const MemOperand& opnd) {
2853  DCHECK(is_int20(opnd.offset()));
2854  ag(dst, opnd);
2855}
2856
2857// Add 32-bit (Memory - Immediate)
2858void TurboAssembler::AddS32(const MemOperand& opnd, const Operand& imm) {
2859  DCHECK(is_int8(imm.immediate()));
2860  DCHECK(is_int20(opnd.offset()));
2861  DCHECK(CpuFeatures::IsSupported(GENERAL_INSTR_EXT));
2862  asi(opnd, imm);
2863}
2864
2865// Add Pointer-sized (Memory - Immediate)
2866void TurboAssembler::AddS64(const MemOperand& opnd, const Operand& imm) {
2867  DCHECK(is_int8(imm.immediate()));
2868  DCHECK(is_int20(opnd.offset()));
2869  DCHECK(CpuFeatures::IsSupported(GENERAL_INSTR_EXT));
2870  agsi(opnd, imm);
2871}
2872
2873//----------------------------------------------------------------------------
2874//  Add Logical Instructions
2875//----------------------------------------------------------------------------
2876
2877// Add Logical 32-bit (Register dst = Register src1 + Register src2)
2878void TurboAssembler::AddU32(Register dst, Register src1, Register src2) {
2879  if (dst != src2 && dst != src1) {
2880    lr(dst, src1);
2881    alr(dst, src2);
2882  } else if (dst != src2) {
2883    // dst == src1
2884    DCHECK(dst == src1);
2885    alr(dst, src2);
2886  } else {
2887    // dst == src2
2888    DCHECK(dst == src2);
2889    alr(dst, src1);
2890  }
2891}
2892
2893// Add Logical 32-bit (Register dst = Register dst + Immediate opnd)
2894void TurboAssembler::AddU32(Register dst, const Operand& imm) {
2895  alfi(dst, imm);
2896}
2897
2898// Add Logical Pointer Size (Register dst = Register dst + Immediate opnd)
2899void TurboAssembler::AddU64(Register dst, const Operand& imm) {
2900  algfi(dst, imm);
2901}
2902
2903void TurboAssembler::AddU64(Register dst, Register src1, Register src2) {
2904  if (dst != src2 && dst != src1) {
2905    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2906      algrk(dst, src1, src2);
2907    } else {
2908      lgr(dst, src1);
2909      algr(dst, src2);
2910    }
2911  } else if (dst != src2) {
2912    // dst == src1
2913    DCHECK(dst == src1);
2914    algr(dst, src2);
2915  } else {
2916    // dst == src2
2917    DCHECK(dst == src2);
2918    algr(dst, src1);
2919  }
2920}
2921
2922// Add Logical 32-bit (Register-Memory)
2923void TurboAssembler::AddU32(Register dst, const MemOperand& opnd) {
2924  DCHECK(is_int20(opnd.offset()));
2925  if (is_uint12(opnd.offset()))
2926    al_z(dst, opnd);
2927  else
2928    aly(dst, opnd);
2929}
2930
2931// Add Logical Pointer Size (Register-Memory)
2932void TurboAssembler::AddU64(Register dst, const MemOperand& opnd) {
2933  DCHECK(is_int20(opnd.offset()));
2934  alg(dst, opnd);
2935}
2936
2937//----------------------------------------------------------------------------
2938//  Subtract Instructions
2939//----------------------------------------------------------------------------
2940
2941// Subtract Logical 32-bit (Register dst = Register src1 - Register src2)
2942void TurboAssembler::SubU32(Register dst, Register src1, Register src2) {
2943  if (dst != src2 && dst != src1) {
2944    lr(dst, src1);
2945    slr(dst, src2);
2946  } else if (dst != src2) {
2947    // dst == src1
2948    DCHECK(dst == src1);
2949    slr(dst, src2);
2950  } else {
2951    // dst == src2
2952    DCHECK(dst == src2);
2953    lr(r0, dst);
2954    SubU32(dst, src1, r0);
2955  }
2956}
2957
2958// Subtract 32-bit (Register dst = Register dst - Immediate opnd)
2959void TurboAssembler::SubS32(Register dst, const Operand& imm) {
2960  AddS32(dst, Operand(-(imm.immediate())));
2961}
2962
2963// Subtract Pointer Size (Register dst = Register dst - Immediate opnd)
2964void TurboAssembler::SubS64(Register dst, const Operand& imm) {
2965  AddS64(dst, Operand(-(imm.immediate())));
2966}
2967
2968void TurboAssembler::SubS32(Register dst, Register src, int32_t imm) {
2969  SubS32(dst, src, Operand(imm));
2970}
2971
2972// Subtract 32-bit (Register dst = Register src - Immediate opnd)
2973void TurboAssembler::SubS32(Register dst, Register src, const Operand& imm) {
2974  AddS32(dst, src, Operand(-(imm.immediate())));
2975}
2976
2977void TurboAssembler::SubS64(Register dst, Register src, int32_t imm) {
2978  SubS64(dst, src, Operand(imm));
2979}
2980
2981// Subtract Pointer Sized (Register dst = Register src - Immediate opnd)
2982void TurboAssembler::SubS64(Register dst, Register src, const Operand& imm) {
2983  AddS64(dst, src, Operand(-(imm.immediate())));
2984}
2985
2986// Subtract 32-bit (Register dst = Register dst - Register src)
2987void TurboAssembler::SubS32(Register dst, Register src) { sr(dst, src); }
2988
2989// Subtract Pointer Size (Register dst = Register dst - Register src)
2990void TurboAssembler::SubS64(Register dst, Register src) { sgr(dst, src); }
2991
2992// Subtract 32-bit (Register = Register - Register)
2993void TurboAssembler::SubS32(Register dst, Register src1, Register src2) {
2994  // Use non-clobbering version if possible
2995  if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
2996    srk(dst, src1, src2);
2997    return;
2998  }
2999  if (dst != src1 && dst != src2) lr(dst, src1);
  // When dst aliases src2 (i.e. dst = src1 - dst), negate dst and then add
  // src1.
3001  if (dst != src1 && dst == src2) {
3002    Label done;
3003    lcr(dst, dst);  // dst = -dst
3004    b(overflow, &done);
3005    ar(dst, src1);  // dst = dst + src
3006    bind(&done);
3007  } else {
3008    sr(dst, src2);
3009  }
3010}
3011
3012// Subtract Pointer Sized (Register = Register - Register)
3013void TurboAssembler::SubS64(Register dst, Register src1, Register src2) {
3014  // Use non-clobbering version if possible
3015  if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3016    sgrk(dst, src1, src2);
3017    return;
3018  }
3019  if (dst != src1 && dst != src2) mov(dst, src1);
  // When dst aliases src2 (i.e. dst = src1 - dst), negate dst and then add
  // src1.
3021  if (dst != src1 && dst == src2) {
3022    Label done;
3023    lcgr(dst, dst);  // dst = -dst
3024    b(overflow, &done);
3025    AddS64(dst, src1);  // dst = dst + src
3026    bind(&done);
3027  } else {
3028    SubS64(dst, src2);
3029  }
3030}
3031
3032// Subtract 32-bit (Register-Memory)
3033void TurboAssembler::SubS32(Register dst, const MemOperand& opnd) {
3034  DCHECK(is_int20(opnd.offset()));
3035  if (is_uint12(opnd.offset()))
3036    s(dst, opnd);
3037  else
3038    sy(dst, opnd);
3039}
3040
3041// Subtract Pointer Sized (Register - Memory)
3042void TurboAssembler::SubS64(Register dst, const MemOperand& opnd) {
3043#if V8_TARGET_ARCH_S390X
3044  sg(dst, opnd);
3045#else
3046  SubS32(dst, opnd);
3047#endif
3048}
3049
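// A 32-bit float value lives in the upper half of a 64-bit FP register, so
// the GPR bit pattern is shifted up by 32 before LDGR (and shifted back down
// after LGDR in MovFloatToInt).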
3050void TurboAssembler::MovIntToFloat(DoubleRegister dst, Register src) {
3051  sllg(r0, src, Operand(32));
3052  ldgr(dst, r0);
3053}
3054
3055void TurboAssembler::MovFloatToInt(Register dst, DoubleRegister src) {
3056  lgdr(dst, src);
3057  srlg(dst, dst, Operand(32));
3058}
3059
3060// Load And Subtract 32-bit (similar to laa/lan/lao/lax)
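// Implemented by negating src and using Load-And-Add: memory is atomically
// decremented by src and dst receives the original memory value.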
3061void TurboAssembler::LoadAndSub32(Register dst, Register src,
3062                                  const MemOperand& opnd) {
3063  lcr(dst, src);
3064  laa(dst, dst, opnd);
3065}
3066
3067void TurboAssembler::LoadAndSub64(Register dst, Register src,
3068                                  const MemOperand& opnd) {
3069  lcgr(dst, src);
3070  laag(dst, dst, opnd);
3071}
3072
3073//----------------------------------------------------------------------------
3074//  Subtract Logical Instructions
3075//----------------------------------------------------------------------------
3076
3077// Subtract Logical 32-bit (Register - Memory)
3078void TurboAssembler::SubU32(Register dst, const MemOperand& opnd) {
3079  DCHECK(is_int20(opnd.offset()));
3080  if (is_uint12(opnd.offset()))
3081    sl(dst, opnd);
3082  else
3083    sly(dst, opnd);
3084}
3085
3086// Subtract Logical Pointer Sized (Register - Memory)
3087void TurboAssembler::SubU64(Register dst, const MemOperand& opnd) {
3088  DCHECK(is_int20(opnd.offset()));
3089#if V8_TARGET_ARCH_S390X
3090  slgf(dst, opnd);
3091#else
3092  SubU32(dst, opnd);
3093#endif
3094}
3095
3096//----------------------------------------------------------------------------
3097//  Bitwise Operations
3098//----------------------------------------------------------------------------
3099
3100// AND 32-bit - dst = dst & src
3101void TurboAssembler::And(Register dst, Register src) { nr(dst, src); }
3102
3103// AND Pointer Size - dst = dst & src
3104void TurboAssembler::AndP(Register dst, Register src) { ngr(dst, src); }
3105
// Non-clobbering AND 32-bit - dst = src1 & src2
3107void TurboAssembler::And(Register dst, Register src1, Register src2) {
3108  if (dst != src1 && dst != src2) {
    // We prefer to generate NR/NGR over the non-clobbering NRK/NGRK
    // as NR is a smaller instruction
3111    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3112      nrk(dst, src1, src2);
3113      return;
3114    } else {
3115      lr(dst, src1);
3116    }
3117  } else if (dst == src2) {
3118    src2 = src1;
3119  }
3120  And(dst, src2);
3121}
3122
// Non-clobbering AND pointer size - dst = src1 & src2
3124void TurboAssembler::AndP(Register dst, Register src1, Register src2) {
3125  if (dst != src1 && dst != src2) {
    // We prefer to generate NR/NGR over the non-clobbering NRK/NGRK
    // as NR is a smaller instruction
3128    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3129      ngrk(dst, src1, src2);
3130      return;
3131    } else {
3132      mov(dst, src1);
3133    }
3134  } else if (dst == src2) {
3135    src2 = src1;
3136  }
3137  AndP(dst, src2);
3138}
3139
3140// AND 32-bit (Reg - Mem)
3141void TurboAssembler::And(Register dst, const MemOperand& opnd) {
3142  DCHECK(is_int20(opnd.offset()));
3143  if (is_uint12(opnd.offset()))
3144    n(dst, opnd);
3145  else
3146    ny(dst, opnd);
3147}
3148
3149// AND Pointer Size (Reg - Mem)
3150void TurboAssembler::AndP(Register dst, const MemOperand& opnd) {
3151  DCHECK(is_int20(opnd.offset()));
3152#if V8_TARGET_ARCH_S390X
3153  ng(dst, opnd);
3154#else
3155  And(dst, opnd);
3156#endif
3157}
3158
3159// AND 32-bit - dst = dst & imm
3160void TurboAssembler::And(Register dst, const Operand& opnd) { nilf(dst, opnd); }
3161
3162// AND Pointer Size - dst = dst & imm
3163void TurboAssembler::AndP(Register dst, const Operand& opnd) {
3164#if V8_TARGET_ARCH_S390X
3165  intptr_t value = opnd.immediate();
3166  if (value >> 32 != -1) {
3167    // this may not work b/c condition code won't be set correctly
3168    nihf(dst, Operand(value >> 32));
3169  }
3170  nilf(dst, Operand(value & 0xFFFFFFFF));
3171#else
3172  And(dst, opnd);
3173#endif
3174}
3175
3176// AND 32-bit - dst = src & imm
3177void TurboAssembler::And(Register dst, Register src, const Operand& opnd) {
3178  if (dst != src) lr(dst, src);
3179  nilf(dst, opnd);
3180}
3181
3182// AND Pointer Size - dst = src & imm
3183void TurboAssembler::AndP(Register dst, Register src, const Operand& opnd) {
3184  // Try to exploit RISBG first
3185  intptr_t value = opnd.immediate();
3186  if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
3187    intptr_t shifted_value = value;
3188    int trailing_zeros = 0;
3189
3190    // We start checking how many trailing zeros are left at the end.
3191    while ((0 != shifted_value) && (0 == (shifted_value & 1))) {
3192      trailing_zeros++;
3193      shifted_value >>= 1;
3194    }
3195
    // If shifted_value (the value with its trailing zeros shifted out) is 1
    // less than a power of 2, the original value contains a single run of
    // consecutive 1 bits and RISBG can select it.
    // Special case: If shifted_value is zero, we cannot use RISBG, as it
    //               requires selection of at least 1 bit.
3200    if ((0 != shifted_value) && base::bits::IsPowerOfTwo(shifted_value + 1)) {
3201      int startBit =
3202          base::bits::CountLeadingZeros64(shifted_value) - trailing_zeros;
3203      int endBit = 63 - trailing_zeros;
3204      // Start: startBit, End: endBit, Shift = 0, true = zero unselected bits.
3205      RotateInsertSelectBits(dst, src, Operand(startBit), Operand(endBit),
3206                             Operand::Zero(), true);
3207      return;
3208    } else if (-1 == shifted_value) {
3209      // A Special case in which all top bits up to MSB are 1's.  In this case,
3210      // we can set startBit to be 0.
3211      int endBit = 63 - trailing_zeros;
3212      RotateInsertSelectBits(dst, src, Operand::Zero(), Operand(endBit),
3213                             Operand::Zero(), true);
3214      return;
3215    }
3216  }
3217
  // If the mask is zero the AND result is zero regardless of src, so the copy
  // into dst can be skipped.
3219  if (dst != src && (0 != value)) mov(dst, src);
3220  AndP(dst, opnd);
3221}
3222
// OR 32-bit - dst = dst | src
3224void TurboAssembler::Or(Register dst, Register src) { or_z(dst, src); }
3225
// OR Pointer Size - dst = dst | src
3227void TurboAssembler::OrP(Register dst, Register src) { ogr(dst, src); }
3228
// Non-clobbering OR 32-bit - dst = src1 | src2
3230void TurboAssembler::Or(Register dst, Register src1, Register src2) {
3231  if (dst != src1 && dst != src2) {
    // Prefer the two-operand OR over the non-clobbering ORK,
    // as OR is a smaller instruction.
3234    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3235      ork(dst, src1, src2);
3236      return;
3237    } else {
3238      lr(dst, src1);
3239    }
3240  } else if (dst == src2) {
3241    src2 = src1;
3242  }
3243  Or(dst, src2);
3244}
3245
// Non-clobbering OR pointer size - dst = src1 | src2
3247void TurboAssembler::OrP(Register dst, Register src1, Register src2) {
3248  if (dst != src1 && dst != src2) {
    // Prefer the two-operand OGR over the non-clobbering OGRK
    // where possible, as the two-operand form is never larger.
3251    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3252      ogrk(dst, src1, src2);
3253      return;
3254    } else {
3255      mov(dst, src1);
3256    }
3257  } else if (dst == src2) {
3258    src2 = src1;
3259  }
3260  OrP(dst, src2);
3261}
3262
3263// OR 32-bit (Reg - Mem)
3264void TurboAssembler::Or(Register dst, const MemOperand& opnd) {
3265  DCHECK(is_int20(opnd.offset()));
3266  if (is_uint12(opnd.offset()))
3267    o(dst, opnd);
3268  else
3269    oy(dst, opnd);
3270}
3271
3272// OR Pointer Size (Reg - Mem)
3273void TurboAssembler::OrP(Register dst, const MemOperand& opnd) {
3274  DCHECK(is_int20(opnd.offset()));
3275#if V8_TARGET_ARCH_S390X
3276  og(dst, opnd);
3277#else
3278  Or(dst, opnd);
3279#endif
3280}
3281
// OR 32-bit - dst = dst | imm
3283void TurboAssembler::Or(Register dst, const Operand& opnd) { oilf(dst, opnd); }
3284
// OR Pointer Size - dst = dst | imm
3286void TurboAssembler::OrP(Register dst, const Operand& opnd) {
3287#if V8_TARGET_ARCH_S390X
3288  intptr_t value = opnd.immediate();
3289  if (value >> 32 != 0) {
    // Note: splitting the OR into oihf + oilf means the final condition code
    // reflects only the low-word operation.
3291    oihf(dst, Operand(value >> 32));
3292  }
3293  oilf(dst, Operand(value & 0xFFFFFFFF));
3294#else
3295  Or(dst, opnd);
3296#endif
3297}
3298
// OR 32-bit - dst = src | imm
3300void TurboAssembler::Or(Register dst, Register src, const Operand& opnd) {
3301  if (dst != src) lr(dst, src);
3302  oilf(dst, opnd);
3303}
3304
// OR Pointer Size - dst = src | imm
3306void TurboAssembler::OrP(Register dst, Register src, const Operand& opnd) {
3307  if (dst != src) mov(dst, src);
3308  OrP(dst, opnd);
3309}
3310
// XOR 32-bit - dst = dst ^ src
3312void TurboAssembler::Xor(Register dst, Register src) { xr(dst, src); }
3313
// XOR Pointer Size - dst = dst ^ src
3315void TurboAssembler::XorP(Register dst, Register src) { xgr(dst, src); }
3316
// Non-clobbering XOR 32-bit - dst = src1 ^ src2
3318void TurboAssembler::Xor(Register dst, Register src1, Register src2) {
3319  if (dst != src1 && dst != src2) {
    // Prefer the two-operand XR over the non-clobbering XRK,
    // as XR is a smaller instruction.
3322    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3323      xrk(dst, src1, src2);
3324      return;
3325    } else {
3326      lr(dst, src1);
3327    }
3328  } else if (dst == src2) {
3329    src2 = src1;
3330  }
3331  Xor(dst, src2);
3332}
3333
// Non-clobbering XOR pointer size - dst = src1 ^ src2
3335void TurboAssembler::XorP(Register dst, Register src1, Register src2) {
3336  if (dst != src1 && dst != src2) {
    // Prefer the two-operand XGR over the non-clobbering XGRK
    // where possible, as the two-operand form is never larger.
3339    if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3340      xgrk(dst, src1, src2);
3341      return;
3342    } else {
3343      mov(dst, src1);
3344    }
3345  } else if (dst == src2) {
3346    src2 = src1;
3347  }
3348  XorP(dst, src2);
3349}
3350
3351// XOR 32-bit (Reg - Mem)
3352void TurboAssembler::Xor(Register dst, const MemOperand& opnd) {
3353  DCHECK(is_int20(opnd.offset()));
3354  if (is_uint12(opnd.offset()))
3355    x(dst, opnd);
3356  else
3357    xy(dst, opnd);
3358}
3359
3360// XOR Pointer Size (Reg - Mem)
3361void TurboAssembler::XorP(Register dst, const MemOperand& opnd) {
3362  DCHECK(is_int20(opnd.offset()));
3363#if V8_TARGET_ARCH_S390X
3364  xg(dst, opnd);
3365#else
3366  Xor(dst, opnd);
3367#endif
3368}
3369
// XOR 32-bit - dst = dst ^ imm
3371void TurboAssembler::Xor(Register dst, const Operand& opnd) { xilf(dst, opnd); }
3372
// XOR Pointer Size - dst = dst ^ imm
3374void TurboAssembler::XorP(Register dst, const Operand& opnd) {
3375#if V8_TARGET_ARCH_S390X
3376  intptr_t value = opnd.immediate();
3377  xihf(dst, Operand(value >> 32));
3378  xilf(dst, Operand(value & 0xFFFFFFFF));
3379#else
3380  Xor(dst, opnd);
3381#endif
3382}
3383
// XOR 32-bit - dst = src ^ imm
3385void TurboAssembler::Xor(Register dst, Register src, const Operand& opnd) {
3386  if (dst != src) lr(dst, src);
3387  xilf(dst, opnd);
3388}
3389
// XOR Pointer Size - dst = src ^ imm
3391void TurboAssembler::XorP(Register dst, Register src, const Operand& opnd) {
3392  if (dst != src) mov(dst, src);
3393  XorP(dst, opnd);
3394}
3395
3396void TurboAssembler::Not32(Register dst, Register src) {
3397  if (src != no_reg && src != dst) lr(dst, src);
3398  xilf(dst, Operand(0xFFFFFFFF));
3399}
3400
3401void TurboAssembler::Not64(Register dst, Register src) {
3402  if (src != no_reg && src != dst) lgr(dst, src);
3403  xihf(dst, Operand(0xFFFFFFFF));
3404  xilf(dst, Operand(0xFFFFFFFF));
3405}
3406
3407void TurboAssembler::NotP(Register dst, Register src) {
3408#if V8_TARGET_ARCH_S390X
3409  Not64(dst, src);
3410#else
3411  Not32(dst, src);
3412#endif
3413}
3414
3415void TurboAssembler::LoadPositiveP(Register result, Register input) {
3416#if V8_TARGET_ARCH_S390X
3417  lpgr(result, input);
3418#else
3419  lpr(result, input);
3420#endif
3421}
3422
3423void TurboAssembler::LoadPositive32(Register result, Register input) {
3424  lpr(result, input);
3425  lgfr(result, result);
3426}
3427
3428//-----------------------------------------------------------------------------
3429//  Compare Helpers
3430//-----------------------------------------------------------------------------
3431
3432// Compare 32-bit Register vs Register
3433void TurboAssembler::CmpS32(Register src1, Register src2) { cr_z(src1, src2); }
3434
3435// Compare Pointer Sized Register vs Register
3436void TurboAssembler::CmpS64(Register src1, Register src2) { cgr(src1, src2); }
3437
3438// Compare 32-bit Register vs Immediate
3439// This helper will set up proper relocation entries if required.
3440void TurboAssembler::CmpS32(Register dst, const Operand& opnd) {
3441  if (opnd.rmode() == RelocInfo::NO_INFO) {
3442    intptr_t value = opnd.immediate();
3443    if (is_int16(value))
3444      chi(dst, opnd);
3445    else
3446      cfi(dst, opnd);
3447  } else {
3448    // Need to generate relocation record here
3449    RecordRelocInfo(opnd.rmode(), opnd.immediate());
3450    cfi(dst, opnd);
3451  }
3452}
3453
// Compare Pointer Sized Register vs Immediate
3455// This helper will set up proper relocation entries if required.
3456void TurboAssembler::CmpS64(Register dst, const Operand& opnd) {
3457  if (opnd.rmode() == RelocInfo::NO_INFO) {
3458    cgfi(dst, opnd);
3459  } else {
3460    mov(r0, opnd);  // Need to generate 64-bit relocation
3461    cgr(dst, r0);
3462  }
3463}
3464
3465// Compare 32-bit Register vs Memory
3466void TurboAssembler::CmpS32(Register dst, const MemOperand& opnd) {
3467  // make sure offset is within 20 bit range
3468  DCHECK(is_int20(opnd.offset()));
3469  if (is_uint12(opnd.offset()))
3470    c(dst, opnd);
3471  else
3472    cy(dst, opnd);
3473}
3474
3475// Compare Pointer Size Register vs Memory
3476void TurboAssembler::CmpS64(Register dst, const MemOperand& opnd) {
3477  // make sure offset is within 20 bit range
3478  DCHECK(is_int20(opnd.offset()));
3479  cg(dst, opnd);
3480}
3481
// Using cs or csy based on the offset
3483void TurboAssembler::CmpAndSwap(Register old_val, Register new_val,
3484                                const MemOperand& opnd) {
3485  if (is_uint12(opnd.offset())) {
3486    cs(old_val, new_val, opnd);
3487  } else {
3488    csy(old_val, new_val, opnd);
3489  }
3490}
3491
3492void TurboAssembler::CmpAndSwap64(Register old_val, Register new_val,
3493                                  const MemOperand& opnd) {
3494  DCHECK(is_int20(opnd.offset()));
3495  csg(old_val, new_val, opnd);
3496}
3497
3498//-----------------------------------------------------------------------------
3499// Compare Logical Helpers
3500//-----------------------------------------------------------------------------
3501
3502// Compare Logical 32-bit Register vs Register
3503void TurboAssembler::CmpU32(Register dst, Register src) { clr(dst, src); }
3504
3505// Compare Logical Pointer Sized Register vs Register
3506void TurboAssembler::CmpU64(Register dst, Register src) {
3507#ifdef V8_TARGET_ARCH_S390X
3508  clgr(dst, src);
3509#else
3510  CmpU32(dst, src);
3511#endif
3512}
3513
3514// Compare Logical 32-bit Register vs Immediate
3515void TurboAssembler::CmpU32(Register dst, const Operand& opnd) {
3516  clfi(dst, opnd);
3517}
3518
3519// Compare Logical Pointer Sized Register vs Immediate
3520void TurboAssembler::CmpU64(Register dst, const Operand& opnd) {
3521#if V8_TARGET_ARCH_S390X
3522  DCHECK_EQ(static_cast<uint32_t>(opnd.immediate() >> 32), 0);
3523  clgfi(dst, opnd);
3524#else
3525  CmpU32(dst, opnd);
3526#endif
3527}
3528
3529// Compare Logical 32-bit Register vs Memory
3530void TurboAssembler::CmpU32(Register dst, const MemOperand& opnd) {
3531  // make sure offset is within 20 bit range
3532  DCHECK(is_int20(opnd.offset()));
3533  if (is_uint12(opnd.offset()))
3534    cl(dst, opnd);
3535  else
3536    cly(dst, opnd);
3537}
3538
3539// Compare Logical Pointer Sized Register vs Memory
3540void TurboAssembler::CmpU64(Register dst, const MemOperand& opnd) {
3541  // make sure offset is within 20 bit range
3542  DCHECK(is_int20(opnd.offset()));
3543#if V8_TARGET_ARCH_S390X
3544  clg(dst, opnd);
3545#else
3546  CmpU32(dst, opnd);
3547#endif
3548}
3549
3550void TurboAssembler::Branch(Condition c, const Operand& opnd) {
3551  intptr_t value = opnd.immediate();
3552  if (is_int16(value))
3553    brc(c, opnd);
3554  else
3555    brcl(c, opnd);
3556}
3557
3558// Branch On Count.  Decrement R1, and branch if R1 != 0.
3559void TurboAssembler::BranchOnCount(Register r1, Label* l) {
3560  int32_t offset = branch_offset(l);
3561  if (is_int16(offset)) {
3562#if V8_TARGET_ARCH_S390X
3563    brctg(r1, Operand(offset));
3564#else
3565    brct(r1, Operand(offset));
3566#endif
3567  } else {
3568    AddS64(r1, Operand(-1));
3569    Branch(ne, Operand(offset));
3570  }
3571}
3572
3573void TurboAssembler::LoadSmiLiteral(Register dst, Smi smi) {
3574  intptr_t value = static_cast<intptr_t>(smi.ptr());
3575#if defined(V8_COMPRESS_POINTERS) || defined(V8_31BIT_SMIS_ON_64BIT_ARCH)
3576  llilf(dst, Operand(value));
3577#else
3578  DCHECK_EQ(value & 0xFFFFFFFF, 0);
  // The smi value is loaded into the upper 32 bits; the lower 32 bits are
  // zero.
3580  llihf(dst, Operand(value >> 32));
3581#endif
3582}
3583
3584void TurboAssembler::CmpSmiLiteral(Register src1, Smi smi, Register scratch) {
3585#if defined(V8_COMPRESS_POINTERS) || defined(V8_31BIT_SMIS_ON_64BIT_ARCH)
3586  // CFI takes 32-bit immediate.
3587  cfi(src1, Operand(smi));
3588#else
3589  if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
3590    cih(src1, Operand(static_cast<intptr_t>(smi.ptr()) >> 32));
3591  } else {
3592    LoadSmiLiteral(scratch, smi);
3593    cgr(src1, scratch);
3594  }
3595#endif
3596}
3597
3598void TurboAssembler::LoadU64(Register dst, const MemOperand& mem,
3599                             Register scratch) {
3600  int offset = mem.offset();
3601
3602  MemOperand src = mem;
3603  if (!is_int20(offset)) {
3604    DCHECK(scratch != no_reg && scratch != r0 && mem.rx() == r0);
3605    DCHECK(scratch != mem.rb());
3606    mov(scratch, Operand(offset));
3607    src = MemOperand(mem.rb(), scratch);
3608  }
3609  lg(dst, src);
3610}
3611
3612// Store a "pointer" sized value to the memory location
3613void TurboAssembler::StoreU64(Register src, const MemOperand& mem,
3614                              Register scratch) {
3615  if (!is_int20(mem.offset())) {
3616    DCHECK(scratch != no_reg);
3617    DCHECK(scratch != r0);
3618    mov(scratch, Operand(mem.offset()));
3619    stg(src, MemOperand(mem.rb(), scratch));
3620  } else {
3621    stg(src, mem);
3622  }
3623}
3624
3625// Store a "pointer" sized constant to the memory location
3626void TurboAssembler::StoreU64(const MemOperand& mem, const Operand& opnd,
3627                              Register scratch) {
3628  // Relocations not supported
3629  DCHECK_EQ(opnd.rmode(), RelocInfo::NO_INFO);
3630
3631  // Try to use MVGHI/MVHI
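  // (MVGHI stores a sign-extended 16-bit immediate directly to a doubleword
  // in memory, avoiding the scratch register; it needs a short, index-free
  // address.)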
3632  if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT) && is_uint12(mem.offset()) &&
3633      mem.getIndexRegister() == r0 && is_int16(opnd.immediate())) {
3634    mvghi(mem, opnd);
3635  } else {
3636    mov(scratch, opnd);
3637    StoreU64(scratch, mem);
3638  }
3639}
3640
3641void TurboAssembler::LoadMultipleP(Register dst1, Register dst2,
3642                                   const MemOperand& mem) {
3643#if V8_TARGET_ARCH_S390X
3644  DCHECK(is_int20(mem.offset()));
3645  lmg(dst1, dst2, mem);
3646#else
3647  if (is_uint12(mem.offset())) {
3648    lm(dst1, dst2, mem);
3649  } else {
3650    DCHECK(is_int20(mem.offset()));
3651    lmy(dst1, dst2, mem);
3652  }
3653#endif
3654}
3655
3656void TurboAssembler::StoreMultipleP(Register src1, Register src2,
3657                                    const MemOperand& mem) {
3658#if V8_TARGET_ARCH_S390X
3659  DCHECK(is_int20(mem.offset()));
3660  stmg(src1, src2, mem);
3661#else
3662  if (is_uint12(mem.offset())) {
3663    stm(src1, src2, mem);
3664  } else {
3665    DCHECK(is_int20(mem.offset()));
3666    stmy(src1, src2, mem);
3667  }
3668#endif
3669}
3670
3671void TurboAssembler::LoadMultipleW(Register dst1, Register dst2,
3672                                   const MemOperand& mem) {
3673  if (is_uint12(mem.offset())) {
3674    lm(dst1, dst2, mem);
3675  } else {
3676    DCHECK(is_int20(mem.offset()));
3677    lmy(dst1, dst2, mem);
3678  }
3679}
3680
3681void TurboAssembler::StoreMultipleW(Register src1, Register src2,
3682                                    const MemOperand& mem) {
3683  if (is_uint12(mem.offset())) {
3684    stm(src1, src2, mem);
3685  } else {
3686    DCHECK(is_int20(mem.offset()));
3687    stmy(src1, src2, mem);
3688  }
3689}
3690
3691// Load 32-bits and sign extend if necessary.
3692void TurboAssembler::LoadS32(Register dst, Register src) {
3693#if V8_TARGET_ARCH_S390X
3694  lgfr(dst, src);
3695#else
3696  if (dst != src) lr(dst, src);
3697#endif
3698}
3699
3700// Load 32-bits and sign extend if necessary.
3701void TurboAssembler::LoadS32(Register dst, const MemOperand& mem,
3702                           Register scratch) {
3703  int offset = mem.offset();
3704
3705  if (!is_int20(offset)) {
3706    DCHECK(scratch != no_reg);
3707    mov(scratch, Operand(offset));
3708#if V8_TARGET_ARCH_S390X
3709    lgf(dst, MemOperand(mem.rb(), scratch));
3710#else
3711    l(dst, MemOperand(mem.rb(), scratch));
3712#endif
3713  } else {
3714#if V8_TARGET_ARCH_S390X
3715    lgf(dst, mem);
3716#else
3717    if (is_uint12(offset)) {
3718      l(dst, mem);
3719    } else {
3720      ly(dst, mem);
3721    }
3722#endif
3723  }
3724}
3725
3726// Load 32-bits and zero extend if necessary.
3727void TurboAssembler::LoadU32(Register dst, Register src) {
3728#if V8_TARGET_ARCH_S390X
3729  llgfr(dst, src);
3730#else
3731  if (dst != src) lr(dst, src);
3732#endif
3733}
3734
3735// Variable length depending on whether offset fits into immediate field
3736// MemOperand of RX or RXY format
3737void TurboAssembler::LoadU32(Register dst, const MemOperand& mem,
3738                            Register scratch) {
3739  Register base = mem.rb();
3740  int offset = mem.offset();
3741
3742#if V8_TARGET_ARCH_S390X
3743  if (is_int20(offset)) {
3744    llgf(dst, mem);
3745  } else if (scratch != no_reg) {
3746    // Materialize offset into scratch register.
3747    mov(scratch, Operand(offset));
3748    llgf(dst, MemOperand(base, scratch));
3749  } else {
3750    DCHECK(false);
3751  }
3752#else
3753  bool use_RXform = false;
3754  bool use_RXYform = false;
3755  if (is_uint12(offset)) {
3756    // RX-format supports unsigned 12-bits offset.
3757    use_RXform = true;
3758  } else if (is_int20(offset)) {
3759    // RXY-format supports signed 20-bits offset.
3760    use_RXYform = true;
3761  } else if (scratch != no_reg) {
3762    // Materialize offset into scratch register.
3763    mov(scratch, Operand(offset));
3764  } else {
3765    DCHECK(false);
3766  }
3767
3768  if (use_RXform) {
3769    l(dst, mem);
3770  } else if (use_RXYform) {
3771    ly(dst, mem);
3772  } else {
3773    ly(dst, MemOperand(base, scratch));
3774  }
3775#endif
3776}
3777
3778void TurboAssembler::LoadU16(Register dst, const MemOperand& mem) {
3779  // TODO(s390x): Add scratch reg
3780#if V8_TARGET_ARCH_S390X
3781  llgh(dst, mem);
3782#else
3783  llh(dst, mem);
3784#endif
3785}
3786
3787void TurboAssembler::LoadU16(Register dst, Register src) {
3788#if V8_TARGET_ARCH_S390X
3789  llghr(dst, src);
3790#else
3791  llhr(dst, src);
3792#endif
3793}
3794
3795void TurboAssembler::LoadS8(Register dst, const MemOperand& mem) {
3796  // TODO(s390x): Add scratch reg
3797#if V8_TARGET_ARCH_S390X
3798  lgb(dst, mem);
3799#else
3800  lb(dst, mem);
3801#endif
3802}
3803
3804void TurboAssembler::LoadS8(Register dst, Register src) {
3805#if V8_TARGET_ARCH_S390X
3806  lgbr(dst, src);
3807#else
3808  lbr(dst, src);
3809#endif
3810}
3811
3812void TurboAssembler::LoadU8(Register dst, const MemOperand& mem) {
3813  // TODO(s390x): Add scratch reg
3814#if V8_TARGET_ARCH_S390X
3815  llgc(dst, mem);
3816#else
3817  llc(dst, mem);
3818#endif
3819}
3820
3821void TurboAssembler::LoadU8(Register dst, Register src) {
3822#if V8_TARGET_ARCH_S390X
3823  llgcr(dst, src);
3824#else
3825  llcr(dst, src);
3826#endif
3827}
3828
3829#ifdef V8_TARGET_BIG_ENDIAN
3830void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
3831                               Register scratch) {
3832  lrvg(dst, mem);
3833}
3834
3835void TurboAssembler::LoadS32LE(Register dst, const MemOperand& opnd,
3836                               Register scratch) {
3837  lrv(dst, opnd);
3838  LoadS32(dst, dst);
3839}
3840
3841void TurboAssembler::LoadU32LE(Register dst, const MemOperand& opnd,
3842                               Register scratch) {
3843  lrv(dst, opnd);
3844  LoadU32(dst, dst);
3845}
3846
3847void TurboAssembler::LoadU16LE(Register dst, const MemOperand& opnd) {
3848  lrvh(dst, opnd);
3849  LoadU16(dst, dst);
3850}
3851
3852void TurboAssembler::LoadS16LE(Register dst, const MemOperand& opnd) {
3853  lrvh(dst, opnd);
3854  LoadS16(dst, dst);
3855}
3856
3857void TurboAssembler::LoadV128LE(DoubleRegister dst, const MemOperand& opnd,
3858                                Register scratch0, Register scratch1) {
3859  bool use_vlbr = CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
3860                  is_uint12(opnd.offset());
3861  if (use_vlbr) {
3862    vlbr(dst, opnd, Condition(4));
3863  } else {
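    // Without VECTOR_ENHANCE_FACILITY_2, emulate the byte-reversed vector
    // load with two LRVG doubleword loads recombined via VLVGP.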
3864    lrvg(scratch0, opnd);
3865    lrvg(scratch1,
3866         MemOperand(opnd.rx(), opnd.rb(), opnd.offset() + kSystemPointerSize));
3867    vlvgp(dst, scratch1, scratch0);
3868  }
3869}
3870
3871void TurboAssembler::LoadF64LE(DoubleRegister dst, const MemOperand& opnd,
3872                               Register scratch) {
3873  lrvg(scratch, opnd);
3874  ldgr(dst, scratch);
3875}
3876
3877void TurboAssembler::LoadF32LE(DoubleRegister dst, const MemOperand& opnd,
3878                               Register scratch) {
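  // Byte-reverse the 32-bit value, then shift it into the high word of the
  // GPR: LDGR transfers the full 64 bits, and a short float occupies the
  // leftmost 32 bits of the FPR.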
3879  lrv(scratch, opnd);
3880  ShiftLeftU64(scratch, scratch, Operand(32));
3881  ldgr(dst, scratch);
3882}
3883
3884void TurboAssembler::StoreU64LE(Register src, const MemOperand& mem,
3885                                Register scratch) {
3886  if (!is_int20(mem.offset())) {
3887    DCHECK(scratch != no_reg);
3888    DCHECK(scratch != r0);
3889    mov(scratch, Operand(mem.offset()));
3890    strvg(src, MemOperand(mem.rb(), scratch));
3891  } else {
3892    strvg(src, mem);
3893  }
3894}
3895
3896void TurboAssembler::StoreU32LE(Register src, const MemOperand& mem,
3897                                Register scratch) {
3898  if (!is_int20(mem.offset())) {
3899    DCHECK(scratch != no_reg);
3900    DCHECK(scratch != r0);
3901    mov(scratch, Operand(mem.offset()));
3902    strv(src, MemOperand(mem.rb(), scratch));
3903  } else {
3904    strv(src, mem);
3905  }
3906}
3907
3908void TurboAssembler::StoreU16LE(Register src, const MemOperand& mem,
3909                                Register scratch) {
3910  if (!is_int20(mem.offset())) {
3911    DCHECK(scratch != no_reg);
3912    DCHECK(scratch != r0);
3913    mov(scratch, Operand(mem.offset()));
3914    strvh(src, MemOperand(mem.rb(), scratch));
3915  } else {
3916    strvh(src, mem);
3917  }
3918}
3919
3920void TurboAssembler::StoreF64LE(DoubleRegister src, const MemOperand& opnd,
3921                                Register scratch) {
3922  DCHECK(is_uint12(opnd.offset()));
3923  lgdr(scratch, src);
3924  strvg(scratch, opnd);
3925}
3926
3927void TurboAssembler::StoreF32LE(DoubleRegister src, const MemOperand& opnd,
3928                                Register scratch) {
3929  DCHECK(is_uint12(opnd.offset()));
3930  lgdr(scratch, src);
3931  ShiftRightU64(scratch, scratch, Operand(32));
3932  strv(scratch, opnd);
3933}
3934
3935void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
3936                                 Register scratch1, Register scratch2) {
3937  bool use_vstbr = CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2) &&
3938                   is_uint12(mem.offset());
3939  if (use_vstbr) {
3940    vstbr(src, mem, Condition(4));
3941  } else {
3942    vlgv(scratch1, src, MemOperand(r0, 1), Condition(3));
3943    vlgv(scratch2, src, MemOperand(r0, 0), Condition(3));
3944    strvg(scratch1, mem);
3945    strvg(scratch2,
3946          MemOperand(mem.rx(), mem.rb(), mem.offset() + kSystemPointerSize));
3947  }
3948}
3949
3950#else
3951void TurboAssembler::LoadU64LE(Register dst, const MemOperand& mem,
3952                               Register scratch) {
3953  LoadU64(dst, mem, scratch);
3954}
3955
3956void TurboAssembler::LoadS32LE(Register dst, const MemOperand& opnd,
3957                               Register scratch) {
3958  LoadS32(dst, opnd, scratch);
3959}
3960
3961void TurboAssembler::LoadU32LE(Register dst, const MemOperand& opnd,
3962                               Register scratch) {
3963  LoadU32(dst, opnd, scratch);
3964}
3965
3966void TurboAssembler::LoadU16LE(Register dst, const MemOperand& opnd) {
3967  LoadU16(dst, opnd);
3968}
3969
3970void TurboAssembler::LoadS16LE(Register dst, const MemOperand& opnd) {
3971  LoadS16(dst, opnd);
3972}
3973
3974void TurboAssembler::LoadV128LE(DoubleRegister dst, const MemOperand& opnd,
3975                                Register scratch0, Register scratch1) {
3976  USE(scratch1);
3977  LoadV128(dst, opnd, scratch0);
3978}
3979
3980void TurboAssembler::LoadF64LE(DoubleRegister dst, const MemOperand& opnd,
3981                               Register scratch) {
3982  USE(scratch);
3983  LoadF64(dst, opnd);
3984}
3985
3986void TurboAssembler::LoadF32LE(DoubleRegister dst, const MemOperand& opnd,
3987                               Register scratch) {
3988  USE(scratch);
3989  LoadF32(dst, opnd);
3990}
3991
3992void TurboAssembler::StoreU64LE(Register src, const MemOperand& mem,
3993                                Register scratch) {
3994  StoreU64(src, mem, scratch);
3995}
3996
3997void TurboAssembler::StoreU32LE(Register src, const MemOperand& mem,
3998                                Register scratch) {
3999  StoreU32(src, mem, scratch);
4000}
4001
4002void TurboAssembler::StoreU16LE(Register src, const MemOperand& mem,
4003                                Register scratch) {
4004  StoreU16(src, mem, scratch);
4005}
4006
4007void TurboAssembler::StoreF64LE(DoubleRegister src, const MemOperand& opnd,
4008                                Register scratch) {
4009  StoreF64(src, opnd);
4010}
4011
4012void TurboAssembler::StoreF32LE(DoubleRegister src, const MemOperand& opnd,
4013                                Register scratch) {
4014  StoreF32(src, opnd);
4015}
4016
4017void TurboAssembler::StoreV128LE(Simd128Register src, const MemOperand& mem,
4018                                 Register scratch1, Register scratch2) {
4019  StoreV128(src, mem, scratch1);
4020}
4021
4022#endif
4023
4024// Load And Test (Reg <- Reg)
4025void TurboAssembler::LoadAndTest32(Register dst, Register src) {
4026  ltr(dst, src);
4027}
4028
4029// Load And Test Pointer Sized (Reg <- Reg)
4030void TurboAssembler::LoadAndTestP(Register dst, Register src) {
4031#if V8_TARGET_ARCH_S390X
4032  ltgr(dst, src);
4033#else
4034  ltr(dst, src);
4035#endif
4036}
4037
4038// Load And Test 32-bit (Reg <- Mem)
4039void TurboAssembler::LoadAndTest32(Register dst, const MemOperand& mem) {
4040  lt_z(dst, mem);
4041}
4042
4043// Load And Test Pointer Sized (Reg <- Mem)
4044void TurboAssembler::LoadAndTestP(Register dst, const MemOperand& mem) {
4045#if V8_TARGET_ARCH_S390X
4046  ltg(dst, mem);
4047#else
4048  lt_z(dst, mem);
4049#endif
4050}
4051
4052// Load On Condition Pointer Sized (Reg <- Reg)
4053void TurboAssembler::LoadOnConditionP(Condition cond, Register dst,
4054                                      Register src) {
4055#if V8_TARGET_ARCH_S390X
4056  locgr(cond, dst, src);
4057#else
4058  locr(cond, dst, src);
4059#endif
4060}
4061
4062// Load Double Precision (64-bit) Floating Point number from memory
4063void TurboAssembler::LoadF64(DoubleRegister dst, const MemOperand& mem) {
  // Both 32-bit and 64-bit targets use 64-bit floating point registers.
4065  if (is_uint12(mem.offset())) {
4066    ld(dst, mem);
4067  } else {
4068    ldy(dst, mem);
4069  }
4070}
4071
4072// Load Single Precision (32-bit) Floating Point number from memory
4073void TurboAssembler::LoadF32(DoubleRegister dst, const MemOperand& mem) {
4074  if (is_uint12(mem.offset())) {
4075    le_z(dst, mem);
4076  } else {
4077    DCHECK(is_int20(mem.offset()));
4078    ley(dst, mem);
4079  }
4080}
4081
4082void TurboAssembler::LoadV128(Simd128Register dst, const MemOperand& mem,
4083                              Register scratch) {
4084  DCHECK(scratch != r0);
4085  if (is_uint12(mem.offset())) {
4086    vl(dst, mem, Condition(0));
4087  } else {
4088    DCHECK(is_int20(mem.offset()));
4089    lay(scratch, mem);
4090    vl(dst, MemOperand(scratch), Condition(0));
4091  }
4092}
4093
4094// Store Double Precision (64-bit) Floating Point number to memory
4095void TurboAssembler::StoreF64(DoubleRegister dst, const MemOperand& mem) {
4096  if (is_uint12(mem.offset())) {
4097    std(dst, mem);
4098  } else {
4099    stdy(dst, mem);
4100  }
4101}
4102
4103// Store Single Precision (32-bit) Floating Point number to memory
4104void TurboAssembler::StoreF32(DoubleRegister src, const MemOperand& mem) {
4105  if (is_uint12(mem.offset())) {
4106    ste(src, mem);
4107  } else {
4108    stey(src, mem);
4109  }
4110}
4111
4112void TurboAssembler::StoreV128(Simd128Register src, const MemOperand& mem,
4113                               Register scratch) {
4114  DCHECK(scratch != r0);
4115  if (is_uint12(mem.offset())) {
4116    vst(src, mem, Condition(0));
4117  } else {
4118    DCHECK(is_int20(mem.offset()));
4119    lay(scratch, mem);
4120    vst(src, MemOperand(scratch), Condition(0));
4121  }
4122}
4123
4124void TurboAssembler::AddF32(DoubleRegister dst, DoubleRegister lhs,
4125                            DoubleRegister rhs) {
4126  if (dst == lhs) {
4127    aebr(dst, rhs);
4128  } else if (dst == rhs) {
4129    aebr(dst, lhs);
4130  } else {
4131    ler(dst, lhs);
4132    aebr(dst, rhs);
4133  }
4134}
4135
4136void TurboAssembler::SubF32(DoubleRegister dst, DoubleRegister lhs,
4137                            DoubleRegister rhs) {
4138  if (dst == lhs) {
4139    sebr(dst, rhs);
4140  } else if (dst == rhs) {
4141    sebr(dst, lhs);
4142    lcebr(dst, dst);
4143  } else {
4144    ler(dst, lhs);
4145    sebr(dst, rhs);
4146  }
4147}
4148
4149void TurboAssembler::MulF32(DoubleRegister dst, DoubleRegister lhs,
4150                            DoubleRegister rhs) {
4151  if (dst == lhs) {
4152    meebr(dst, rhs);
4153  } else if (dst == rhs) {
4154    meebr(dst, lhs);
4155  } else {
4156    ler(dst, lhs);
4157    meebr(dst, rhs);
4158  }
4159}
4160
4161void TurboAssembler::DivF32(DoubleRegister dst, DoubleRegister lhs,
4162                            DoubleRegister rhs) {
4163  if (dst == lhs) {
4164    debr(dst, rhs);
4165  } else if (dst == rhs) {
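    // dst aliases rhs: spill the divisor to the stack so it survives the
    // copy of lhs into dst, then divide by the spilled value.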
4166    lay(sp, MemOperand(sp, -kSystemPointerSize));
4167    StoreF32(dst, MemOperand(sp));
4168    ler(dst, lhs);
4169    deb(dst, MemOperand(sp));
4170    la(sp, MemOperand(sp, kSystemPointerSize));
4171  } else {
4172    ler(dst, lhs);
4173    debr(dst, rhs);
4174  }
4175}
4176
4177void TurboAssembler::AddF64(DoubleRegister dst, DoubleRegister lhs,
4178                            DoubleRegister rhs) {
4179  if (dst == lhs) {
4180    adbr(dst, rhs);
4181  } else if (dst == rhs) {
4182    adbr(dst, lhs);
4183  } else {
4184    ldr(dst, lhs);
4185    adbr(dst, rhs);
4186  }
4187}
4188
4189void TurboAssembler::SubF64(DoubleRegister dst, DoubleRegister lhs,
4190                            DoubleRegister rhs) {
4191  if (dst == lhs) {
4192    sdbr(dst, rhs);
4193  } else if (dst == rhs) {
4194    sdbr(dst, lhs);
4195    lcdbr(dst, dst);
4196  } else {
4197    ldr(dst, lhs);
4198    sdbr(dst, rhs);
4199  }
4200}
4201
4202void TurboAssembler::MulF64(DoubleRegister dst, DoubleRegister lhs,
4203                            DoubleRegister rhs) {
4204  if (dst == lhs) {
4205    mdbr(dst, rhs);
4206  } else if (dst == rhs) {
4207    mdbr(dst, lhs);
4208  } else {
4209    ldr(dst, lhs);
4210    mdbr(dst, rhs);
4211  }
4212}
4213
4214void TurboAssembler::DivF64(DoubleRegister dst, DoubleRegister lhs,
4215                            DoubleRegister rhs) {
4216  if (dst == lhs) {
4217    ddbr(dst, rhs);
4218  } else if (dst == rhs) {
4219    lay(sp, MemOperand(sp, -kSystemPointerSize));
4220    StoreF64(dst, MemOperand(sp));
4221    ldr(dst, lhs);
4222    ddb(dst, MemOperand(sp));
4223    la(sp, MemOperand(sp, kSystemPointerSize));
4224  } else {
4225    ldr(dst, lhs);
4226    ddbr(dst, rhs);
4227  }
4228}
4229
4230void TurboAssembler::AddFloat32(DoubleRegister dst, const MemOperand& opnd,
4231                                DoubleRegister scratch) {
4232  if (is_uint12(opnd.offset())) {
4233    aeb(dst, opnd);
4234  } else {
4235    ley(scratch, opnd);
4236    aebr(dst, scratch);
4237  }
4238}
4239
4240void TurboAssembler::AddFloat64(DoubleRegister dst, const MemOperand& opnd,
4241                                DoubleRegister scratch) {
4242  if (is_uint12(opnd.offset())) {
4243    adb(dst, opnd);
4244  } else {
4245    ldy(scratch, opnd);
4246    adbr(dst, scratch);
4247  }
4248}
4249
4250void TurboAssembler::SubFloat32(DoubleRegister dst, const MemOperand& opnd,
4251                                DoubleRegister scratch) {
4252  if (is_uint12(opnd.offset())) {
4253    seb(dst, opnd);
4254  } else {
4255    ley(scratch, opnd);
4256    sebr(dst, scratch);
4257  }
4258}
4259
4260void TurboAssembler::SubFloat64(DoubleRegister dst, const MemOperand& opnd,
4261                                DoubleRegister scratch) {
4262  if (is_uint12(opnd.offset())) {
4263    sdb(dst, opnd);
4264  } else {
4265    ldy(scratch, opnd);
4266    sdbr(dst, scratch);
4267  }
4268}
4269
4270void TurboAssembler::MulFloat32(DoubleRegister dst, const MemOperand& opnd,
4271                                DoubleRegister scratch) {
4272  if (is_uint12(opnd.offset())) {
4273    meeb(dst, opnd);
4274  } else {
4275    ley(scratch, opnd);
4276    meebr(dst, scratch);
4277  }
4278}
4279
4280void TurboAssembler::MulFloat64(DoubleRegister dst, const MemOperand& opnd,
4281                                DoubleRegister scratch) {
4282  if (is_uint12(opnd.offset())) {
4283    mdb(dst, opnd);
4284  } else {
4285    ldy(scratch, opnd);
4286    mdbr(dst, scratch);
4287  }
4288}
4289
4290void TurboAssembler::DivFloat32(DoubleRegister dst, const MemOperand& opnd,
4291                                DoubleRegister scratch) {
4292  if (is_uint12(opnd.offset())) {
4293    deb(dst, opnd);
4294  } else {
4295    ley(scratch, opnd);
4296    debr(dst, scratch);
4297  }
4298}
4299
4300void TurboAssembler::DivFloat64(DoubleRegister dst, const MemOperand& opnd,
4301                                DoubleRegister scratch) {
4302  if (is_uint12(opnd.offset())) {
4303    ddb(dst, opnd);
4304  } else {
4305    ldy(scratch, opnd);
4306    ddbr(dst, scratch);
4307  }
4308}
4309
4310void TurboAssembler::LoadF32AsF64(DoubleRegister dst, const MemOperand& opnd,
4311                                  DoubleRegister scratch) {
4312  if (is_uint12(opnd.offset())) {
4313    ldeb(dst, opnd);
4314  } else {
4315    ley(scratch, opnd);
4316    ldebr(dst, scratch);
4317  }
4318}
4319
4320// Variable length depending on whether offset fits into immediate field
4321// MemOperand of RX or RXY format
4322void TurboAssembler::StoreU32(Register src, const MemOperand& mem,
4323                              Register scratch) {
4324  Register base = mem.rb();
4325  int offset = mem.offset();
4326
4327  bool use_RXform = false;
4328  bool use_RXYform = false;
4329
4330  if (is_uint12(offset)) {
4331    // RX-format supports unsigned 12-bits offset.
4332    use_RXform = true;
4333  } else if (is_int20(offset)) {
4334    // RXY-format supports signed 20-bits offset.
4335    use_RXYform = true;
4336  } else if (scratch != no_reg) {
4337    // Materialize offset into scratch register.
4338    mov(scratch, Operand(offset));
4339  } else {
4340    // scratch is no_reg
4341    DCHECK(false);
4342  }
4343
4344  if (use_RXform) {
4345    st(src, mem);
4346  } else if (use_RXYform) {
4347    sty(src, mem);
4348  } else {
4349    StoreU32(src, MemOperand(base, scratch));
4350  }
4351}
4352
4353void TurboAssembler::LoadS16(Register dst, Register src) {
4354#if V8_TARGET_ARCH_S390X
4355  lghr(dst, src);
4356#else
4357  lhr(dst, src);
4358#endif
4359}
4360
// Loads a 16-bit halfword value from memory and sign-extends it to a
// pointer-sized register.
4363void TurboAssembler::LoadS16(Register dst, const MemOperand& mem,
4364                                   Register scratch) {
4365  Register base = mem.rb();
4366  int offset = mem.offset();
4367
4368  if (!is_int20(offset)) {
4369    DCHECK(scratch != no_reg);
4370    mov(scratch, Operand(offset));
4371#if V8_TARGET_ARCH_S390X
4372    lgh(dst, MemOperand(base, scratch));
4373#else
4374    lh(dst, MemOperand(base, scratch));
4375#endif
4376  } else {
4377#if V8_TARGET_ARCH_S390X
4378    lgh(dst, mem);
4379#else
4380    if (is_uint12(offset)) {
4381      lh(dst, mem);
4382    } else {
4383      lhy(dst, mem);
4384    }
4385#endif
4386  }
4387}
4388
4389// Variable length depending on whether offset fits into immediate field
// MemOperand currently only supports d-form
4391void TurboAssembler::StoreU16(Register src, const MemOperand& mem,
4392                              Register scratch) {
4393  Register base = mem.rb();
4394  int offset = mem.offset();
4395
4396  if (is_uint12(offset)) {
4397    sth(src, mem);
4398  } else if (is_int20(offset)) {
4399    sthy(src, mem);
4400  } else {
4401    DCHECK(scratch != no_reg);
4402    mov(scratch, Operand(offset));
4403    sth(src, MemOperand(base, scratch));
4404  }
4405}
4406
4407// Variable length depending on whether offset fits into immediate field
// MemOperand currently only supports d-form
4409void TurboAssembler::StoreU8(Register src, const MemOperand& mem,
4410                             Register scratch) {
4411  Register base = mem.rb();
4412  int offset = mem.offset();
4413
4414  if (is_uint12(offset)) {
4415    stc(src, mem);
4416  } else if (is_int20(offset)) {
4417    stcy(src, mem);
4418  } else {
4419    DCHECK(scratch != no_reg);
4420    mov(scratch, Operand(offset));
4421    stc(src, MemOperand(base, scratch));
4422  }
4423}
4424
4425// Shift left logical for 32-bit integer types.
4426void TurboAssembler::ShiftLeftU32(Register dst, Register src,
4427                                  const Operand& val) {
4428  ShiftLeftU32(dst, src, r0, val);
4429}
4430
4431// Shift left logical for 32-bit integer types.
4432void TurboAssembler::ShiftLeftU32(Register dst, Register src, Register val,
4433                                  const Operand& val2) {
4434  if (dst == src) {
4435    sll(dst, val, val2);
4436  } else if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
4437    sllk(dst, src, val, val2);
4438  } else {
4439    DCHECK(dst != val || val == r0);  // The lr/sll path clobbers val.
4440    lr(dst, src);
4441    sll(dst, val, val2);
4442  }
4443}
4444
// Shift left logical for 64-bit integer types.
4446void TurboAssembler::ShiftLeftU64(Register dst, Register src,
4447                                  const Operand& val) {
4448  ShiftLeftU64(dst, src, r0, val);
4449}
4450
// Shift left logical for 64-bit integer types.
4452void TurboAssembler::ShiftLeftU64(Register dst, Register src, Register val,
4453                                  const Operand& val2) {
4454  sllg(dst, src, val, val2);
4455}
4456
4457// Shift right logical for 32-bit integer types.
4458void TurboAssembler::ShiftRightU32(Register dst, Register src,
4459                                   const Operand& val) {
4460  ShiftRightU32(dst, src, r0, val);
4461}
4462
4463// Shift right logical for 32-bit integer types.
4464void TurboAssembler::ShiftRightU32(Register dst, Register src, Register val,
4465                                   const Operand& val2) {
4466  if (dst == src) {
4467    srl(dst, val, val2);
4468  } else if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
4469    srlk(dst, src, val, val2);
4470  } else {
4471    DCHECK(dst != val || val == r0);  // The lr/srl path clobbers val.
4472    lr(dst, src);
4473    srl(dst, val, val2);
4474  }
4475}
4476
4477void TurboAssembler::ShiftRightU64(Register dst, Register src, Register val,
4478                                   const Operand& val2) {
4479  srlg(dst, src, val, val2);
4480}
4481
4482// Shift right logical for 64-bit integer types.
4483void TurboAssembler::ShiftRightU64(Register dst, Register src,
4484                                   const Operand& val) {
4485  ShiftRightU64(dst, src, r0, val);
4486}
4487
4488// Shift right arithmetic for 32-bit integer types.
4489void TurboAssembler::ShiftRightS32(Register dst, Register src,
4490                                   const Operand& val) {
4491  ShiftRightS32(dst, src, r0, val);
4492}
4493
4494// Shift right arithmetic for 32-bit integer types.
4495void TurboAssembler::ShiftRightS32(Register dst, Register src, Register val,
4496                                   const Operand& val2) {
4497  if (dst == src) {
4498    sra(dst, val, val2);
4499  } else if (CpuFeatures::IsSupported(DISTINCT_OPS)) {
4500    srak(dst, src, val, val2);
4501  } else {
4502    DCHECK(dst != val || val == r0);  // The lr/sra path clobbers val.
4503    lr(dst, src);
4504    sra(dst, val, val2);
4505  }
4506}
4507
4508// Shift right arithmetic for 64-bit integer types.
4509void TurboAssembler::ShiftRightS64(Register dst, Register src,
4510                                   const Operand& val) {
4511  ShiftRightS64(dst, src, r0, val);
4512}
4513
4514// Shift right arithmetic for 64-bit integer types.
4515void TurboAssembler::ShiftRightS64(Register dst, Register src, Register val,
4516                                   const Operand& val2) {
4517  srag(dst, src, val, val2);
4518}
4519
// Clear the rightmost val.immediate() bits of src.
4521void TurboAssembler::ClearRightImm(Register dst, Register src,
4522                                   const Operand& val) {
4523  int numBitsToClear = val.immediate() % (kSystemPointerSize * 8);
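  // e.g. val = 3 clears the three least significant bits of src.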
4524
4525  // Try to use RISBG if possible
4526  if (CpuFeatures::IsSupported(GENERAL_INSTR_EXT)) {
4527    int endBit = 63 - numBitsToClear;
4528    RotateInsertSelectBits(dst, src, Operand::Zero(), Operand(endBit),
4529                           Operand::Zero(), true);
4530    return;
4531  }
4532
4533  uint64_t hexMask = ~((1L << numBitsToClear) - 1);
4534
4535  // S390 AND instr clobbers source.  Make a copy if necessary
4536  if (dst != src) mov(dst, src);
4537
4538  if (numBitsToClear <= 16) {
4539    nill(dst, Operand(static_cast<uint16_t>(hexMask)));
4540  } else if (numBitsToClear <= 32) {
4541    nilf(dst, Operand(static_cast<uint32_t>(hexMask)));
4542  } else if (numBitsToClear <= 64) {
4543    nilf(dst, Operand(static_cast<intptr_t>(0)));
4544    nihf(dst, Operand(hexMask >> 32));
4545  }
4546}
4547
4548void TurboAssembler::Popcnt32(Register dst, Register src) {
4549  DCHECK(src != r0);
4550  DCHECK(dst != r0);
4551
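  // POPCNT computes the population count of each byte of src separately; the
  // shifts and adds below sum those per-byte counts into the low byte, which
  // llgcr then zero-extends into dst.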
4552  popcnt(dst, src);
4553  ShiftRightU32(r0, dst, Operand(16));
4554  ar(dst, r0);
4555  ShiftRightU32(r0, dst, Operand(8));
4556  ar(dst, r0);
4557  llgcr(dst, dst);
4558}
4559
4560#ifdef V8_TARGET_ARCH_S390X
4561void TurboAssembler::Popcnt64(Register dst, Register src) {
4562  DCHECK(src != r0);
4563  DCHECK(dst != r0);
4564
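  // As in Popcnt32, POPCNT produces per-byte counts; fold them down to the
  // low byte with shifts and adds, then zero-extend the result.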
4565  popcnt(dst, src);
4566  ShiftRightU64(r0, dst, Operand(32));
4567  AddS64(dst, r0);
4568  ShiftRightU64(r0, dst, Operand(16));
4569  AddS64(dst, r0);
4570  ShiftRightU64(r0, dst, Operand(8));
4571  AddS64(dst, r0);
4572  LoadU8(dst, dst);
4573}
4574#endif
4575
4576void TurboAssembler::SwapP(Register src, Register dst, Register scratch) {
4577  if (src == dst) return;
4578  DCHECK(!AreAliased(src, dst, scratch));
4579  mov(scratch, src);
4580  mov(src, dst);
4581  mov(dst, scratch);
4582}
4583
4584void TurboAssembler::SwapP(Register src, MemOperand dst, Register scratch) {
4585  if (dst.rx() != r0) DCHECK(!AreAliased(src, dst.rx(), scratch));
4586  if (dst.rb() != r0) DCHECK(!AreAliased(src, dst.rb(), scratch));
4587  DCHECK(!AreAliased(src, scratch));
4588  mov(scratch, src);
4589  LoadU64(src, dst);
4590  StoreU64(scratch, dst);
4591}
4592
4593void TurboAssembler::SwapP(MemOperand src, MemOperand dst, Register scratch_0,
4594                           Register scratch_1) {
4595  if (src.rx() != r0) DCHECK(!AreAliased(src.rx(), scratch_0, scratch_1));
4596  if (src.rb() != r0) DCHECK(!AreAliased(src.rb(), scratch_0, scratch_1));
4597  if (dst.rx() != r0) DCHECK(!AreAliased(dst.rx(), scratch_0, scratch_1));
4598  if (dst.rb() != r0) DCHECK(!AreAliased(dst.rb(), scratch_0, scratch_1));
4599  DCHECK(!AreAliased(scratch_0, scratch_1));
4600  LoadU64(scratch_0, src);
4601  LoadU64(scratch_1, dst);
4602  StoreU64(scratch_0, dst);
4603  StoreU64(scratch_1, src);
4604}
4605
4606void TurboAssembler::SwapFloat32(DoubleRegister src, DoubleRegister dst,
4607                                 DoubleRegister scratch) {
4608  if (src == dst) return;
4609  DCHECK(!AreAliased(src, dst, scratch));
4610  ldr(scratch, src);
4611  ldr(src, dst);
4612  ldr(dst, scratch);
4613}
4614
4615void TurboAssembler::SwapFloat32(DoubleRegister src, MemOperand dst,
4616                                 DoubleRegister scratch) {
4617  DCHECK(!AreAliased(src, scratch));
4618  ldr(scratch, src);
4619  LoadF32(src, dst);
4620  StoreF32(scratch, dst);
4621}
4622
4623void TurboAssembler::SwapFloat32(MemOperand src, MemOperand dst,
4624                                 DoubleRegister scratch) {
4625  // push d0, to be used as scratch
4626  lay(sp, MemOperand(sp, -kDoubleSize));
4627  StoreF64(d0, MemOperand(sp));
4628  LoadF32(scratch, src);
4629  LoadF32(d0, dst);
4630  StoreF32(scratch, dst);
4631  StoreF32(d0, src);
4632  // restore d0
4633  LoadF64(d0, MemOperand(sp));
4634  lay(sp, MemOperand(sp, kDoubleSize));
4635}
4636
4637void TurboAssembler::SwapDouble(DoubleRegister src, DoubleRegister dst,
4638                                DoubleRegister scratch) {
4639  if (src == dst) return;
4640  DCHECK(!AreAliased(src, dst, scratch));
4641  ldr(scratch, src);
4642  ldr(src, dst);
4643  ldr(dst, scratch);
4644}
4645
4646void TurboAssembler::SwapDouble(DoubleRegister src, MemOperand dst,
4647                                DoubleRegister scratch) {
4648  DCHECK(!AreAliased(src, scratch));
4649  ldr(scratch, src);
4650  LoadF64(src, dst);
4651  StoreF64(scratch, dst);
4652}
4653
4654void TurboAssembler::SwapDouble(MemOperand src, MemOperand dst,
4655                                DoubleRegister scratch) {
4656  // push d0, to be used as scratch
4657  lay(sp, MemOperand(sp, -kDoubleSize));
4658  StoreF64(d0, MemOperand(sp));
4659  LoadF64(scratch, src);
4660  LoadF64(d0, dst);
4661  StoreF64(scratch, dst);
4662  StoreF64(d0, src);
4663  // restore d0
4664  LoadF64(d0, MemOperand(sp));
4665  lay(sp, MemOperand(sp, kDoubleSize));
4666}
4667
4668void TurboAssembler::SwapSimd128(Simd128Register src, Simd128Register dst,
4669                                 Simd128Register scratch) {
4670  if (src == dst) return;
4671  vlr(scratch, src, Condition(0), Condition(0), Condition(0));
4672  vlr(src, dst, Condition(0), Condition(0), Condition(0));
4673  vlr(dst, scratch, Condition(0), Condition(0), Condition(0));
4674}
4675
4676void TurboAssembler::SwapSimd128(Simd128Register src, MemOperand dst,
4677                                 Simd128Register scratch) {
4678  DCHECK(!AreAliased(src, scratch));
4679  vlr(scratch, src, Condition(0), Condition(0), Condition(0));
4680  LoadV128(src, dst, ip);
4681  StoreV128(scratch, dst, ip);
4682}
4683
4684void TurboAssembler::SwapSimd128(MemOperand src, MemOperand dst,
4685                                 Simd128Register scratch) {
4686  // push d0, to be used as scratch
4687  lay(sp, MemOperand(sp, -kSimd128Size));
4688  StoreV128(d0, MemOperand(sp), ip);
4689  LoadV128(scratch, src, ip);
4690  LoadV128(d0, dst, ip);
4691  StoreV128(scratch, dst, ip);
4692  StoreV128(d0, src, ip);
4693  // restore d0
4694  LoadV128(d0, MemOperand(sp), ip);
4695  lay(sp, MemOperand(sp, kSimd128Size));
4696}
4697
4698void TurboAssembler::ComputeCodeStartAddress(Register dst) {
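  // LARL's immediate is a halfword-scaled offset, so -pc_offset() / 2 points
  // back to offset 0, i.e. the start of the current code object.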
4699  larl(dst, Operand(-pc_offset() / 2));
4700}
4701
4702void TurboAssembler::LoadPC(Register dst) {
4703  Label current_pc;
4704  larl(dst, &current_pc);
4705  bind(&current_pc);
4706}
4707
4708void TurboAssembler::JumpIfEqual(Register x, int32_t y, Label* dest) {
4709  CmpS32(x, Operand(y));
4710  beq(dest);
4711}
4712
4713void TurboAssembler::JumpIfLessThan(Register x, int32_t y, Label* dest) {
4714  CmpS32(x, Operand(y));
4715  blt(dest);
4716}
4717
4718void TurboAssembler::LoadEntryFromBuiltinIndex(Register builtin_index) {
4719  STATIC_ASSERT(kSystemPointerSize == 8);
4720  STATIC_ASSERT(kSmiTagSize == 1);
4721  STATIC_ASSERT(kSmiTag == 0);
4722  // The builtin_index register contains the builtin index as a Smi.
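  // Untag it and scale by kSystemPointerSize in a single shift, yielding a
  // byte offset into the builtin entry table.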
4723  if (SmiValuesAre32Bits()) {
4724    ShiftRightS64(builtin_index, builtin_index,
4725                  Operand(kSmiShift - kSystemPointerSizeLog2));
4726  } else {
4727    DCHECK(SmiValuesAre31Bits());
4728    ShiftLeftU64(builtin_index, builtin_index,
4729                 Operand(kSystemPointerSizeLog2 - kSmiShift));
4730  }
4731  LoadU64(builtin_index, MemOperand(kRootRegister, builtin_index,
4732                                    IsolateData::builtin_entry_table_offset()));
4733}
4734
4735void TurboAssembler::CallBuiltinByIndex(Register builtin_index) {
4736  LoadEntryFromBuiltinIndex(builtin_index);
4737  Call(builtin_index);
4738}
4739
4740void TurboAssembler::LoadEntryFromBuiltin(Builtin builtin,
4741                                          Register destination) {
4742  ASM_CODE_COMMENT(this);
4743  LoadU64(destination, EntryFromBuiltinAsOperand(builtin));
4744}
4745
4746MemOperand TurboAssembler::EntryFromBuiltinAsOperand(Builtin builtin) {
4747  ASM_CODE_COMMENT(this);
4748  DCHECK(root_array_available());
4749  return MemOperand(kRootRegister,
4750                    IsolateData::BuiltinEntrySlotOffset(builtin));
4751}
4752
4753void TurboAssembler::LoadCodeObjectEntry(Register destination,
4754                                         Register code_object) {
4755  // Code objects are called differently depending on whether we are generating
4756  // builtin code (which will later be embedded into the binary) or compiling
4757  // user JS code at runtime.
4758  // * Builtin code runs in --jitless mode and thus must not call into on-heap
4759  //   Code targets. Instead, we dispatch through the builtins entry table.
4760  // * Codegen at runtime does not have this restriction and we can use the
4761  //   shorter, branchless instruction sequence. The assumption here is that
4762  //   targets are usually generated code and not builtin Code objects.
4763
4764  if (options().isolate_independent_code) {
4765    DCHECK(root_array_available());
4766    Label if_code_is_off_heap, out;
4767
4768    Register scratch = r1;
4769
4770    DCHECK(!AreAliased(destination, scratch));
4771    DCHECK(!AreAliased(code_object, scratch));
4772
4773    // Check whether the Code object is an off-heap trampoline. If so, call its
4774    // (off-heap) entry point directly without going through the (on-heap)
4775    // trampoline.  Otherwise, just call the Code object as always.
4776    LoadS32(scratch, FieldMemOperand(code_object, Code::kFlagsOffset));
4777    tmlh(scratch, Operand(Code::IsOffHeapTrampoline::kMask >> 16));
4778    bne(&if_code_is_off_heap);
4779
4780    // Not an off-heap trampoline, the entry point is at
4781    // Code::raw_instruction_start().
4782    AddS64(destination, code_object,
4783           Operand(Code::kHeaderSize - kHeapObjectTag));
4784    b(&out);
4785
4786    // An off-heap trampoline, the entry point is loaded from the builtin entry
4787    // table.
4788    bind(&if_code_is_off_heap);
4789    LoadS32(scratch, FieldMemOperand(code_object, Code::kBuiltinIndexOffset));
4790    ShiftLeftU64(destination, scratch, Operand(kSystemPointerSizeLog2));
4791    AddS64(destination, destination, kRootRegister);
4792    LoadU64(destination,
4793            MemOperand(destination, IsolateData::builtin_entry_table_offset()));
4794
4795    bind(&out);
4796  } else {
4797    AddS64(destination, code_object,
4798           Operand(Code::kHeaderSize - kHeapObjectTag));
4799  }
4800}
4801
4802void TurboAssembler::CallCodeObject(Register code_object) {
4803  LoadCodeObjectEntry(code_object, code_object);
4804  Call(code_object);
4805}
4806
4807void TurboAssembler::JumpCodeObject(Register code_object, JumpMode jump_mode) {
4808  DCHECK_EQ(JumpMode::kJump, jump_mode);
4809  LoadCodeObjectEntry(code_object, code_object);
4810  Jump(code_object);
4811}
4812
4813void TurboAssembler::StoreReturnAddressAndCall(Register target) {
4814  // This generates the final instruction sequence for calls to C functions
4815  // once an exit frame has been constructed.
4816  //
4817  // Note that this assumes the caller code (i.e. the Code object currently
4818  // being generated) is immovable or that the callee function cannot trigger
4819  // GC, since the callee function will return to it.
4820
4821  Label return_label;
4822  larl(r14, &return_label);  // Generate the return addr of call later.
4823  StoreU64(r14, MemOperand(sp, kStackFrameRASlot * kSystemPointerSize));
4824
  // The zLinux ABI requires the caller's frame to have sufficient space for
  // the callee-preserved register save area.
4827  b(target);
4828  bind(&return_label);
4829}
4830
4831void TurboAssembler::CallForDeoptimization(Builtin target, int, Label* exit,
4832                                           DeoptimizeKind kind, Label* ret,
4833                                           Label*) {
4834  ASM_CODE_COMMENT(this);
4835  LoadU64(ip, MemOperand(kRootRegister,
4836                         IsolateData::BuiltinEntrySlotOffset(target)));
4837  Call(ip);
4838  DCHECK_EQ(SizeOfCodeGeneratedSince(exit),
4839            (kind == DeoptimizeKind::kLazy) ? Deoptimizer::kLazyDeoptExitSize
4840                                            : Deoptimizer::kEagerDeoptExitSize);
4841}
4842
4843void TurboAssembler::Trap() { stop(); }
4844void TurboAssembler::DebugBreak() { stop(); }
4845
4846void TurboAssembler::CountLeadingZerosU32(Register dst, Register src,
4847                                          Register scratch_pair) {
4848  llgfr(dst, src);
4849  flogr(scratch_pair,
        dst);  // Modifies the even/odd pair scratch_pair, scratch_pair + 1.
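  // dst was zero-extended to 64 bits, so flogr reports 32 extra leading
  // zeros; subtract them to get the 32-bit count.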
4851  AddS32(dst, scratch_pair, Operand(-32));
4852}
4853
4854void TurboAssembler::CountLeadingZerosU64(Register dst, Register src,
4855                                          Register scratch_pair) {
4856  flogr(scratch_pair,
        src);  // Modifies the even/odd pair scratch_pair, scratch_pair + 1.
4858  mov(dst, scratch_pair);
4859}
4860
4861void TurboAssembler::CountTrailingZerosU32(Register dst, Register src,
4862                                           Register scratch_pair) {
4863  Register scratch0 = scratch_pair;
4864  Register scratch1 = Register::from_code(scratch_pair.code() + 1);
4865  DCHECK(!AreAliased(dst, scratch0, scratch1));
4866  DCHECK(!AreAliased(src, scratch0, scratch1));
4867
4868  Label done;
4869  // Check if src is all zeros.
4870  ltr(scratch1, src);
4871  mov(dst, Operand(32));
4872  beq(&done);
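  // Isolate the lowest set bit with x & -x, locate it from the MSB side with
  // flogr, and convert: trailing zeros = 63 - leftmost-one position.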
4873  llgfr(scratch1, scratch1);
4874  lcgr(scratch0, scratch1);
4875  ngr(scratch1, scratch0);
4876  flogr(scratch0, scratch1);
4877  mov(dst, Operand(63));
4878  SubS64(dst, scratch0);
4879  bind(&done);
4880}
4881
4882void TurboAssembler::CountTrailingZerosU64(Register dst, Register src,
4883                                           Register scratch_pair) {
4884  Register scratch0 = scratch_pair;
4885  Register scratch1 = Register::from_code(scratch_pair.code() + 1);
4886  DCHECK(!AreAliased(dst, scratch0, scratch1));
4887  DCHECK(!AreAliased(src, scratch0, scratch1));
4888
4889  Label done;
4890  // Check if src is all zeros.
4891  ltgr(scratch1, src);
4892  mov(dst, Operand(64));
4893  beq(&done);
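  // Same x & -x trick as the 32-bit variant, operating on all 64 bits.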
4894  lcgr(scratch0, scratch1);
4895  ngr(scratch0, scratch1);
4896  flogr(scratch0, scratch0);
4897  mov(dst, Operand(63));
4898  SubS64(dst, scratch0);
4899  bind(&done);
4900}
4901
4902void TurboAssembler::AtomicCmpExchangeHelper(Register addr, Register output,
4903                                             Register old_value,
4904                                             Register new_value, int start,
4905                                             int end, int shift_amount,
4906                                             int offset, Register temp0,
4907                                             Register temp1) {
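  // Sub-word compare-and-swap: load the containing aligned word, splice
  // old_value and new_value into copies of it at bit range [start, end]
  // (rotated by shift_amount), perform CS on the full word, then extract the
  // previous sub-word value into output.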
4908  LoadU32(temp0, MemOperand(addr, offset));
4909  llgfr(temp1, temp0);
4910  RotateInsertSelectBits(temp0, old_value, Operand(start), Operand(end),
4911                         Operand(shift_amount), false);
4912  RotateInsertSelectBits(temp1, new_value, Operand(start), Operand(end),
4913                         Operand(shift_amount), false);
4914  CmpAndSwap(temp0, temp1, MemOperand(addr, offset));
4915  RotateInsertSelectBits(output, temp0, Operand(start + shift_amount),
4916                         Operand(end + shift_amount),
4917                         Operand(64 - shift_amount), true);
4918}
4919
4920void TurboAssembler::AtomicCmpExchangeU8(Register addr, Register output,
4921                                         Register old_value, Register new_value,
4922                                         Register temp0, Register temp1) {
4923#ifdef V8_TARGET_BIG_ENDIAN
4924#define ATOMIC_COMP_EXCHANGE_BYTE(i)                                        \
4925  {                                                                         \
4926    constexpr int idx = (i);                                                \
4927    static_assert(idx <= 3 && idx >= 0, "idx is out of range!");            \
4928    constexpr int start = 32 + 8 * idx;                                     \
4929    constexpr int end = start + 7;                                          \
4930    constexpr int shift_amount = (3 - idx) * 8;                             \
4931    AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4932                            shift_amount, -idx, temp0, temp1);              \
4933  }
4934#else
4935#define ATOMIC_COMP_EXCHANGE_BYTE(i)                                        \
4936  {                                                                         \
4937    constexpr int idx = (i);                                                \
4938    static_assert(idx <= 3 && idx >= 0, "idx is out of range!");            \
4939    constexpr int start = 32 + 8 * (3 - idx);                               \
4940    constexpr int end = start + 7;                                          \
4941    constexpr int shift_amount = idx * 8;                                   \
4942    AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4943                            shift_amount, -idx, temp0, temp1);              \
4944  }
4945#endif
4946
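  // Dispatch on the low two bits of the address to select which byte of the
  // aligned word is being exchanged; the -idx offset in the macro moves the
  // access back to the word boundary.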
4947  Label one, two, three, done;
4948  tmll(addr, Operand(3));
4949  b(Condition(1), &three);
4950  b(Condition(2), &two);
4951  b(Condition(4), &one);
4952  /* ending with 0b00 */
4953  ATOMIC_COMP_EXCHANGE_BYTE(0);
4954  b(&done);
4955  /* ending with 0b01 */
4956  bind(&one);
4957  ATOMIC_COMP_EXCHANGE_BYTE(1);
4958  b(&done);
4959  /* ending with 0b10 */
4960  bind(&two);
4961  ATOMIC_COMP_EXCHANGE_BYTE(2);
4962  b(&done);
4963  /* ending with 0b11 */
4964  bind(&three);
4965  ATOMIC_COMP_EXCHANGE_BYTE(3);
4966  bind(&done);
4967}
4968
4969void TurboAssembler::AtomicCmpExchangeU16(Register addr, Register output,
4970                                          Register old_value,
4971                                          Register new_value, Register temp0,
4972                                          Register temp1) {
4973#ifdef V8_TARGET_BIG_ENDIAN
4974#define ATOMIC_COMP_EXCHANGE_HALFWORD(i)                                    \
4975  {                                                                         \
4976    constexpr int idx = (i);                                                \
4977    static_assert(idx <= 1 && idx >= 0, "idx is out of range!");            \
4978    constexpr int start = 32 + 16 * idx;                                    \
4979    constexpr int end = start + 15;                                         \
4980    constexpr int shift_amount = (1 - idx) * 16;                            \
4981    AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4982                            shift_amount, -idx * 2, temp0, temp1);          \
4983  }
4984#else
4985#define ATOMIC_COMP_EXCHANGE_HALFWORD(i)                                    \
4986  {                                                                         \
4987    constexpr int idx = (i);                                                \
4988    static_assert(idx <= 1 && idx >= 0, "idx is out of range!");            \
4989    constexpr int start = 32 + 16 * (1 - idx);                              \
4990    constexpr int end = start + 15;                                         \
4991    constexpr int shift_amount = idx * 16;                                  \
4992    AtomicCmpExchangeHelper(addr, output, old_value, new_value, start, end, \
4993                            shift_amount, -idx * 2, temp0, temp1);          \
4994  }
4995#endif
4996
4997  Label two, done;
4998  tmll(addr, Operand(3));
4999  b(Condition(2), &two);
5000  ATOMIC_COMP_EXCHANGE_HALFWORD(0);
5001  b(&done);
5002  bind(&two);
5003  ATOMIC_COMP_EXCHANGE_HALFWORD(1);
5004  bind(&done);
5005}
5006
5007void TurboAssembler::AtomicExchangeHelper(Register addr, Register value,
5008                                          Register output, int start, int end,
5009                                          int shift_amount, int offset,
5010                                          Register scratch) {
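  // Exchange a sub-word lane via a compare-and-swap loop: rebuild the word
  // with the new lane value until the CS succeeds, then shift the previous
  // lane value down into the low bits of output.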
5011  Label do_cs;
5012  LoadU32(output, MemOperand(addr, offset));
5013  bind(&do_cs);
5014  llgfr(scratch, output);
5015  RotateInsertSelectBits(scratch, value, Operand(start), Operand(end),
5016                         Operand(shift_amount), false);
5017  csy(output, scratch, MemOperand(addr, offset));
5018  bne(&do_cs, Label::kNear);
5019  srl(output, Operand(shift_amount));
5020}
5021
5022void TurboAssembler::AtomicExchangeU8(Register addr, Register value,
5023                                      Register output, Register scratch) {
5024#ifdef V8_TARGET_BIG_ENDIAN
5025#define ATOMIC_EXCHANGE_BYTE(i)                                               \
5026  {                                                                           \
5027    constexpr int idx = (i);                                                  \
5028    static_assert(idx <= 3 && idx >= 0, "idx is out of range!");              \
5029    constexpr int start = 32 + 8 * idx;                                       \
5030    constexpr int end = start + 7;                                            \
5031    constexpr int shift_amount = (3 - idx) * 8;                               \
5032    AtomicExchangeHelper(addr, value, output, start, end, shift_amount, -idx, \
5033                         scratch);                                            \
5034  }
5035#else
5036#define ATOMIC_EXCHANGE_BYTE(i)                                               \
5037  {                                                                           \
5038    constexpr int idx = (i);                                                  \
5039    static_assert(idx <= 3 && idx >= 0, "idx is out of range!");              \
5040    constexpr int start = 32 + 8 * (3 - idx);                                 \
5041    constexpr int end = start + 7;                                            \
5042    constexpr int shift_amount = idx * 8;                                     \
5043    AtomicExchangeHelper(addr, value, output, start, end, shift_amount, -idx, \
5044                         scratch);                                            \
5045  }
5046#endif
5047  Label three, two, one, done;
5048  tmll(addr, Operand(3));
5049  b(Condition(1), &three);
5050  b(Condition(2), &two);
5051  b(Condition(4), &one);
5052
  // ending with 0b00
5054  ATOMIC_EXCHANGE_BYTE(0);
5055  b(&done);
5056
5057  // ending with 0b01
5058  bind(&one);
5059  ATOMIC_EXCHANGE_BYTE(1);
5060  b(&done);
5061
5062  // ending with 0b10
5063  bind(&two);
5064  ATOMIC_EXCHANGE_BYTE(2);
5065  b(&done);
5066
5067  // ending with 0b11
5068  bind(&three);
5069  ATOMIC_EXCHANGE_BYTE(3);
5070
5071  bind(&done);
5072}
5073
5074void TurboAssembler::AtomicExchangeU16(Register addr, Register value,
5075                                       Register output, Register scratch) {
5076#ifdef V8_TARGET_BIG_ENDIAN
5077#define ATOMIC_EXCHANGE_HALFWORD(i)                                     \
5078  {                                                                     \
5079    constexpr int idx = (i);                                            \
5080    static_assert(idx <= 1 && idx >= 0, "idx is out of range!");        \
5081    constexpr int start = 32 + 16 * idx;                                \
5082    constexpr int end = start + 15;                                     \
5083    constexpr int shift_amount = (1 - idx) * 16;                        \
5084    AtomicExchangeHelper(addr, value, output, start, end, shift_amount, \
5085                         -idx * 2, scratch);                            \
5086  }
5087#else
5088#define ATOMIC_EXCHANGE_HALFWORD(i)                                     \
5089  {                                                                     \
5090    constexpr int idx = (i);                                            \
5091    static_assert(idx <= 1 && idx >= 0, "idx is out of range!");        \
5092    constexpr int start = 32 + 16 * (1 - idx);                          \
5093    constexpr int end = start + 15;                                     \
5094    constexpr int shift_amount = idx * 16;                              \
5095    AtomicExchangeHelper(addr, value, output, start, end, shift_amount, \
5096                         -idx * 2, scratch);                            \
5097  }
5098#endif
5099  Label two, done;
5100  tmll(addr, Operand(3));
5101  b(Condition(2), &two);
5102
  // ending with 0b00
5104  ATOMIC_EXCHANGE_HALFWORD(0);
5105  b(&done);
5106
5107  // ending with 0b10
5108  bind(&two);
5109  ATOMIC_EXCHANGE_HALFWORD(1);
5110
5111  bind(&done);
5112}
5113
5114// Simd Support.
5115void TurboAssembler::F64x2Splat(Simd128Register dst, Simd128Register src) {
5116  vrep(dst, src, Operand(0), Condition(3));
5117}
5118
5119void TurboAssembler::F32x4Splat(Simd128Register dst, Simd128Register src) {
5120  vrep(dst, src, Operand(0), Condition(2));
5121}
5122
5123void TurboAssembler::I64x2Splat(Simd128Register dst, Register src) {
5124  vlvg(dst, src, MemOperand(r0, 0), Condition(3));
5125  vrep(dst, dst, Operand(0), Condition(3));
5126}
5127
5128void TurboAssembler::I32x4Splat(Simd128Register dst, Register src) {
5129  vlvg(dst, src, MemOperand(r0, 0), Condition(2));
5130  vrep(dst, dst, Operand(0), Condition(2));
5131}
5132
5133void TurboAssembler::I16x8Splat(Simd128Register dst, Register src) {
5134  vlvg(dst, src, MemOperand(r0, 0), Condition(1));
5135  vrep(dst, dst, Operand(0), Condition(1));
5136}
5137
5138void TurboAssembler::I8x16Splat(Simd128Register dst, Register src) {
5139  vlvg(dst, src, MemOperand(r0, 0), Condition(0));
5140  vrep(dst, dst, Operand(0), Condition(0));
5141}
5142
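// The vector facility numbers elements starting from the most significant
// end, so the wasm lane index is mirrored (1 - idx, 3 - idx, ...) in the
// lane accessors below.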
5143void TurboAssembler::F64x2ExtractLane(DoubleRegister dst, Simd128Register src,
5144                                      uint8_t imm_lane_idx, Register) {
5145  vrep(dst, src, Operand(1 - imm_lane_idx), Condition(3));
5146}
5147
5148void TurboAssembler::F32x4ExtractLane(DoubleRegister dst, Simd128Register src,
5149                                      uint8_t imm_lane_idx, Register) {
5150  vrep(dst, src, Operand(3 - imm_lane_idx), Condition(2));
5151}
5152
5153void TurboAssembler::I64x2ExtractLane(Register dst, Simd128Register src,
5154                                      uint8_t imm_lane_idx, Register) {
5155  vlgv(dst, src, MemOperand(r0, 1 - imm_lane_idx), Condition(3));
5156}
5157
5158void TurboAssembler::I32x4ExtractLane(Register dst, Simd128Register src,
5159                                      uint8_t imm_lane_idx, Register) {
5160  vlgv(dst, src, MemOperand(r0, 3 - imm_lane_idx), Condition(2));
5161}
5162
5163void TurboAssembler::I16x8ExtractLaneU(Register dst, Simd128Register src,
5164                                       uint8_t imm_lane_idx, Register) {
5165  vlgv(dst, src, MemOperand(r0, 7 - imm_lane_idx), Condition(1));
5166}
5167
5168void TurboAssembler::I16x8ExtractLaneS(Register dst, Simd128Register src,
5169                                       uint8_t imm_lane_idx, Register scratch) {
5170  vlgv(scratch, src, MemOperand(r0, 7 - imm_lane_idx), Condition(1));
5171  lghr(dst, scratch);
5172}
5173
5174void TurboAssembler::I8x16ExtractLaneU(Register dst, Simd128Register src,
5175                                       uint8_t imm_lane_idx, Register) {
5176  vlgv(dst, src, MemOperand(r0, 15 - imm_lane_idx), Condition(0));
5177}
5178
5179void TurboAssembler::I8x16ExtractLaneS(Register dst, Simd128Register src,
5180                                       uint8_t imm_lane_idx, Register scratch) {
5181  vlgv(scratch, src, MemOperand(r0, 15 - imm_lane_idx), Condition(0));
5182  lgbr(dst, scratch);
5183}
5184
5185void TurboAssembler::F64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
5186                                      DoubleRegister src2, uint8_t imm_lane_idx,
5187                                      Register scratch) {
5188  vlgv(scratch, src2, MemOperand(r0, 0), Condition(3));
5189  if (src1 != dst) {
5190    vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5191  }
5192  vlvg(dst, scratch, MemOperand(r0, 1 - imm_lane_idx), Condition(3));
5193}
5194
5195void TurboAssembler::F32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
5196                                      DoubleRegister src2, uint8_t imm_lane_idx,
5197                                      Register scratch) {
5198  vlgv(scratch, src2, MemOperand(r0, 0), Condition(2));
5199  if (src1 != dst) {
5200    vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5201  }
5202  vlvg(dst, scratch, MemOperand(r0, 3 - imm_lane_idx), Condition(2));
5203}
5204
5205void TurboAssembler::I64x2ReplaceLane(Simd128Register dst, Simd128Register src1,
5206                                      Register src2, uint8_t imm_lane_idx,
5207                                      Register) {
5208  if (src1 != dst) {
5209    vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5210  }
5211  vlvg(dst, src2, MemOperand(r0, 1 - imm_lane_idx), Condition(3));
5212}
5213
5214void TurboAssembler::I32x4ReplaceLane(Simd128Register dst, Simd128Register src1,
5215                                      Register src2, uint8_t imm_lane_idx,
5216                                      Register) {
5217  if (src1 != dst) {
5218    vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5219  }
5220  vlvg(dst, src2, MemOperand(r0, 3 - imm_lane_idx), Condition(2));
5221}
5222
5223void TurboAssembler::I16x8ReplaceLane(Simd128Register dst, Simd128Register src1,
5224                                      Register src2, uint8_t imm_lane_idx,
5225                                      Register) {
5226  if (src1 != dst) {
5227    vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5228  }
5229  vlvg(dst, src2, MemOperand(r0, 7 - imm_lane_idx), Condition(1));
5230}
5231
5232void TurboAssembler::I8x16ReplaceLane(Simd128Register dst, Simd128Register src1,
5233                                      Register src2, uint8_t imm_lane_idx,
5234                                      Register) {
5235  if (src1 != dst) {
5236    vlr(dst, src1, Condition(0), Condition(0), Condition(0));
5237  }
5238  vlvg(dst, src2, MemOperand(r0, 15 - imm_lane_idx), Condition(0));
5239}
5240
5241void TurboAssembler::S128Not(Simd128Register dst, Simd128Register src) {
5242  vno(dst, src, src, Condition(0), Condition(0), Condition(0));
5243}
5244
5245void TurboAssembler::S128Zero(Simd128Register dst, Simd128Register src) {
5246  vx(dst, src, src, Condition(0), Condition(0), Condition(0));
5247}
5248
5249void TurboAssembler::S128AllOnes(Simd128Register dst, Simd128Register src) {
5250  vceq(dst, src, src, Condition(0), Condition(3));
5251}
5252
5253void TurboAssembler::S128Select(Simd128Register dst, Simd128Register src1,
5254                                Simd128Register src2, Simd128Register mask) {
5255  vsel(dst, src1, src2, mask, Condition(0), Condition(0));
5256}
5257
5258#define SIMD_UNOP_LIST_VRR_A(V)             \
5259  V(F64x2Abs, vfpso, 2, 0, 3)               \
5260  V(F64x2Neg, vfpso, 0, 0, 3)               \
5261  V(F64x2Sqrt, vfsq, 0, 0, 3)               \
5262  V(F64x2Ceil, vfi, 6, 0, 3)                \
5263  V(F64x2Floor, vfi, 7, 0, 3)               \
5264  V(F64x2Trunc, vfi, 5, 0, 3)               \
5265  V(F64x2NearestInt, vfi, 4, 0, 3)          \
5266  V(F32x4Abs, vfpso, 2, 0, 2)               \
5267  V(F32x4Neg, vfpso, 0, 0, 2)               \
5268  V(F32x4Sqrt, vfsq, 0, 0, 2)               \
5269  V(F32x4Ceil, vfi, 6, 0, 2)                \
5270  V(F32x4Floor, vfi, 7, 0, 2)               \
5271  V(F32x4Trunc, vfi, 5, 0, 2)               \
5272  V(F32x4NearestInt, vfi, 4, 0, 2)          \
5273  V(I64x2Abs, vlp, 0, 0, 3)                 \
5274  V(I64x2Neg, vlc, 0, 0, 3)                 \
5275  V(I64x2SConvertI32x4Low, vupl, 0, 0, 2)   \
5276  V(I64x2SConvertI32x4High, vuph, 0, 0, 2)  \
5277  V(I64x2UConvertI32x4Low, vupll, 0, 0, 2)  \
5278  V(I64x2UConvertI32x4High, vuplh, 0, 0, 2) \
5279  V(I32x4Abs, vlp, 0, 0, 2)                 \
5280  V(I32x4Neg, vlc, 0, 0, 2)                 \
5281  V(I32x4SConvertI16x8Low, vupl, 0, 0, 1)   \
5282  V(I32x4SConvertI16x8High, vuph, 0, 0, 1)  \
5283  V(I32x4UConvertI16x8Low, vupll, 0, 0, 1)  \
5284  V(I32x4UConvertI16x8High, vuplh, 0, 0, 1) \
5285  V(I16x8Abs, vlp, 0, 0, 1)                 \
5286  V(I16x8Neg, vlc, 0, 0, 1)                 \
5287  V(I16x8SConvertI8x16Low, vupl, 0, 0, 0)   \
5288  V(I16x8SConvertI8x16High, vuph, 0, 0, 0)  \
5289  V(I16x8UConvertI8x16Low, vupll, 0, 0, 0)  \
5290  V(I16x8UConvertI8x16High, vuplh, 0, 0, 0) \
5291  V(I8x16Abs, vlp, 0, 0, 0)                 \
5292  V(I8x16Neg, vlc, 0, 0, 0)                 \
5293  V(I8x16Popcnt, vpopct, 0, 0, 0)
5294
5295#define EMIT_SIMD_UNOP_VRR_A(name, op, c1, c2, c3)                      \
5296  void TurboAssembler::name(Simd128Register dst, Simd128Register src) { \
5297    op(dst, src, Condition(c1), Condition(c2), Condition(c3));          \
5298  }
5299SIMD_UNOP_LIST_VRR_A(EMIT_SIMD_UNOP_VRR_A)
5300#undef EMIT_SIMD_UNOP_VRR_A
5301#undef SIMD_UNOP_LIST_VRR_A
5302
5303#define SIMD_BINOP_LIST_VRR_B(V) \
5304  V(I64x2Eq, vceq, 0, 3)         \
5305  V(I64x2GtS, vch, 0, 3)         \
5306  V(I32x4Eq, vceq, 0, 2)         \
5307  V(I32x4GtS, vch, 0, 2)         \
5308  V(I32x4GtU, vchl, 0, 2)        \
5309  V(I16x8Eq, vceq, 0, 1)         \
5310  V(I16x8GtS, vch, 0, 1)         \
5311  V(I16x8GtU, vchl, 0, 1)        \
5312  V(I8x16Eq, vceq, 0, 0)         \
5313  V(I8x16GtS, vch, 0, 0)         \
5314  V(I8x16GtU, vchl, 0, 0)
5315
5316#define EMIT_SIMD_BINOP_VRR_B(name, op, c1, c2)                        \
5317  void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5318                            Simd128Register src2) {                    \
5319    op(dst, src1, src2, Condition(c1), Condition(c2));                 \
5320  }
5321SIMD_BINOP_LIST_VRR_B(EMIT_SIMD_BINOP_VRR_B)
5322#undef EMIT_SIMD_BINOP_VRR_B
5323#undef SIMD_BINOP_LIST_VRR_B
5324
5325#define SIMD_BINOP_LIST_VRR_C(V)           \
5326  V(F64x2Add, vfa, 0, 0, 3)                \
5327  V(F64x2Sub, vfs, 0, 0, 3)                \
5328  V(F64x2Mul, vfm, 0, 0, 3)                \
5329  V(F64x2Div, vfd, 0, 0, 3)                \
5330  V(F64x2Min, vfmin, 1, 0, 3)              \
5331  V(F64x2Max, vfmax, 1, 0, 3)              \
5332  V(F64x2Eq, vfce, 0, 0, 3)                \
5333  V(F64x2Pmin, vfmin, 3, 0, 3)             \
5334  V(F64x2Pmax, vfmax, 3, 0, 3)             \
5335  V(F32x4Add, vfa, 0, 0, 2)                \
5336  V(F32x4Sub, vfs, 0, 0, 2)                \
5337  V(F32x4Mul, vfm, 0, 0, 2)                \
5338  V(F32x4Div, vfd, 0, 0, 2)                \
5339  V(F32x4Min, vfmin, 1, 0, 2)              \
5340  V(F32x4Max, vfmax, 1, 0, 2)              \
5341  V(F32x4Eq, vfce, 0, 0, 2)                \
5342  V(F32x4Pmin, vfmin, 3, 0, 2)             \
5343  V(F32x4Pmax, vfmax, 3, 0, 2)             \
5344  V(I64x2Add, va, 0, 0, 3)                 \
5345  V(I64x2Sub, vs, 0, 0, 3)                 \
5346  V(I32x4Add, va, 0, 0, 2)                 \
5347  V(I32x4Sub, vs, 0, 0, 2)                 \
5348  V(I32x4Mul, vml, 0, 0, 2)                \
5349  V(I32x4MinS, vmn, 0, 0, 2)               \
5350  V(I32x4MinU, vmnl, 0, 0, 2)              \
5351  V(I32x4MaxS, vmx, 0, 0, 2)               \
5352  V(I32x4MaxU, vmxl, 0, 0, 2)              \
5353  V(I16x8Add, va, 0, 0, 1)                 \
5354  V(I16x8Sub, vs, 0, 0, 1)                 \
5355  V(I16x8Mul, vml, 0, 0, 1)                \
5356  V(I16x8MinS, vmn, 0, 0, 1)               \
5357  V(I16x8MinU, vmnl, 0, 0, 1)              \
5358  V(I16x8MaxS, vmx, 0, 0, 1)               \
5359  V(I16x8MaxU, vmxl, 0, 0, 1)              \
5360  V(I16x8RoundingAverageU, vavgl, 0, 0, 1) \
5361  V(I8x16Add, va, 0, 0, 0)                 \
5362  V(I8x16Sub, vs, 0, 0, 0)                 \
5363  V(I8x16MinS, vmn, 0, 0, 0)               \
5364  V(I8x16MinU, vmnl, 0, 0, 0)              \
5365  V(I8x16MaxS, vmx, 0, 0, 0)               \
5366  V(I8x16MaxU, vmxl, 0, 0, 0)              \
5367  V(I8x16RoundingAverageU, vavgl, 0, 0, 0) \
5368  V(S128And, vn, 0, 0, 0)                  \
5369  V(S128Or, vo, 0, 0, 0)                   \
5370  V(S128Xor, vx, 0, 0, 0)                  \
5371  V(S128AndNot, vnc, 0, 0, 0)
5372
5373#define EMIT_SIMD_BINOP_VRR_C(name, op, c1, c2, c3)                    \
5374  void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5375                            Simd128Register src2) {                    \
5376    op(dst, src1, src2, Condition(c1), Condition(c2), Condition(c3));  \
5377  }
5378SIMD_BINOP_LIST_VRR_C(EMIT_SIMD_BINOP_VRR_C)
5379#undef EMIT_SIMD_BINOP_VRR_C
5380#undef SIMD_BINOP_LIST_VRR_C
5381
5382#define SIMD_SHIFT_LIST(V) \
5383  V(I64x2Shl, veslv, 3)    \
5384  V(I64x2ShrS, vesrav, 3)  \
5385  V(I64x2ShrU, vesrlv, 3)  \
5386  V(I32x4Shl, veslv, 2)    \
5387  V(I32x4ShrS, vesrav, 2)  \
5388  V(I32x4ShrU, vesrlv, 2)  \
5389  V(I16x8Shl, veslv, 1)    \
5390  V(I16x8ShrS, vesrav, 1)  \
5391  V(I16x8ShrU, vesrlv, 1)  \
5392  V(I8x16Shl, veslv, 0)    \
5393  V(I8x16ShrS, vesrav, 0)  \
5394  V(I8x16ShrU, vesrlv, 0)
5395
5396#define EMIT_SIMD_SHIFT(name, op, c1)                                  \
5397  void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5398                            Register src2, Simd128Register scratch) {  \
5399    vlvg(scratch, src2, MemOperand(r0, 0), Condition(c1));             \
5400    vrep(scratch, scratch, Operand(0), Condition(c1));                 \
5401    op(dst, src1, scratch, Condition(0), Condition(0), Condition(c1)); \
5402  }                                                                    \
5403  void TurboAssembler::name(Simd128Register dst, Simd128Register src1, \
5404                            const Operand& src2, Register scratch1,    \
5405                            Simd128Register scratch2) {                \
5406    mov(scratch1, src2);                                               \
5407    name(dst, src1, scratch1, scratch2);                               \
5408  }
5409SIMD_SHIFT_LIST(EMIT_SIMD_SHIFT)
5410#undef EMIT_SIMD_SHIFT
5411#undef SIMD_SHIFT_LIST
5412
5413#define SIMD_EXT_MUL_LIST(V)                    \
5414  V(I64x2ExtMulLowI32x4S, vme, vmo, vmrl, 2)    \
5415  V(I64x2ExtMulHighI32x4S, vme, vmo, vmrh, 2)   \
5416  V(I64x2ExtMulLowI32x4U, vmle, vmlo, vmrl, 2)  \
5417  V(I64x2ExtMulHighI32x4U, vmle, vmlo, vmrh, 2) \
5418  V(I32x4ExtMulLowI16x8S, vme, vmo, vmrl, 1)    \
5419  V(I32x4ExtMulHighI16x8S, vme, vmo, vmrh, 1)   \
5420  V(I32x4ExtMulLowI16x8U, vmle, vmlo, vmrl, 1)  \
5421  V(I32x4ExtMulHighI16x8U, vmle, vmlo, vmrh, 1) \
5422  V(I16x8ExtMulLowI8x16S, vme, vmo, vmrl, 0)    \
5423  V(I16x8ExtMulHighI8x16S, vme, vmo, vmrh, 0)   \
5424  V(I16x8ExtMulLowI8x16U, vmle, vmlo, vmrl, 0)  \
5425  V(I16x8ExtMulHighI8x16U, vmle, vmlo, vmrh, 0)
5426
5427#define EMIT_SIMD_EXT_MUL(name, mul_even, mul_odd, merge, mode)                \
5428  void TurboAssembler::name(Simd128Register dst, Simd128Register src1,         \
5429                            Simd128Register src2, Simd128Register scratch) {   \
5430    mul_even(scratch, src1, src2, Condition(0), Condition(0),                  \
5431             Condition(mode));                                                 \
5432    mul_odd(dst, src1, src2, Condition(0), Condition(0), Condition(mode));     \
5433    merge(dst, scratch, dst, Condition(0), Condition(0), Condition(mode + 1)); \
5434  }
5435SIMD_EXT_MUL_LIST(EMIT_SIMD_EXT_MUL)
5436#undef EMIT_SIMD_EXT_MUL
5437#undef SIMD_EXT_MUL_LIST
5438
5439#define SIMD_ALL_TRUE_LIST(V) \
5440  V(I64x2AllTrue, 3)          \
5441  V(I32x4AllTrue, 2)          \
5442  V(I16x8AllTrue, 1)          \
5443  V(I8x16AllTrue, 0)
5444
5445#define EMIT_SIMD_ALL_TRUE(name, mode)                                     \
5446  void TurboAssembler::name(Register dst, Simd128Register src,             \
5447                            Register scratch1, Simd128Register scratch2) { \
5448    mov(scratch1, Operand(1));                                             \
5449    xgr(dst, dst);                                                         \
5450    vx(scratch2, scratch2, scratch2, Condition(0), Condition(0),           \
5451       Condition(2));                                                      \
5452    vceq(scratch2, src, scratch2, Condition(0), Condition(mode));          \
5453    vtm(scratch2, scratch2, Condition(0), Condition(0), Condition(0));     \
5454    locgr(Condition(8), dst, scratch1);                                    \
5455  }
5456SIMD_ALL_TRUE_LIST(EMIT_SIMD_ALL_TRUE)
5457#undef EMIT_SIMD_ALL_TRUE
5458#undef SIMD_ALL_TRUE_LIST
5459
5460#define SIMD_QFM_LIST(V) \
5461  V(F64x2Qfma, vfma, 3)  \
5462  V(F64x2Qfms, vfnms, 3) \
5463  V(F32x4Qfma, vfma, 2)  \
5464  V(F32x4Qfms, vfnms, 2)
5465
5466#define EMIT_SIMD_QFM(name, op, c1)                                       \
5467  void TurboAssembler::name(Simd128Register dst, Simd128Register src1,    \
5468                            Simd128Register src2, Simd128Register src3) { \
5469    op(dst, src2, src3, src1, Condition(c1), Condition(0));               \
5470  }
5471SIMD_QFM_LIST(EMIT_SIMD_QFM)
5472#undef EMIT_SIMD_QFM
5473#undef SIMD_QFM_LIST
5474
5475void TurboAssembler::I64x2Mul(Simd128Register dst, Simd128Register src1,
5476                              Simd128Register src2, Register scratch1,
5477                              Register scratch2, Register scratch3) {
5478  Register scratch_1 = scratch1;
5479  Register scratch_2 = scratch2;
5480  for (int i = 0; i < 2; i++) {
5481    vlgv(scratch_1, src1, MemOperand(r0, i), Condition(3));
5482    vlgv(scratch_2, src2, MemOperand(r0, i), Condition(3));
5483    MulS64(scratch_1, scratch_2);
5484    scratch_1 = scratch2;
5485    scratch_2 = scratch3;
5486  }
5487  vlvgp(dst, scratch1, scratch2);
5488}
5489
5490void TurboAssembler::F64x2Ne(Simd128Register dst, Simd128Register src1,
5491                             Simd128Register src2) {
5492  vfce(dst, src1, src2, Condition(0), Condition(0), Condition(3));
5493  vno(dst, dst, dst, Condition(0), Condition(0), Condition(3));
5494}
5495
5496void TurboAssembler::F64x2Lt(Simd128Register dst, Simd128Register src1,
5497                             Simd128Register src2) {
5498  vfch(dst, src2, src1, Condition(0), Condition(0), Condition(3));
5499}
5500
5501void TurboAssembler::F64x2Le(Simd128Register dst, Simd128Register src1,
5502                             Simd128Register src2) {
5503  vfche(dst, src2, src1, Condition(0), Condition(0), Condition(3));
5504}
5505
5506void TurboAssembler::F32x4Ne(Simd128Register dst, Simd128Register src1,
5507                             Simd128Register src2) {
5508  vfce(dst, src1, src2, Condition(0), Condition(0), Condition(2));
5509  vno(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5510}
5511
5512void TurboAssembler::F32x4Lt(Simd128Register dst, Simd128Register src1,
5513                             Simd128Register src2) {
5514  vfch(dst, src2, src1, Condition(0), Condition(0), Condition(2));
5515}
5516
5517void TurboAssembler::F32x4Le(Simd128Register dst, Simd128Register src1,
5518                             Simd128Register src2) {
5519  vfche(dst, src2, src1, Condition(0), Condition(0), Condition(2));
5520}
5521
5522void TurboAssembler::I64x2Ne(Simd128Register dst, Simd128Register src1,
5523                             Simd128Register src2) {
5524  vceq(dst, src1, src2, Condition(0), Condition(3));
5525  vno(dst, dst, dst, Condition(0), Condition(0), Condition(3));
5526}
5527
5528void TurboAssembler::I64x2GeS(Simd128Register dst, Simd128Register src1,
5529                              Simd128Register src2) {
5530  // Compute !(B > A) which is equal to A >= B.
5531  vch(dst, src2, src1, Condition(0), Condition(3));
5532  vno(dst, dst, dst, Condition(0), Condition(0), Condition(3));
5533}
5534
5535void TurboAssembler::I32x4Ne(Simd128Register dst, Simd128Register src1,
5536                             Simd128Register src2) {
5537  vceq(dst, src1, src2, Condition(0), Condition(2));
5538  vno(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5539}
5540
5541void TurboAssembler::I32x4GeS(Simd128Register dst, Simd128Register src1,
5542                              Simd128Register src2) {
5543  // Compute !(B > A) which is equal to A >= B.
5544  vch(dst, src2, src1, Condition(0), Condition(2));
5545  vno(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5546}
5547
5548void TurboAssembler::I32x4GeU(Simd128Register dst, Simd128Register src1,
5549                              Simd128Register src2, Simd128Register scratch) {
5550  vceq(scratch, src1, src2, Condition(0), Condition(2));
5551  vchl(dst, src1, src2, Condition(0), Condition(2));
5552  vo(dst, dst, scratch, Condition(0), Condition(0), Condition(2));
5553}
5554
5555void TurboAssembler::I16x8Ne(Simd128Register dst, Simd128Register src1,
5556                             Simd128Register src2) {
5557  vceq(dst, src1, src2, Condition(0), Condition(1));
5558  vno(dst, dst, dst, Condition(0), Condition(0), Condition(1));
5559}
5560
5561void TurboAssembler::I16x8GeS(Simd128Register dst, Simd128Register src1,
5562                              Simd128Register src2) {
5563  // Compute !(B > A) which is equal to A >= B.
5564  vch(dst, src2, src1, Condition(0), Condition(1));
5565  vno(dst, dst, dst, Condition(0), Condition(0), Condition(1));
5566}
5567
5568void TurboAssembler::I16x8GeU(Simd128Register dst, Simd128Register src1,
5569                              Simd128Register src2, Simd128Register scratch) {
5570  vceq(scratch, src1, src2, Condition(0), Condition(1));
5571  vchl(dst, src1, src2, Condition(0), Condition(1));
5572  vo(dst, dst, scratch, Condition(0), Condition(0), Condition(1));
5573}
5574
5575void TurboAssembler::I8x16Ne(Simd128Register dst, Simd128Register src1,
5576                             Simd128Register src2) {
5577  vceq(dst, src1, src2, Condition(0), Condition(0));
5578  vno(dst, dst, dst, Condition(0), Condition(0), Condition(0));
5579}
5580
5581void TurboAssembler::I8x16GeS(Simd128Register dst, Simd128Register src1,
5582                              Simd128Register src2) {
5583  // Compute !(B > A) which is equal to A >= B.
5584  vch(dst, src2, src1, Condition(0), Condition(0));
5585  vno(dst, dst, dst, Condition(0), Condition(0), Condition(0));
5586}
5587
5588void TurboAssembler::I8x16GeU(Simd128Register dst, Simd128Register src1,
5589                              Simd128Register src2, Simd128Register scratch) {
5590  vceq(scratch, src1, src2, Condition(0), Condition(0));
5591  vchl(dst, src1, src2, Condition(0), Condition(0));
5592  vo(dst, dst, scratch, Condition(0), Condition(0), Condition(0));
5593}
5594
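// The BitMask helpers gather the per-lane sign bits: the constant loaded
// into the scratch vector lists the bit positions of those sign bits, and
// vbperm collects the selected bits into a halfword that is then extracted
// into dst.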
5595void TurboAssembler::I64x2BitMask(Register dst, Simd128Register src,
5596                                  Register scratch1, Simd128Register scratch2) {
5597  mov(scratch1, Operand(0x8080808080800040));
5598  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
5599  vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
5600  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
5601}
5602
5603void TurboAssembler::I32x4BitMask(Register dst, Simd128Register src,
5604                                  Register scratch1, Simd128Register scratch2) {
5605  mov(scratch1, Operand(0x8080808000204060));
5606  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
5607  vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
5608  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
5609}
5610
5611void TurboAssembler::I16x8BitMask(Register dst, Simd128Register src,
5612                                  Register scratch1, Simd128Register scratch2) {
5613  mov(scratch1, Operand(0x10203040506070));
5614  vlvg(scratch2, scratch1, MemOperand(r0, 1), Condition(3));
5615  vbperm(scratch2, src, scratch2, Condition(0), Condition(0), Condition(0));
5616  vlgv(dst, scratch2, MemOperand(r0, 7), Condition(0));
5617}
5618
5619void TurboAssembler::F64x2ConvertLowI32x4S(Simd128Register dst,
5620                                           Simd128Register src) {
5621  vupl(dst, src, Condition(0), Condition(0), Condition(2));
5622  vcdg(dst, dst, Condition(4), Condition(0), Condition(3));
5623}
5624
5625void TurboAssembler::F64x2ConvertLowI32x4U(Simd128Register dst,
5626                                           Simd128Register src) {
5627  vupll(dst, src, Condition(0), Condition(0), Condition(2));
5628  vcdlg(dst, dst, Condition(4), Condition(0), Condition(3));
5629}
5630
5631void TurboAssembler::I8x16BitMask(Register dst, Simd128Register src,
5632                                  Register scratch1, Register scratch2,
5633                                  Simd128Register scratch3) {
5634  mov(scratch1, Operand(0x4048505860687078));
5635  mov(scratch2, Operand(0x8101820283038));
5636  vlvgp(scratch3, scratch2, scratch1);
5637  vbperm(scratch3, src, scratch3, Condition(0), Condition(0), Condition(0));
5638  vlgv(dst, scratch3, MemOperand(r0, 3), Condition(1));
5639}
5640
5641void TurboAssembler::V128AnyTrue(Register dst, Simd128Register src,
5642                                 Register scratch) {
5643  mov(dst, Operand(1));
5644  xgr(scratch, scratch);
5645  vtm(src, src, Condition(0), Condition(0), Condition(0));
5646  locgr(Condition(8), dst, scratch);
5647}
5648
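// Scalar fallback for targets without VECTOR ENHANCEMENTS FACILITY 2:
// convert each 32-bit lane individually through scalar registers.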
5649#define CONVERT_FLOAT_TO_INT32(convert, dst, src, scratch1, scratch2) \
5650  for (int index = 0; index < 4; index++) {                           \
5651    vlgv(scratch2, src, MemOperand(r0, index), Condition(2));         \
5652    MovIntToFloat(scratch1, scratch2);                                \
5653    convert(scratch2, scratch1, kRoundToZero);                        \
5654    vlvg(dst, scratch2, MemOperand(r0, index), Condition(2));         \
5655  }
5656void TurboAssembler::I32x4SConvertF32x4(Simd128Register dst,
5657                                        Simd128Register src,
5658                                        Simd128Register scratch1,
5659                                        Register scratch2) {
5660  // NaN to 0.
5661  vfce(scratch1, src, src, Condition(0), Condition(0), Condition(2));
5662  vn(dst, src, scratch1, Condition(0), Condition(0), Condition(0));
5663  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5664    vcgd(dst, dst, Condition(5), Condition(0), Condition(2));
5665  } else {
5666    CONVERT_FLOAT_TO_INT32(ConvertFloat32ToInt32, dst, dst, scratch1, scratch2)
5667  }
5668}
5669
5670void TurboAssembler::I32x4UConvertF32x4(Simd128Register dst,
5671                                        Simd128Register src,
5672                                        Simd128Register scratch1,
5673                                        Register scratch2) {
  // Both vclgd and ConvertFloat32ToUnsignedInt32 automatically convert NaN
  // and negative values to 0.
5676  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5677    vclgd(dst, src, Condition(5), Condition(0), Condition(2));
5678  } else {
5679    CONVERT_FLOAT_TO_INT32(ConvertFloat32ToUnsignedInt32, dst, src, scratch1,
5680                           scratch2)
5681  }
5682}
5683#undef CONVERT_FLOAT_TO_INT32
5684
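// Companion scalar fallback for the int -> float direction.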
5685#define CONVERT_INT32_TO_FLOAT(convert, dst, src, scratch1, scratch2) \
5686  for (int index = 0; index < 4; index++) {                           \
5687    vlgv(scratch2, src, MemOperand(r0, index), Condition(2));         \
5688    convert(scratch1, scratch2);                                      \
5689    MovFloatToInt(scratch2, scratch1);                                \
5690    vlvg(dst, scratch2, MemOperand(r0, index), Condition(2));         \
5691  }
5692void TurboAssembler::F32x4SConvertI32x4(Simd128Register dst,
5693                                        Simd128Register src,
5694                                        Simd128Register scratch1,
5695                                        Register scratch2) {
5696  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5697    vcdg(dst, src, Condition(4), Condition(0), Condition(2));
5698  } else {
5699    CONVERT_INT32_TO_FLOAT(ConvertIntToFloat, dst, src, scratch1, scratch2)
5700  }
5701}
5702void TurboAssembler::F32x4UConvertI32x4(Simd128Register dst,
5703                                        Simd128Register src,
5704                                        Simd128Register scratch1,
5705                                        Register scratch2) {
5706  if (CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)) {
5707    vcdlg(dst, src, Condition(4), Condition(0), Condition(2));
5708  } else {
5709    CONVERT_INT32_TO_FLOAT(ConvertUnsignedIntToFloat, dst, src, scratch1,
5710                           scratch2)
5711  }
5712}
5713#undef CONVERT_INT32_TO_FLOAT
5714
5715void TurboAssembler::I16x8SConvertI32x4(Simd128Register dst,
5716                                        Simd128Register src1,
5717                                        Simd128Register src2) {
5718  vpks(dst, src2, src1, Condition(0), Condition(2));
5719}
5720
5721void TurboAssembler::I8x16SConvertI16x8(Simd128Register dst,
5722                                        Simd128Register src1,
5723                                        Simd128Register src2) {
5724  vpks(dst, src2, src1, Condition(0), Condition(1));
5725}
5726
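// Clamp negative lanes to zero (vmx against a zero vector) so that the
// signed inputs can be packed with unsigned saturation (vpkls) below.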
5727#define VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, mode)       \
5728  vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), \
5729     Condition(0), Condition(mode));                               \
5730  vmx(scratch, src1, kDoubleRegZero, Condition(0), Condition(0),   \
5731      Condition(mode));                                            \
5732  vmx(dst, src2, kDoubleRegZero, Condition(0), Condition(0), Condition(mode));
5733void TurboAssembler::I16x8UConvertI32x4(Simd128Register dst,
5734                                        Simd128Register src1,
5735                                        Simd128Register src2,
5736                                        Simd128Register scratch) {
  // Treat inputs as signed, and saturate to unsigned (negative values to 0).
5738  VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, 2)
5739  vpkls(dst, dst, scratch, Condition(0), Condition(2));
5740}
5741
5742void TurboAssembler::I8x16UConvertI16x8(Simd128Register dst,
5743                                        Simd128Register src1,
5744                                        Simd128Register src2,
5745                                        Simd128Register scratch) {
  // Treat inputs as signed, and saturate to unsigned (negative values to 0).
5747  VECTOR_PACK_UNSIGNED(dst, src1, src2, scratch, 1)
5748  vpkls(dst, dst, scratch, Condition(0), Condition(1));
5749}
5750#undef VECTOR_PACK_UNSIGNED
5751
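// BINOP_EXTRACT unpacks both inputs to the next wider lane size and applies
// the operation there: the result for the high halves is left in dst and the
// result for the low halves in scratch1, for the caller to pack back down
// with saturation.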
5752#define BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, op, extract_high, \
5753                      extract_low, mode)                                     \
5754  DCHECK(dst != scratch1 && dst != scratch2);                                \
5755  DCHECK(dst != src1 && dst != src2);                                        \
5756  extract_high(scratch1, src1, Condition(0), Condition(0), Condition(mode)); \
5757  extract_high(scratch2, src2, Condition(0), Condition(0), Condition(mode)); \
5758  op(dst, scratch1, scratch2, Condition(0), Condition(0),                    \
5759     Condition(mode + 1));                                                   \
5760  extract_low(scratch1, src1, Condition(0), Condition(0), Condition(mode));  \
5761  extract_low(scratch2, src2, Condition(0), Condition(0), Condition(mode));  \
5762  op(scratch1, scratch1, scratch2, Condition(0), Condition(0),               \
5763     Condition(mode + 1));
5764void TurboAssembler::I16x8AddSatS(Simd128Register dst, Simd128Register src1,
5765                                  Simd128Register src2,
5766                                  Simd128Register scratch1,
5767                                  Simd128Register scratch2) {
5768  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuph, vupl, 1)
5769  vpks(dst, dst, scratch1, Condition(0), Condition(2));
5770}
5771
5772void TurboAssembler::I16x8SubSatS(Simd128Register dst, Simd128Register src1,
5773                                  Simd128Register src2,
5774                                  Simd128Register scratch1,
5775                                  Simd128Register scratch2) {
5776  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuph, vupl, 1)
5777  vpks(dst, dst, scratch1, Condition(0), Condition(2));
5778}
5779
5780void TurboAssembler::I16x8AddSatU(Simd128Register dst, Simd128Register src1,
5781                                  Simd128Register src2,
5782                                  Simd128Register scratch1,
5783                                  Simd128Register scratch2) {
5784  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuplh, vupll, 1)
5785  vpkls(dst, dst, scratch1, Condition(0), Condition(2));
5786}
5787
5788void TurboAssembler::I16x8SubSatU(Simd128Register dst, Simd128Register src1,
5789                                  Simd128Register src2,
5790                                  Simd128Register scratch1,
5791                                  Simd128Register scratch2) {
5792  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuplh, vupll, 1)
  // Clamp negative intermediate values to 0.
5794  vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), Condition(0),
5795     Condition(0));
5796  vmx(dst, kDoubleRegZero, dst, Condition(0), Condition(0), Condition(2));
5797  vmx(scratch1, kDoubleRegZero, scratch1, Condition(0), Condition(0),
5798      Condition(2));
5799  vpkls(dst, dst, scratch1, Condition(0), Condition(2));
5800}
5801
5802void TurboAssembler::I8x16AddSatS(Simd128Register dst, Simd128Register src1,
5803                                  Simd128Register src2,
5804                                  Simd128Register scratch1,
5805                                  Simd128Register scratch2) {
5806  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuph, vupl, 0)
5807  vpks(dst, dst, scratch1, Condition(0), Condition(1));
5808}
5809
5810void TurboAssembler::I8x16SubSatS(Simd128Register dst, Simd128Register src1,
5811                                  Simd128Register src2,
5812                                  Simd128Register scratch1,
5813                                  Simd128Register scratch2) {
5814  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuph, vupl, 0)
5815  vpks(dst, dst, scratch1, Condition(0), Condition(1));
5816}
5817
5818void TurboAssembler::I8x16AddSatU(Simd128Register dst, Simd128Register src1,
5819                                  Simd128Register src2,
5820                                  Simd128Register scratch1,
5821                                  Simd128Register scratch2) {
5822  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, va, vuplh, vupll, 0)
5823  vpkls(dst, dst, scratch1, Condition(0), Condition(1));
5824}
5825
5826void TurboAssembler::I8x16SubSatU(Simd128Register dst, Simd128Register src1,
5827                                  Simd128Register src2,
5828                                  Simd128Register scratch1,
5829                                  Simd128Register scratch2) {
5830  BINOP_EXTRACT(dst, src1, src2, scratch1, scratch2, vs, vuplh, vupll, 0)
  // Clamp negative intermediate values to 0.
5832  vx(kDoubleRegZero, kDoubleRegZero, kDoubleRegZero, Condition(0), Condition(0),
5833     Condition(0));
5834  vmx(dst, kDoubleRegZero, dst, Condition(0), Condition(0), Condition(1));
5835  vmx(scratch1, kDoubleRegZero, scratch1, Condition(0), Condition(0),
5836      Condition(1));
5837  vpkls(dst, dst, scratch1, Condition(0), Condition(1));
5838}
5839#undef BINOP_EXTRACT
5840
5841void TurboAssembler::F64x2PromoteLowF32x4(Simd128Register dst,
5842                                          Simd128Register src,
5843                                          Simd128Register scratch1,
5844                                          Register scratch2, Register scratch3,
5845                                          Register scratch4) {
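  // Convert the two low F32 lanes one at a time through scalar registers
  // and reassemble the doubles with vlvgp.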
5846  Register holder = scratch3;
5847  for (int index = 0; index < 2; ++index) {
    vlgv(scratch2, src, MemOperand(r0, index + 2), Condition(2));
5849    MovIntToFloat(scratch1, scratch2);
5850    ldebr(scratch1, scratch1);
5851    MovDoubleToInt64(holder, scratch1);
5852    holder = scratch4;
5853  }
5854  vlvgp(dst, scratch3, scratch4);
5855}
5856
5857void TurboAssembler::F32x4DemoteF64x2Zero(Simd128Register dst,
5858                                          Simd128Register src,
5859                                          Simd128Register scratch1,
5860                                          Register scratch2, Register scratch3,
5861                                          Register scratch4) {
5862  Register holder = scratch3;
5863  for (int index = 0; index < 2; ++index) {
5864    vlgv(scratch2, src, MemOperand(r0, index), Condition(3));
5865    MovInt64ToDouble(scratch1, scratch2);
5866    ledbr(scratch1, scratch1);
5867    MovFloatToInt(holder, scratch1);
5868    holder = scratch4;
5869  }
5870  vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5871  vlvg(dst, scratch3, MemOperand(r0, 2), Condition(2));
5872  vlvg(dst, scratch4, MemOperand(r0, 3), Condition(2));
5873}
5874
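// EXT_ADD_PAIRWISE widens the even and odd lanes by multiplying them by 1,
// then adds the two widened halves, yielding the pairwise sums at twice the
// lane size.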
5875#define EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, lane_size, mul_even, \
5876                         mul_odd)                                           \
5877  CHECK_NE(src, scratch2);                                                  \
5878  vrepi(scratch2, Operand(1), Condition(lane_size));                        \
5879  mul_even(scratch1, src, scratch2, Condition(0), Condition(0),             \
5880           Condition(lane_size));                                           \
5881  mul_odd(scratch2, src, scratch2, Condition(0), Condition(0),              \
5882          Condition(lane_size));                                            \
5883  va(dst, scratch1, scratch2, Condition(0), Condition(0),                   \
5884     Condition(lane_size + 1));
5885void TurboAssembler::I32x4ExtAddPairwiseI16x8S(Simd128Register dst,
5886                                               Simd128Register src,
5887                                               Simd128Register scratch1,
5888                                               Simd128Register scratch2) {
5889  EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 1, vme, vmo)
5890}
5891
5892void TurboAssembler::I32x4ExtAddPairwiseI16x8U(Simd128Register dst,
5893                                               Simd128Register src,
5894                                               Simd128Register scratch,
5895                                               Simd128Register scratch2) {
5896  vx(scratch, scratch, scratch, Condition(0), Condition(0), Condition(3));
5897  vsum(dst, src, scratch, Condition(0), Condition(0), Condition(1));
5898}
5899
5900void TurboAssembler::I16x8ExtAddPairwiseI8x16S(Simd128Register dst,
5901                                               Simd128Register src,
5902                                               Simd128Register scratch1,
5903                                               Simd128Register scratch2) {
5904  EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vme, vmo)
5905}
5906
5907void TurboAssembler::I16x8ExtAddPairwiseI8x16U(Simd128Register dst,
5908                                               Simd128Register src,
5909                                               Simd128Register scratch1,
5910                                               Simd128Register scratch2) {
5911  EXT_ADD_PAIRWISE(dst, src, scratch1, scratch2, 0, vmle, vmlo)
5912}
5913#undef EXT_ADD_PAIRWISE
5914
5915void TurboAssembler::I32x4TruncSatF64x2SZero(Simd128Register dst,
5916                                             Simd128Register src,
5917                                             Simd128Register scratch) {
5918  // NaN to 0.
5919  vlr(scratch, src, Condition(0), Condition(0), Condition(0));
5920  vfce(scratch, scratch, scratch, Condition(0), Condition(0), Condition(3));
5921  vn(scratch, src, scratch, Condition(0), Condition(0), Condition(0));
5922  vcgd(scratch, scratch, Condition(5), Condition(0), Condition(3));
5923  vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5924  vpks(dst, dst, scratch, Condition(0), Condition(3));
5925}
5926
5927void TurboAssembler::I32x4TruncSatF64x2UZero(Simd128Register dst,
5928                                             Simd128Register src,
5929                                             Simd128Register scratch) {
5930  vclgd(scratch, src, Condition(5), Condition(0), Condition(3));
5931  vx(dst, dst, dst, Condition(0), Condition(0), Condition(2));
5932  vpkls(dst, dst, scratch, Condition(0), Condition(3));
5933}
5934
5935void TurboAssembler::S128Const(Simd128Register dst, uint64_t high, uint64_t low,
5936                               Register scratch1, Register scratch2) {
5937  mov(scratch1, Operand(low));
5938  mov(scratch2, Operand(high));
5939  vlvgp(dst, scratch2, scratch1);
5940}
5941
5942void TurboAssembler::I8x16Swizzle(Simd128Register dst, Simd128Register src1,
5943                                  Simd128Register src2, Register scratch1,
5944                                  Register scratch2, Simd128Register scratch3,
5945                                  Simd128Register scratch4) {
5946  DCHECK(!AreAliased(src1, src2, scratch3, scratch4));
  // Saturate the indices to 5 bits. Input indices greater than 31 should
  // return 0.
5949  vrepi(scratch3, Operand(31), Condition(0));
5950  vmnl(scratch4, src2, scratch3, Condition(0), Condition(0), Condition(0));
5951  // Input needs to be reversed.
5952  vlgv(scratch1, src1, MemOperand(r0, 0), Condition(3));
5953  vlgv(scratch2, src1, MemOperand(r0, 1), Condition(3));
5954  lrvgr(scratch1, scratch1);
5955  lrvgr(scratch2, scratch2);
5956  vlvgp(dst, scratch2, scratch1);
5957  // Clear scratch.
5958  vx(scratch3, scratch3, scratch3, Condition(0), Condition(0), Condition(0));
5959  vperm(dst, dst, scratch3, scratch4, Condition(0), Condition(0));
5960}
5961
5962void TurboAssembler::I8x16Shuffle(Simd128Register dst, Simd128Register src1,
5963                                  Simd128Register src2, uint64_t high,
5964                                  uint64_t low, Register scratch1,
5965                                  Register scratch2, Simd128Register scratch3) {
5966  mov(scratch1, Operand(low));
5967  mov(scratch2, Operand(high));
5968  vlvgp(scratch3, scratch2, scratch1);
5969  vperm(dst, src1, src2, scratch3, Condition(0), Condition(0));
5970}
5971
5972void TurboAssembler::I32x4DotI16x8S(Simd128Register dst, Simd128Register src1,
5973                                    Simd128Register src2,
5974                                    Simd128Register scratch) {
5975  vme(scratch, src1, src2, Condition(0), Condition(0), Condition(1));
5976  vmo(dst, src1, src2, Condition(0), Condition(0), Condition(1));
5977  va(dst, scratch, dst, Condition(0), Condition(0), Condition(2));
5978}
5979
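// Q15_MUL_ROUND widens the halfword lanes, multiplies them, adds the
// rounding constant passed in const_val (0x4000 at the call sites), and
// arithmetically shifts right by 15, i.e. a rounding Q15 fixed-point
// multiply.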
#define Q15_MUL_ROUND(accumulator, src1, src2, const_val, scratch, unpack)  \
5981  unpack(scratch, src1, Condition(0), Condition(0), Condition(1));          \
5982  unpack(accumulator, src2, Condition(0), Condition(0), Condition(1));      \
5983  vml(accumulator, scratch, accumulator, Condition(0), Condition(0),        \
5984      Condition(2));                                                        \
5985  va(accumulator, accumulator, const_val, Condition(0), Condition(0),       \
5986     Condition(2));                                                         \
5987  vrepi(scratch, Operand(15), Condition(2));                                \
5988  vesrav(accumulator, accumulator, scratch, Condition(0), Condition(0),     \
5989         Condition(2));
5990void TurboAssembler::I16x8Q15MulRSatS(Simd128Register dst, Simd128Register src1,
5991                                      Simd128Register src2,
5992                                      Simd128Register scratch1,
5993                                      Simd128Register scratch2,
5994                                      Simd128Register scratch3) {
5995  DCHECK(!AreAliased(src1, src2, scratch1, scratch2, scratch3));
5996  vrepi(scratch1, Operand(0x4000), Condition(2));
  Q15_MUL_ROUND(scratch2, src1, src2, scratch1, scratch3, vupl)
  Q15_MUL_ROUND(dst, src1, src2, scratch1, scratch3, vuph)
5999  vpks(dst, dst, scratch2, Condition(0), Condition(2));
6000}
#undef Q15_MUL_ROUND
6002
6003// Vector LE Load and Transform instructions.
6004#ifdef V8_TARGET_BIG_ENDIAN
6005#define IS_BIG_ENDIAN true
6006#else
6007#define IS_BIG_ENDIAN false
6008#endif
6009
6010#define CAN_LOAD_STORE_REVERSE \
  IS_BIG_ENDIAN && CpuFeatures::IsSupported(VECTOR_ENHANCE_FACILITY_2)
6012
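// With a big-endian target and VECTOR ENHANCEMENTS FACILITY 2, the
// byte-reversing vector loads/stores (vlbrrep, vlebr*, vstebr*) can access
// little-endian memory directly; otherwise the value is routed through a
// scalar LE load/store plus a vector element insert/extract.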
6013#define LOAD_SPLAT_LIST(V)       \
6014  V(64x2, vlbrrep, LoadU64LE, 3) \
6015  V(32x4, vlbrrep, LoadU32LE, 2) \
6016  V(16x8, vlbrrep, LoadU16LE, 1) \
6017  V(8x16, vlrep, LoadU8, 0)
6018
6019#define LOAD_SPLAT(name, vector_instr, scalar_instr, condition)       \
6020  void TurboAssembler::LoadAndSplat##name##LE(                        \
6021      Simd128Register dst, const MemOperand& mem, Register scratch) { \
6022    if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {          \
6023      vector_instr(dst, mem, Condition(condition));                   \
6024      return;                                                         \
6025    }                                                                 \
6026    scalar_instr(scratch, mem);                                       \
6027    vlvg(dst, scratch, MemOperand(r0, 0), Condition(condition));      \
6028    vrep(dst, dst, Operand(0), Condition(condition));                 \
6029  }
6030LOAD_SPLAT_LIST(LOAD_SPLAT)
6031#undef LOAD_SPLAT
6032#undef LOAD_SPLAT_LIST
6033
6034#define LOAD_EXTEND_LIST(V) \
6035  V(32x2U, vuplh, 2)        \
6036  V(32x2S, vuph, 2)         \
6037  V(16x4U, vuplh, 1)        \
6038  V(16x4S, vuph, 1)         \
6039  V(8x8U, vuplh, 0)         \
6040  V(8x8S, vuph, 0)
6041
6042#define LOAD_EXTEND(name, unpack_instr, condition)                            \
6043  void TurboAssembler::LoadAndExtend##name##LE(                               \
6044      Simd128Register dst, const MemOperand& mem, Register scratch) {         \
6045    if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {                  \
6046      vlebrg(dst, mem, Condition(0));                                         \
6047    } else {                                                                  \
6048      LoadU64LE(scratch, mem);                                                \
6049      vlvg(dst, scratch, MemOperand(r0, 0), Condition(3));                    \
6050    }                                                                         \
6051    unpack_instr(dst, dst, Condition(0), Condition(0), Condition(condition)); \
6052  }
6053LOAD_EXTEND_LIST(LOAD_EXTEND)
6054#undef LOAD_EXTEND
#undef LOAD_EXTEND_LIST
6056
6057void TurboAssembler::LoadV32ZeroLE(Simd128Register dst, const MemOperand& mem,
6058                                   Register scratch) {
6059  vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
6060  if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {
6061    vlebrf(dst, mem, Condition(3));
6062    return;
6063  }
6064  LoadU32LE(scratch, mem);
6065  vlvg(dst, scratch, MemOperand(r0, 3), Condition(2));
6066}
6067
6068void TurboAssembler::LoadV64ZeroLE(Simd128Register dst, const MemOperand& mem,
6069                                   Register scratch) {
6070  vx(dst, dst, dst, Condition(0), Condition(0), Condition(0));
6071  if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {
6072    vlebrg(dst, mem, Condition(1));
6073    return;
6074  }
6075  LoadU64LE(scratch, mem);
6076  vlvg(dst, scratch, MemOperand(r0, 1), Condition(3));
6077}
6078
6079#define LOAD_LANE_LIST(V)     \
6080  V(64, vlebrg, LoadU64LE, 3) \
6081  V(32, vlebrf, LoadU32LE, 2) \
6082  V(16, vlebrh, LoadU16LE, 1) \
6083  V(8, vleb, LoadU8, 0)
6084
6085#define LOAD_LANE(name, vector_instr, scalar_instr, condition)             \
6086  void TurboAssembler::LoadLane##name##LE(Simd128Register dst,             \
6087                                          const MemOperand& mem, int lane, \
6088                                          Register scratch) {              \
6089    if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {               \
6090      vector_instr(dst, mem, Condition(lane));                             \
6091      return;                                                              \
6092    }                                                                      \
6093    scalar_instr(scratch, mem);                                            \
6094    vlvg(dst, scratch, MemOperand(r0, lane), Condition(condition));        \
6095  }
6096LOAD_LANE_LIST(LOAD_LANE)
6097#undef LOAD_LANE
6098#undef LOAD_LANE_LIST
6099
6100#define STORE_LANE_LIST(V)      \
6101  V(64, vstebrg, StoreU64LE, 3) \
6102  V(32, vstebrf, StoreU32LE, 2) \
6103  V(16, vstebrh, StoreU16LE, 1) \
6104  V(8, vsteb, StoreU8, 0)
6105
6106#define STORE_LANE(name, vector_instr, scalar_instr, condition)             \
6107  void TurboAssembler::StoreLane##name##LE(Simd128Register src,             \
6108                                           const MemOperand& mem, int lane, \
6109                                           Register scratch) {              \
6110    if (CAN_LOAD_STORE_REVERSE && is_uint12(mem.offset())) {                \
6111      vector_instr(src, mem, Condition(lane));                              \
6112      return;                                                               \
6113    }                                                                       \
6114    vlgv(scratch, src, MemOperand(r0, lane), Condition(condition));         \
6115    scalar_instr(scratch, mem);                                             \
6116  }
6117STORE_LANE_LIST(STORE_LANE)
6118#undef STORE_LANE
6119#undef STORE_LANE_LIST
6120#undef CAN_LOAD_STORE_REVERSE
6121#undef IS_BIG_ENDIAN
6122
6123void MacroAssembler::LoadStackLimit(Register destination, StackLimitKind kind) {
6124  ASM_CODE_COMMENT(this);
6125  DCHECK(root_array_available());
6126  Isolate* isolate = this->isolate();
6127  ExternalReference limit =
6128      kind == StackLimitKind::kRealStackLimit
6129          ? ExternalReference::address_of_real_jslimit(isolate)
6130          : ExternalReference::address_of_jslimit(isolate);
6131  DCHECK(TurboAssembler::IsAddressableThroughRootRegister(isolate, limit));
6132
6133  intptr_t offset =
6134      TurboAssembler::RootRegisterOffsetForExternalReference(isolate, limit);
6135  CHECK(is_int32(offset));
6136  LoadU64(destination, MemOperand(kRootRegister, offset));
6137}
6138
6139}  // namespace internal
6140}  // namespace v8
6141
6142#endif  // V8_TARGET_ARCH_S390
6143