// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/jump-table-assembler.h"

#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"

namespace v8 {
namespace internal {
namespace wasm {

// The implementation is compact enough to implement it inline here. If it gets
// much bigger, we might want to split it into a separate file per architecture.
#if V8_TARGET_ARCH_X64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Use a push, because mov to an extended register takes 6 bytes.
  pushq_imm32(func_index);            // 5 bytes
  EmitJumpSlot(lazy_compile_target);  // 5 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
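  // The displacement of a near jmp is relative to the end of the 5-byte
  // instruction, hence the {kNearJmpInstrSize} adjustment below.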
  intptr_t displacement = static_cast<intptr_t>(
      reinterpret_cast<byte*>(target) - pc_ - kNearJmpInstrSize);
  if (!is_int32(displacement)) return false;
  near_jmp(displacement, RelocInfo::NO_INFO);  // 5 bytes
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  Label data;
  int start_offset = pc_offset();
  jmp(Operand(&data));  // 6 bytes
  Nop(2);               // 2 bytes
  // The data must be properly aligned, so it can be patched atomically (see
  // {PatchFarJumpSlot}).
  DCHECK_EQ(start_offset + kSystemPointerSize, pc_offset());
  USE(start_offset);
  bind(&data);
  dq(target);  // 8 bytes
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot, kSystemPointerSize));
  // The target is stored at offset kSystemPointerSize (8 bytes), see
  // {EmitFarJumpSlot}.
  reinterpret_cast<std::atomic<Address>*>(slot + kSystemPointerSize)
      ->store(target, std::memory_order_relaxed);
  // The update is atomic because the address is properly aligned.
  // Because of cache coherence, the data update will eventually be seen by all
  // cores. It's ok if they temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_IA32
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  mov(kWasmCompileLazyFuncIndexRegister, func_index);  // 5 bytes
  jmp(lazy_compile_target, RelocInfo::NO_INFO);        // 5 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  jmp(target, RelocInfo::NO_INFO);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  jmp(target, RelocInfo::NO_INFO);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_ARM
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load the function index into a register.
  // This generates [movw, movt] on ARMv7 and later, [ldr, constant pool marker,
  // constant] on ARMv6.
  Move32BitImmediate(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // EmitJumpSlot emits either [b], [movw, movt, mov] (ARMv7+), or [ldr,
  // constant].
  // In total, this is at most 5 instructions in either case.
  // TODO(arm): Optimize this for code size; lazy compile is not performance
  // critical, as it's only executed once per function.
  EmitJumpSlot(lazy_compile_target);
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  // Note that {Move32BitImmediate} emits [ldr, constant] for the relocation
  // mode used below; we need this to allow concurrent patching of this slot.
  Move32BitImmediate(pc, Operand(target, RelocInfo::WASM_CALL));
  CheckConstPool(true, false);  // force emit of const pool
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // Load from [pc + kInstrSize] to pc. Note that {pc} points two instructions
  // after the currently executing one.
  ldr_pcrel(pc, -kInstrSize);  // 1 instruction
  dd(target);                  // 4 bytes (== 1 instruction)
  STATIC_ASSERT(kInstrSize == kInt32Size);
  STATIC_ASSERT(kFarJumpTableSlotSize == 2 * kInstrSize);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_ARM64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  CodeEntry();                                             // 0-1 instr
  Mov(kWasmCompileLazyFuncIndexRegister.W(), func_index);  // 1-2 instr
  Jump(lazy_compile_target, RelocInfo::NO_INFO);           // 1 instr
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK(nop_bytes == 0 || nop_bytes == kInstrSize);
  if (nop_bytes) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
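  // A near branch (B) covers a range of +-128 MB. If the target is out of
  // range, return false so the caller can route the call through the far jump
  // table instead.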
  if (!TurboAssembler::IsNearCallOffset(
          (reinterpret_cast<byte*>(target) - pc_) / kInstrSize)) {
    return false;
  }

  CodeEntry();

  Jump(target, RelocInfo::NO_INFO);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // This code uses hard-coded registers and instructions (and avoids
  // {UseScratchRegisterScope} or {InstructionAccurateScope}) because this code
  // will only be called for the very specific runtime slot table, and we want
  // to have maximum control over the generated code.
  // Do not reuse this code without validating that the same assumptions hold.
  CodeEntry();  // 0-1 instructions
  constexpr Register kTmpReg = x16;
  DCHECK(TmpList()->IncludesAliasOf(kTmpReg));
  int kOffset = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 3 : 2;
  // Load from [pc + kOffset * kInstrSize] to {kTmpReg}, then branch there.
  ldr_pcrel(kTmpReg, kOffset);  // 1 instruction
  br(kTmpReg);                  // 1 instruction
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  nop();       // To keep the target below aligned to kSystemPointerSize.
#endif
  dq(target);  // 8 bytes (== 2 instructions)
  STATIC_ASSERT(2 * kInstrSize == kSystemPointerSize);
  const int kSlotCount = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 6 : 4;
  STATIC_ASSERT(kFarJumpTableSlotSize == kSlotCount * kInstrSize);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // See {EmitFarJumpSlot} for the offset of the target (16 bytes with
  // CFI enabled, 8 bytes otherwise).
  int kTargetOffset =
      ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 4 * kInstrSize : 2 * kInstrSize;
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot + kTargetOffset, kSystemPointerSize));
  reinterpret_cast<std::atomic<Address>*>(slot + kTargetOffset)
      ->store(target, std::memory_order_relaxed);
  // The data update is guaranteed to be atomic since it's properly aligned and
  // stores a single machine word. This update will eventually be observed by
  // any concurrent [ldr] on the same address because of data cache coherence.
  // It's ok if other cores temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_S390X
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load function index to r7. 6 bytes
  lgfi(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. 6 bytes or 12 bytes
  mov(r1, Operand(lazy_compile_target, RelocInfo::CODE_TARGET));
  b(r1);  // 2 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
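  // {brcl} takes a 32-bit displacement counted in halfwords, hence the
  // division by 2 and the {is_int32} check below.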
  intptr_t relative_target = reinterpret_cast<byte*>(target) - pc_;

  if (!is_int32(relative_target / 2)) {
    return false;
  }

  brcl(al, Operand(relative_target / 2));
  nop(0);  // pad the slot to 8 bytes
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  Label target_addr;
  lgrl(ip, &target_addr);  // 6 bytes
  b(ip);                   // 2 bytes

  CHECK_EQ(reinterpret_cast<Address>(pc_) & 0x7, 0);  // Alignment
  bind(&target_addr);
  dp(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
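  // The target address is stored 8 bytes into the slot, right after the code
  // emitted in {EmitFarJumpSlot}.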
  Address target_addr = slot + 8;
  reinterpret_cast<std::atomic<Address>*>(target_addr)
      ->store(target, std::memory_order_relaxed);
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 2);
  for (; bytes > 0; bytes -= 2) {
    nop(0);
  }
}

#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, func_index);  // max. 2 instr
  // Jump produces max. 4 instructions on 32-bit platforms
  // and max. 6 instructions on 64-bit platforms.
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToOffHeapInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_LOONG64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  DCHECK(is_int32(func_index));
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, (int32_t)func_index);  // max. 2 instr
  // Jump produces max. 4 instructions.
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToOffHeapInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_PPC64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  // Load the function index into a register. max. 5 instrs
  mov(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. max. 5 instrs
  mov(r0, Operand(lazy_compile_target));
  mtctr(r0);
  bctr();
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
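  // The I-form branch ({b}) has a 26-bit signed byte displacement, i.e. a
  // range of +-32 MB. Targets out of that range go through the far jump table.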
  intptr_t relative_target = reinterpret_cast<byte*>(target) - pc_;

  if (!is_int26(relative_target)) {
    return false;
  }

  b(relative_target, LeaveLK);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  byte* start = pc_;
  mov(ip, Operand(reinterpret_cast<Address>(start + kFarJumpTableSlotSize -
                                            8)));  // 5 instr
  LoadU64(ip, MemOperand(ip));
  mtctr(ip);
  bctr();
  byte* end = pc_;
  int used = end - start;
  CHECK(used < kFarJumpTableSlotSize - 8);
  NopBytes(kFarJumpTableSlotSize - 8 - used);
  CHECK_EQ(reinterpret_cast<Address>(pc_) & 0x7, 0);  // Alignment
  dp(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
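  // The target address occupies the last 8 bytes of the slot, see
  // {EmitFarJumpSlot}.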
  Address target_addr = slot + kFarJumpTableSlotSize - 8;
  reinterpret_cast<std::atomic<Address>*>(target_addr)
      ->store(target, std::memory_order_relaxed);
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 4);
  for (; bytes > 0; bytes -= 4) {
    nop(0);
  }
}

#elif V8_TARGET_ARCH_RISCV64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, func_index);  // max. 2 instr
  // Jump produces max. 8 instructions (including the constant pool and j).
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  UseScratchRegisterScope temp(this);
  Register rd = temp.Acquire();
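  // Load the jump target from the 8-byte literal emitted below: {auipc} puts
  // the current pc into {rd}, {ld} then reads from [pc + 4 * kInstrSize]
  // (where the {dq} below lands), and {Jump} branches there.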
  auipc(rd, 0);
  ld(rd, rd, 4 * kInstrSize);
  Jump(rd);
  nop();
  dq(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#else
#error Unknown architecture.
#endif

}  // namespace wasm
}  // namespace internal
}  // namespace v8