// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#if !V8_ENABLE_WEBASSEMBLY
#error This header should only be included if WebAssembly is enabled.
#endif  // !V8_ENABLE_WEBASSEMBLY

#ifndef V8_WASM_JUMP_TABLE_ASSEMBLER_H_
#define V8_WASM_JUMP_TABLE_ASSEMBLER_H_

#include "src/codegen/macro-assembler.h"

namespace v8 {
namespace internal {
namespace wasm {

// The jump table is the central dispatch point for all (direct and indirect)
// invocations in WebAssembly. It holds one slot per function in a module, with
// each slot containing a dispatch to the currently published {WasmCode} that
// corresponds to the function.
//
// In addition to this main jump table, there exist special jump tables for
// other purposes:
// - the far stub table contains one entry per wasm runtime stub (see
//   {WasmCode::RuntimeStubId}), which jumps to the corresponding embedded
//   builtin, plus (if the full address space cannot be reached via the jump
//   table) one entry per wasm function.
// - the lazy compile table contains one entry per wasm function which jumps to
//   the common {WasmCompileLazy} builtin and passes the function index that was
//   invoked.
//
// The main jump table is split into lines of fixed size, with lines laid out
// consecutively within the executable memory of the {NativeModule}. The slots
// in turn are consecutive within a line, but do not cross line boundaries.
//
//   +- L1 -------------------+ +- L2 -------------------+ +- L3 ...
//   | S1 | S2 | ... | Sn | x | | S1 | S2 | ... | Sn | x | | S1  ...
//   +------------------------+ +------------------------+ +---- ...
//
// The above illustrates jump table lines {Li} containing slots {Si} with each
// line containing {n} slots and some padding {x} for alignment purposes.
// Other jump tables are just consecutive.
//
// The main jump table will be patched concurrently while other threads execute
// it. The code at the new target might also have been emitted concurrently, so
// we need to ensure that there is proper synchronization between code emission,
// jump table patching and code execution.
// On Intel platforms, this all works out of the box because there is cache
// coherency between i-cache and d-cache.
// On ARM, it is safe because the i-cache flush after code emission executes an
// "ic ivau" (Instruction Cache line Invalidate by Virtual Address to Point of
// Unification), which broadcasts to all cores. A core which sees the jump table
// update thus also sees the new code. Since the other core does not explicitly
// execute an "isb" (Instruction Synchronization Barrier), it might still
// execute the old code afterwards, which is no problem, since that code remains
// available until it is garbage collected. Garbage collection itself is a
// synchronization barrier though.
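//
// For illustration only (the authoritative instruction sequences live in the
// per-architecture implementation): on x64, a regular jump table slot is a
// single 5-byte near jump ("jmp rel32"), whereas a far jump table slot holds
// an indirect jump through an 8-byte target address embedded in the slot, so
// that {PatchFarJumpSlot} can redirect it by atomically overwriting just that
// address.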
class V8_EXPORT_PRIVATE JumpTableAssembler : public MacroAssembler {
 public:
  // Translate an offset into the continuous jump table to a jump table index.
  static uint32_t SlotOffsetToIndex(uint32_t slot_offset) {
    uint32_t line_index = slot_offset / kJumpTableLineSize;
    uint32_t line_offset = slot_offset % kJumpTableLineSize;
    DCHECK_EQ(0, line_offset % kJumpTableSlotSize);
    return line_index * kJumpTableSlotsPerLine +
           line_offset / kJumpTableSlotSize;
  }

  // Translate a jump table index to an offset into the continuous jump table.
  static uint32_t JumpSlotIndexToOffset(uint32_t slot_index) {
    uint32_t line_index = slot_index / kJumpTableSlotsPerLine;
    uint32_t line_offset =
        (slot_index % kJumpTableSlotsPerLine) * kJumpTableSlotSize;
    return line_index * kJumpTableLineSize + line_offset;
  }
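
  // Worked example (illustration only, using the x64 constants defined below:
  // 64-byte lines and 5-byte slots, hence 12 slots per line): slot index 13 is
  // the second slot of the second line, so
  //   JumpSlotIndexToOffset(13) == 1 * 64 + 1 * 5 == 69, and
  //   SlotOffsetToIndex(69)     == 1 * 12 + (69 % 64) / 5 == 13.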

  // Determine the size of a jump table containing the given number of slots.
  static constexpr uint32_t SizeForNumberOfSlots(uint32_t slot_count) {
    return ((slot_count + kJumpTableSlotsPerLine - 1) /
            kJumpTableSlotsPerLine) *
           kJumpTableLineSize;
  }
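
  // For instance, with 12 slots per line (x64), SizeForNumberOfSlots(13)
  // rounds up to two full lines, i.e. 2 * 64 == 128 bytes, even though the
  // second line holds only a single slot.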

  // Translate a far jump table index to an offset into the table.
  static uint32_t FarJumpSlotIndexToOffset(uint32_t slot_index) {
    return slot_index * kFarJumpTableSlotSize;
  }

  // Translate a far jump table offset to the index into the table.
  static uint32_t FarJumpSlotOffsetToIndex(uint32_t offset) {
    DCHECK_EQ(0, offset % kFarJumpTableSlotSize);
    return offset / kFarJumpTableSlotSize;
  }

  // Determine the size of a far jump table containing the given number of
  // slots.
  static constexpr uint32_t SizeForNumberOfFarJumpSlots(
      int num_runtime_slots, int num_function_slots) {
    int num_entries = num_runtime_slots + num_function_slots;
    return num_entries * kFarJumpTableSlotSize;
  }

  // Translate a slot index to an offset into the lazy compile table.
  static uint32_t LazyCompileSlotIndexToOffset(uint32_t slot_index) {
    return slot_index * kLazyCompileTableSlotSize;
  }

  // Determine the size of a lazy compile table.
  static constexpr uint32_t SizeForNumberOfLazyFunctions(uint32_t slot_count) {
    return slot_count * kLazyCompileTableSlotSize;
  }

  static void GenerateLazyCompileTable(Address base, uint32_t num_slots,
                                       uint32_t num_imported_functions,
                                       Address wasm_compile_lazy_target) {
    uint32_t lazy_compile_table_size = num_slots * kLazyCompileTableSlotSize;
    // Assume enough space, so the Assembler does not try to grow the buffer.
    JumpTableAssembler jtasm(base, lazy_compile_table_size + 256);
    for (uint32_t slot_index = 0; slot_index < num_slots; ++slot_index) {
      DCHECK_EQ(slot_index * kLazyCompileTableSlotSize, jtasm.pc_offset());
      jtasm.EmitLazyCompileJumpSlot(slot_index + num_imported_functions,
                                    wasm_compile_lazy_target);
    }
    DCHECK_EQ(lazy_compile_table_size, jtasm.pc_offset());
    FlushInstructionCache(base, lazy_compile_table_size);
  }
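
  // A non-normative sketch of what {EmitLazyCompileJumpSlot} produces, using
  // x64 as an example: the slot pushes the function index as a 32-bit
  // immediate and then emits a near jump to {wasm_compile_lazy_target},
  // 5 + 5 == 10 bytes, matching the kLazyCompileTableSlotSize defined below.
  // Other architectures use equivalent per-architecture sequences.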

  static void GenerateFarJumpTable(Address base, Address* stub_targets,
                                   int num_runtime_slots,
                                   int num_function_slots) {
    uint32_t table_size =
        SizeForNumberOfFarJumpSlots(num_runtime_slots, num_function_slots);
    // Assume enough space, so the Assembler does not try to grow the buffer.
    JumpTableAssembler jtasm(base, table_size + 256);
    int offset = 0;
    for (int index = 0; index < num_runtime_slots + num_function_slots;
         ++index) {
      DCHECK_EQ(offset, FarJumpSlotIndexToOffset(index));
      // Function slots initially jump to themselves. They are patched before
      // being used.
      Address target =
          index < num_runtime_slots ? stub_targets[index] : base + offset;
      jtasm.EmitFarJumpSlot(target);
      offset += kFarJumpTableSlotSize;
      DCHECK_EQ(offset, jtasm.pc_offset());
    }
    FlushInstructionCache(base, table_size);
  }

  static void PatchJumpTableSlot(Address jump_table_slot,
                                 Address far_jump_table_slot, Address target) {
    // First, try to patch the jump table slot.
    JumpTableAssembler jtasm(jump_table_slot);
    if (!jtasm.EmitJumpSlot(target)) {
      // If that fails, we need to patch the far jump table slot, and then
      // update the jump table slot to jump to this far jump table slot.
      DCHECK_NE(kNullAddress, far_jump_table_slot);
      JumpTableAssembler::PatchFarJumpSlot(far_jump_table_slot, target);
      CHECK(jtasm.EmitJumpSlot(far_jump_table_slot));
    }
    jtasm.NopBytes(kJumpTableSlotSize - jtasm.pc_offset());
    FlushInstructionCache(jump_table_slot, kJumpTableSlotSize);
  }
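
  // Usage sketch (illustration only; names other than the static methods above
  // are hypothetical): to publish new code for the wasm function occupying
  // jump table slot {i}, a caller would compute
  //   Address slot     = jump_table_base + JumpSlotIndexToOffset(i);
  //   Address far_slot = far_jump_table_base +
  //                      FarJumpSlotIndexToOffset(num_runtime_slots + i);
  //   PatchJumpTableSlot(slot, far_slot, new_code_start);
  // The far slot is only written if the near jump in the main table cannot
  // reach the new code directly (and may not exist at all if the whole address
  // space is reachable, in which case {far_jump_table_slot} is kNullAddress).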

 private:
  // Instantiate a {JumpTableAssembler} for patching.
  explicit JumpTableAssembler(Address slot_addr, int size = 256)
      : MacroAssembler(nullptr, JumpTableAssemblerOptions(),
                       CodeObjectRequired::kNo,
                       ExternalAssemblerBuffer(
                           reinterpret_cast<uint8_t*>(slot_addr), size)) {}

// To allow concurrent patching of the jump table entries, we need to ensure
// that the instruction containing the call target does not cross cache-line
// boundaries. The jump table line size has been chosen to satisfy this.
#if V8_TARGET_ARCH_X64
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 5;
  static constexpr int kFarJumpTableSlotSize = 16;
  static constexpr int kLazyCompileTableSlotSize = 10;
#elif V8_TARGET_ARCH_IA32
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 5;
  static constexpr int kFarJumpTableSlotSize = 5;
  static constexpr int kLazyCompileTableSlotSize = 10;
#elif V8_TARGET_ARCH_ARM
  static constexpr int kJumpTableLineSize = 3 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 3 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 2 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 5 * kInstrSize;
#elif V8_TARGET_ARCH_ARM64 && V8_ENABLE_CONTROL_FLOW_INTEGRITY
  static constexpr int kJumpTableLineSize = 2 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 2 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 4 * kInstrSize;
#elif V8_TARGET_ARCH_ARM64 && !V8_ENABLE_CONTROL_FLOW_INTEGRITY
  static constexpr int kJumpTableLineSize = 1 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 1 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 3 * kInstrSize;
#elif V8_TARGET_ARCH_S390X
  static constexpr int kJumpTableLineSize = 128;
  static constexpr int kJumpTableSlotSize = 8;
  static constexpr int kFarJumpTableSlotSize = 16;
  static constexpr int kLazyCompileTableSlotSize = 20;
#elif V8_TARGET_ARCH_PPC64
  static constexpr int kJumpTableLineSize = 64;
  static constexpr int kJumpTableSlotSize = 1 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 12 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 12 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 6 * kInstrSize;
#elif V8_TARGET_ARCH_MIPS64
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize;
#elif V8_TARGET_ARCH_RISCV64
  static constexpr int kJumpTableLineSize = 6 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 6 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 10 * kInstrSize;
#elif V8_TARGET_ARCH_LOONG64
  static constexpr int kJumpTableLineSize = 8 * kInstrSize;
  static constexpr int kJumpTableSlotSize = 8 * kInstrSize;
  static constexpr int kFarJumpTableSlotSize = 4 * kInstrSize;
  static constexpr int kLazyCompileTableSlotSize = 8 * kInstrSize;
#else
#error Unknown architecture.
#endif

  static constexpr int kJumpTableSlotsPerLine =
      kJumpTableLineSize / kJumpTableSlotSize;
  STATIC_ASSERT(kJumpTableSlotsPerLine >= 1);
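
  // For example, on x64 this evaluates to 64 / 5 == 12 slots per line, leaving
  // 64 - 12 * 5 == 4 bytes of padding at the end of each line (the {x} in the
  // diagram above).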

  // {JumpTableAssembler} is never used during snapshot generation, and its
  // code must be independent of the code range of any isolate anyway. Just
  // ensure that no relocation information is recorded; there is no buffer to
  // store it, since the assembler is instantiated in patching mode directly on
  // existing code.
  static AssemblerOptions JumpTableAssemblerOptions() {
    AssemblerOptions options;
    options.disable_reloc_info_for_patching = true;
    return options;
  }

  void EmitLazyCompileJumpSlot(uint32_t func_index,
                               Address lazy_compile_target);

  // Returns {true} if the jump fits in the jump table slot, {false} otherwise.
  bool EmitJumpSlot(Address target);

  // Initially emit a far jump slot.
  void EmitFarJumpSlot(Address target);

  // Patch an existing far jump slot, and make sure that this update eventually
  // becomes available to all execution units that might execute this code.
  static void PatchFarJumpSlot(Address slot, Address target);

  void NopBytes(int bytes);
};

}  // namespace wasm
}  // namespace internal
}  // namespace v8

#endif  // V8_WASM_JUMP_TABLE_ASSEMBLER_H_