// Copyright 2018 the V8 project authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "src/wasm/jump-table-assembler.h"

#include "src/codegen/assembler-inl.h"
#include "src/codegen/macro-assembler-inl.h"

namespace v8 {
namespace internal {
namespace wasm {

// The implementation is compact enough to implement it inline here. If it gets
// much bigger, we might want to split it into a separate file per architecture.
#if V8_TARGET_ARCH_X64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Use a push, because mov to an extended register takes 6 bytes.
  pushq_imm32(func_index);            // 5 bytes
  EmitJumpSlot(lazy_compile_target);  // 5 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  intptr_t displacement = static_cast<intptr_t>(
      reinterpret_cast<byte*>(target) - pc_ - kNearJmpInstrSize);
  if (!is_int32(displacement)) return false;
  near_jmp(displacement, RelocInfo::NO_INFO);  // 5 bytes
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  Label data;
  int start_offset = pc_offset();
  jmp(Operand(&data));  // 6 bytes
  Nop(2);               // 2 bytes
  // The data must be properly aligned, so it can be patched atomically (see
  // {PatchFarJumpSlot}).
  DCHECK_EQ(start_offset + kSystemPointerSize, pc_offset());
  USE(start_offset);
  bind(&data);
  dq(target);  // 8 bytes
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot, kSystemPointerSize));
  // The target is stored at offset 8 in the slot, see {EmitFarJumpSlot}.
  reinterpret_cast<std::atomic<Address>*>(slot + kSystemPointerSize)
      ->store(target, std::memory_order_relaxed);
  // The update is atomic because the address is properly aligned.
  // Because of cache coherence, the data update will eventually be seen by all
  // cores. It's ok if they temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_IA32
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  mov(kWasmCompileLazyFuncIndexRegister, func_index);  // 5 bytes
  jmp(lazy_compile_target, RelocInfo::NO_INFO);        // 5 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  jmp(target, RelocInfo::NO_INFO);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  jmp(target, RelocInfo::NO_INFO);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
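  // Near jumps on ia32 can reach the entire 32-bit address space, so
  // {EmitJumpSlot} above always succeeds and far jump slots never need to be
  // patched.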
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  Nop(bytes);
}

#elif V8_TARGET_ARCH_ARM
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load function index to a register.
  // This generates [movw, movt] on ARMv7 and later, [ldr, constant pool
  // marker, constant] on ARMv6.
  Move32BitImmediate(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // EmitJumpSlot emits either [b], [movw, movt, mov] (ARMv7+), or [ldr,
  // constant].
  // In total, this is <=5 instructions on all architectures.
  // TODO(arm): Optimize this for code size; lazy compile is not performance
  // critical, as it's only executed once per function.
  EmitJumpSlot(lazy_compile_target);
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  // Note that {Move32BitImmediate} emits [ldr, constant] for the relocation
  // mode used below; we need this to allow concurrent patching of this slot.
  Move32BitImmediate(pc, Operand(target, RelocInfo::WASM_CALL));
  CheckConstPool(true, false);  // force emit of const pool
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // Load from [pc + kInstrSize] to pc. Note that {pc} points two instructions
  // after the currently executing one.
  ldr_pcrel(pc, -kInstrSize);  // 1 instruction
  dd(target);                  // 4 bytes (== 1 instruction)
  STATIC_ASSERT(kInstrSize == kInt32Size);
  STATIC_ASSERT(kFarJumpTableSlotSize == 2 * kInstrSize);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
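  // {EmitJumpSlot} above loads the full 32-bit target address via the
  // constant pool and therefore always succeeds, so far jump slots never need
  // to be patched.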
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_ARM64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  CodeEntry();                                             // 0-1 instr
  Mov(kWasmCompileLazyFuncIndexRegister.W(), func_index);  // 1-2 instr
  Jump(lazy_compile_target, RelocInfo::NO_INFO);           // 1 instr
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK(nop_bytes == 0 || nop_bytes == kInstrSize);
  if (nop_bytes) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  if (!TurboAssembler::IsNearCallOffset(
          (reinterpret_cast<byte*>(target) - pc_) / kInstrSize)) {
    return false;
  }

  CodeEntry();

  Jump(target, RelocInfo::NO_INFO);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  // This code uses hard-coded registers and instructions (and avoids
  // {UseScratchRegisterScope} or {InstructionAccurateScope}) because this code
  // will only be called for the very specific runtime slot table, and we want
  // to have maximum control over the generated code.
  // Do not reuse this code without validating that the same assumptions hold.
  CodeEntry();  // 0-1 instructions
  constexpr Register kTmpReg = x16;
  DCHECK(TmpList()->IncludesAliasOf(kTmpReg));
  int kOffset = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 3 : 2;
  // Load from [pc + kOffset * kInstrSize] to {kTmpReg}, then branch there.
  ldr_pcrel(kTmpReg, kOffset);  // 1 instruction
  br(kTmpReg);                  // 1 instruction
#ifdef V8_ENABLE_CONTROL_FLOW_INTEGRITY
  nop();  // To keep the target below aligned to kSystemPointerSize.
#endif
  dq(target);  // 8 bytes (== 2 instructions)
  STATIC_ASSERT(2 * kInstrSize == kSystemPointerSize);
  const int kSlotCount = ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 6 : 4;
  STATIC_ASSERT(kFarJumpTableSlotSize == kSlotCount * kInstrSize);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  // See {EmitFarJumpSlot} for the offset of the target (16 bytes with
  // CFI enabled, 8 bytes otherwise).
  int kTargetOffset =
      ENABLE_CONTROL_FLOW_INTEGRITY_BOOL ? 4 * kInstrSize : 2 * kInstrSize;
  // The slot needs to be pointer-size aligned so we can atomically update it.
  DCHECK(IsAligned(slot + kTargetOffset, kSystemPointerSize));
  reinterpret_cast<std::atomic<Address>*>(slot + kTargetOffset)
      ->store(target, std::memory_order_relaxed);
  // The data update is guaranteed to be atomic since it is properly aligned
  // and stores a single machine word. This update will eventually be observed
  // by any concurrent [ldr] on the same address because of data cache
  // coherence. It's ok if other cores temporarily jump to the old target.
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_S390X
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  // Load function index to r7. 6 bytes
  lgfi(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. 6 bytes or 12 bytes
  mov(r1, Operand(lazy_compile_target, RelocInfo::CODE_TARGET));
  b(r1);  // 2 bytes
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  intptr_t relative_target = reinterpret_cast<byte*>(target) - pc_;

  if (!is_int32(relative_target / 2)) {
    return false;
  }

  brcl(al, Operand(relative_target / 2));
  nop(0);  // make the slot align to 8 bytes
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  Label target_addr;
  lgrl(ip, &target_addr);  // 6 bytes
  b(ip);                   // 2 bytes

  CHECK_EQ(reinterpret_cast<Address>(pc_) & 0x7, 0);  // Alignment
  bind(&target_addr);
  dp(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
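  // The target address is stored 8 bytes into the far jump slot ({lgrl} is
  // 6 bytes, {b(ip)} is 2 bytes, see {EmitFarJumpSlot}). The store is atomic
  // because that address is 8-byte aligned.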
  Address target_addr = slot + 8;
  reinterpret_cast<std::atomic<Address>*>(target_addr)
      ->store(target, std::memory_order_relaxed);
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 2);
  for (; bytes > 0; bytes -= 2) {
    nop(0);
  }
}

#elif V8_TARGET_ARCH_MIPS || V8_TARGET_ARCH_MIPS64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, func_index);  // max. 2 instr
  // Jump produces max. 4 instructions for 32-bit platforms
  // and max. 6 instructions for 64-bit platforms.
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToOffHeapInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_LOONG64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  DCHECK(is_int32(func_index));
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister,
     static_cast<int32_t>(func_index));  // max. 2 instr
  // Jump produces max. 4 instructions.
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  JumpToOffHeapInstructionStream(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#elif V8_TARGET_ARCH_PPC64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  // Load function index to register. max 5 instrs
  mov(kWasmCompileLazyFuncIndexRegister, Operand(func_index));
  // Jump to {lazy_compile_target}. max 5 instrs
  mov(r0, Operand(lazy_compile_target));
  mtctr(r0);
  bctr();
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  intptr_t relative_target = reinterpret_cast<byte*>(target) - pc_;

  if (!is_int26(relative_target)) {
    return false;
  }

  b(relative_target, LeaveLK);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  byte* start = pc_;
  mov(ip, Operand(reinterpret_cast<Address>(start + kFarJumpTableSlotSize -
                                            8)));  // 5 instr
  LoadU64(ip, MemOperand(ip));
  mtctr(ip);
  bctr();
  byte* end = pc_;
  int used = end - start;
  CHECK(used < kFarJumpTableSlotSize - 8);
  NopBytes(kFarJumpTableSlotSize - 8 - used);
  CHECK_EQ(reinterpret_cast<Address>(pc_) & 0x7, 0);  // Alignment
  dp(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
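  // The target address occupies the last 8 bytes of the far jump slot, see
  // {EmitFarJumpSlot}. The store is atomic because that address is 8-byte
  // aligned.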
  Address target_addr = slot + kFarJumpTableSlotSize - 8;
  reinterpret_cast<std::atomic<Address>*>(target_addr)
      ->store(target, std::memory_order_relaxed);
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % 4);
  for (; bytes > 0; bytes -= 4) {
    nop(0);
  }
}

#elif V8_TARGET_ARCH_RISCV64
void JumpTableAssembler::EmitLazyCompileJumpSlot(uint32_t func_index,
                                                 Address lazy_compile_target) {
  int start = pc_offset();
  li(kWasmCompileLazyFuncIndexRegister, func_index);  // max. 2 instr
  // Jump produces max. 8 instructions (including constant pool and j).
  Jump(lazy_compile_target, RelocInfo::NO_INFO);
  int nop_bytes = start + kLazyCompileTableSlotSize - pc_offset();
  DCHECK_EQ(nop_bytes % kInstrSize, 0);
  for (int i = 0; i < nop_bytes; i += kInstrSize) nop();
}

bool JumpTableAssembler::EmitJumpSlot(Address target) {
  PatchAndJump(target);
  return true;
}

void JumpTableAssembler::EmitFarJumpSlot(Address target) {
  UseScratchRegisterScope temp(this);
  Register rd = temp.Acquire();
  // Load the 64-bit target address stored at [pc + 4 * kInstrSize] (the {dq}
  // data below) and jump to it. The nop pads the code so that the data stays
  // pointer-size aligned.
  auipc(rd, 0);
  ld(rd, rd, 4 * kInstrSize);
  Jump(rd);
  nop();
  dq(target);
}

// static
void JumpTableAssembler::PatchFarJumpSlot(Address slot, Address target) {
  UNREACHABLE();
}

void JumpTableAssembler::NopBytes(int bytes) {
  DCHECK_LE(0, bytes);
  DCHECK_EQ(0, bytes % kInstrSize);
  for (; bytes > 0; bytes -= kInstrSize) {
    nop();
  }
}

#else
#error Unknown architecture.
#endif

}  // namespace wasm
}  // namespace internal
}  // namespace v8