162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * BPF JIT compiler 462306a36Sopenharmony_ci * 562306a36Sopenharmony_ci * Copyright (C) 2011-2013 Eric Dumazet (eric.dumazet@gmail.com) 662306a36Sopenharmony_ci * Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com 762306a36Sopenharmony_ci */ 862306a36Sopenharmony_ci#include <linux/netdevice.h> 962306a36Sopenharmony_ci#include <linux/filter.h> 1062306a36Sopenharmony_ci#include <linux/if_vlan.h> 1162306a36Sopenharmony_ci#include <linux/bpf.h> 1262306a36Sopenharmony_ci#include <linux/memory.h> 1362306a36Sopenharmony_ci#include <linux/sort.h> 1462306a36Sopenharmony_ci#include <asm/extable.h> 1562306a36Sopenharmony_ci#include <asm/ftrace.h> 1662306a36Sopenharmony_ci#include <asm/set_memory.h> 1762306a36Sopenharmony_ci#include <asm/nospec-branch.h> 1862306a36Sopenharmony_ci#include <asm/text-patching.h> 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_cistatic u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len) 2162306a36Sopenharmony_ci{ 2262306a36Sopenharmony_ci if (len == 1) 2362306a36Sopenharmony_ci *ptr = bytes; 2462306a36Sopenharmony_ci else if (len == 2) 2562306a36Sopenharmony_ci *(u16 *)ptr = bytes; 2662306a36Sopenharmony_ci else { 2762306a36Sopenharmony_ci *(u32 *)ptr = bytes; 2862306a36Sopenharmony_ci barrier(); 2962306a36Sopenharmony_ci } 3062306a36Sopenharmony_ci return ptr + len; 3162306a36Sopenharmony_ci} 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci#define EMIT(bytes, len) \ 3462306a36Sopenharmony_ci do { prog = emit_code(prog, bytes, len); } while (0) 3562306a36Sopenharmony_ci 3662306a36Sopenharmony_ci#define EMIT1(b1) EMIT(b1, 1) 3762306a36Sopenharmony_ci#define EMIT2(b1, b2) EMIT((b1) + ((b2) << 8), 2) 3862306a36Sopenharmony_ci#define EMIT3(b1, b2, b3) EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3) 3962306a36Sopenharmony_ci#define EMIT4(b1, b2, b3, b4) EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4) 4062306a36Sopenharmony_ci 
4162306a36Sopenharmony_ci#define EMIT1_off32(b1, off) \ 4262306a36Sopenharmony_ci do { EMIT1(b1); EMIT(off, 4); } while (0) 4362306a36Sopenharmony_ci#define EMIT2_off32(b1, b2, off) \ 4462306a36Sopenharmony_ci do { EMIT2(b1, b2); EMIT(off, 4); } while (0) 4562306a36Sopenharmony_ci#define EMIT3_off32(b1, b2, b3, off) \ 4662306a36Sopenharmony_ci do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0) 4762306a36Sopenharmony_ci#define EMIT4_off32(b1, b2, b3, b4, off) \ 4862306a36Sopenharmony_ci do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0) 4962306a36Sopenharmony_ci 5062306a36Sopenharmony_ci#ifdef CONFIG_X86_KERNEL_IBT 5162306a36Sopenharmony_ci#define EMIT_ENDBR() EMIT(gen_endbr(), 4) 5262306a36Sopenharmony_ci#else 5362306a36Sopenharmony_ci#define EMIT_ENDBR() 5462306a36Sopenharmony_ci#endif 5562306a36Sopenharmony_ci 5662306a36Sopenharmony_cistatic bool is_imm8(int value) 5762306a36Sopenharmony_ci{ 5862306a36Sopenharmony_ci return value <= 127 && value >= -128; 5962306a36Sopenharmony_ci} 6062306a36Sopenharmony_ci 6162306a36Sopenharmony_cistatic bool is_simm32(s64 value) 6262306a36Sopenharmony_ci{ 6362306a36Sopenharmony_ci return value == (s64)(s32)value; 6462306a36Sopenharmony_ci} 6562306a36Sopenharmony_ci 6662306a36Sopenharmony_cistatic bool is_uimm32(u64 value) 6762306a36Sopenharmony_ci{ 6862306a36Sopenharmony_ci return value == (u64)(u32)value; 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_ci/* mov dst, src */ 7262306a36Sopenharmony_ci#define EMIT_mov(DST, SRC) \ 7362306a36Sopenharmony_ci do { \ 7462306a36Sopenharmony_ci if (DST != SRC) \ 7562306a36Sopenharmony_ci EMIT3(add_2mod(0x48, DST, SRC), 0x89, add_2reg(0xC0, DST, SRC)); \ 7662306a36Sopenharmony_ci } while (0) 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_cistatic int bpf_size_to_x86_bytes(int bpf_size) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci if (bpf_size == BPF_W) 8162306a36Sopenharmony_ci return 4; 8262306a36Sopenharmony_ci else if (bpf_size == BPF_H) 
8362306a36Sopenharmony_ci return 2; 8462306a36Sopenharmony_ci else if (bpf_size == BPF_B) 8562306a36Sopenharmony_ci return 1; 8662306a36Sopenharmony_ci else if (bpf_size == BPF_DW) 8762306a36Sopenharmony_ci return 4; /* imm32 */ 8862306a36Sopenharmony_ci else 8962306a36Sopenharmony_ci return 0; 9062306a36Sopenharmony_ci} 9162306a36Sopenharmony_ci 9262306a36Sopenharmony_ci/* 9362306a36Sopenharmony_ci * List of x86 cond jumps opcodes (. + s8) 9462306a36Sopenharmony_ci * Add 0x10 (and an extra 0x0f) to generate far jumps (. + s32) 9562306a36Sopenharmony_ci */ 9662306a36Sopenharmony_ci#define X86_JB 0x72 9762306a36Sopenharmony_ci#define X86_JAE 0x73 9862306a36Sopenharmony_ci#define X86_JE 0x74 9962306a36Sopenharmony_ci#define X86_JNE 0x75 10062306a36Sopenharmony_ci#define X86_JBE 0x76 10162306a36Sopenharmony_ci#define X86_JA 0x77 10262306a36Sopenharmony_ci#define X86_JL 0x7C 10362306a36Sopenharmony_ci#define X86_JGE 0x7D 10462306a36Sopenharmony_ci#define X86_JLE 0x7E 10562306a36Sopenharmony_ci#define X86_JG 0x7F 10662306a36Sopenharmony_ci 10762306a36Sopenharmony_ci/* Pick a register outside of BPF range for JIT internal work */ 10862306a36Sopenharmony_ci#define AUX_REG (MAX_BPF_JIT_REG + 1) 10962306a36Sopenharmony_ci#define X86_REG_R9 (MAX_BPF_JIT_REG + 2) 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci/* 11262306a36Sopenharmony_ci * The following table maps BPF registers to x86-64 registers. 11362306a36Sopenharmony_ci * 11462306a36Sopenharmony_ci * x86-64 register R12 is unused, since if used as base address 11562306a36Sopenharmony_ci * register in load/store instructions, it always needs an 11662306a36Sopenharmony_ci * extra byte of encoding and is callee saved. 11762306a36Sopenharmony_ci * 11862306a36Sopenharmony_ci * x86-64 register R9 is not used by BPF programs, but can be used by BPF 11962306a36Sopenharmony_ci * trampoline. x86-64 register R10 is used for blinding (if enabled). 
12062306a36Sopenharmony_ci */ 12162306a36Sopenharmony_cistatic const int reg2hex[] = { 12262306a36Sopenharmony_ci [BPF_REG_0] = 0, /* RAX */ 12362306a36Sopenharmony_ci [BPF_REG_1] = 7, /* RDI */ 12462306a36Sopenharmony_ci [BPF_REG_2] = 6, /* RSI */ 12562306a36Sopenharmony_ci [BPF_REG_3] = 2, /* RDX */ 12662306a36Sopenharmony_ci [BPF_REG_4] = 1, /* RCX */ 12762306a36Sopenharmony_ci [BPF_REG_5] = 0, /* R8 */ 12862306a36Sopenharmony_ci [BPF_REG_6] = 3, /* RBX callee saved */ 12962306a36Sopenharmony_ci [BPF_REG_7] = 5, /* R13 callee saved */ 13062306a36Sopenharmony_ci [BPF_REG_8] = 6, /* R14 callee saved */ 13162306a36Sopenharmony_ci [BPF_REG_9] = 7, /* R15 callee saved */ 13262306a36Sopenharmony_ci [BPF_REG_FP] = 5, /* RBP readonly */ 13362306a36Sopenharmony_ci [BPF_REG_AX] = 2, /* R10 temp register */ 13462306a36Sopenharmony_ci [AUX_REG] = 3, /* R11 temp register */ 13562306a36Sopenharmony_ci [X86_REG_R9] = 1, /* R9 register, 6th function argument */ 13662306a36Sopenharmony_ci}; 13762306a36Sopenharmony_ci 13862306a36Sopenharmony_cistatic const int reg2pt_regs[] = { 13962306a36Sopenharmony_ci [BPF_REG_0] = offsetof(struct pt_regs, ax), 14062306a36Sopenharmony_ci [BPF_REG_1] = offsetof(struct pt_regs, di), 14162306a36Sopenharmony_ci [BPF_REG_2] = offsetof(struct pt_regs, si), 14262306a36Sopenharmony_ci [BPF_REG_3] = offsetof(struct pt_regs, dx), 14362306a36Sopenharmony_ci [BPF_REG_4] = offsetof(struct pt_regs, cx), 14462306a36Sopenharmony_ci [BPF_REG_5] = offsetof(struct pt_regs, r8), 14562306a36Sopenharmony_ci [BPF_REG_6] = offsetof(struct pt_regs, bx), 14662306a36Sopenharmony_ci [BPF_REG_7] = offsetof(struct pt_regs, r13), 14762306a36Sopenharmony_ci [BPF_REG_8] = offsetof(struct pt_regs, r14), 14862306a36Sopenharmony_ci [BPF_REG_9] = offsetof(struct pt_regs, r15), 14962306a36Sopenharmony_ci}; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci/* 15262306a36Sopenharmony_ci * is_ereg() == true if BPF register 'reg' maps to x86-64 r8..r15 15362306a36Sopenharmony_ci * 
which need extra byte of encoding. 15462306a36Sopenharmony_ci * rax,rcx,...,rbp have simpler encoding 15562306a36Sopenharmony_ci */ 15662306a36Sopenharmony_cistatic bool is_ereg(u32 reg) 15762306a36Sopenharmony_ci{ 15862306a36Sopenharmony_ci return (1 << reg) & (BIT(BPF_REG_5) | 15962306a36Sopenharmony_ci BIT(AUX_REG) | 16062306a36Sopenharmony_ci BIT(BPF_REG_7) | 16162306a36Sopenharmony_ci BIT(BPF_REG_8) | 16262306a36Sopenharmony_ci BIT(BPF_REG_9) | 16362306a36Sopenharmony_ci BIT(X86_REG_R9) | 16462306a36Sopenharmony_ci BIT(BPF_REG_AX)); 16562306a36Sopenharmony_ci} 16662306a36Sopenharmony_ci 16762306a36Sopenharmony_ci/* 16862306a36Sopenharmony_ci * is_ereg_8l() == true if BPF register 'reg' is mapped to access x86-64 16962306a36Sopenharmony_ci * lower 8-bit registers dil,sil,bpl,spl,r8b..r15b, which need extra byte 17062306a36Sopenharmony_ci * of encoding. al,cl,dl,bl have simpler encoding. 17162306a36Sopenharmony_ci */ 17262306a36Sopenharmony_cistatic bool is_ereg_8l(u32 reg) 17362306a36Sopenharmony_ci{ 17462306a36Sopenharmony_ci return is_ereg(reg) || 17562306a36Sopenharmony_ci (1 << reg) & (BIT(BPF_REG_1) | 17662306a36Sopenharmony_ci BIT(BPF_REG_2) | 17762306a36Sopenharmony_ci BIT(BPF_REG_FP)); 17862306a36Sopenharmony_ci} 17962306a36Sopenharmony_ci 18062306a36Sopenharmony_cistatic bool is_axreg(u32 reg) 18162306a36Sopenharmony_ci{ 18262306a36Sopenharmony_ci return reg == BPF_REG_0; 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ci/* Add modifiers if 'reg' maps to x86-64 registers R8..R15 */ 18662306a36Sopenharmony_cistatic u8 add_1mod(u8 byte, u32 reg) 18762306a36Sopenharmony_ci{ 18862306a36Sopenharmony_ci if (is_ereg(reg)) 18962306a36Sopenharmony_ci byte |= 1; 19062306a36Sopenharmony_ci return byte; 19162306a36Sopenharmony_ci} 19262306a36Sopenharmony_ci 19362306a36Sopenharmony_cistatic u8 add_2mod(u8 byte, u32 r1, u32 r2) 19462306a36Sopenharmony_ci{ 19562306a36Sopenharmony_ci if (is_ereg(r1)) 19662306a36Sopenharmony_ci byte |= 1; 
19762306a36Sopenharmony_ci if (is_ereg(r2)) 19862306a36Sopenharmony_ci byte |= 4; 19962306a36Sopenharmony_ci return byte; 20062306a36Sopenharmony_ci} 20162306a36Sopenharmony_ci 20262306a36Sopenharmony_ci/* Encode 'dst_reg' register into x86-64 opcode 'byte' */ 20362306a36Sopenharmony_cistatic u8 add_1reg(u8 byte, u32 dst_reg) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci return byte + reg2hex[dst_reg]; 20662306a36Sopenharmony_ci} 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci/* Encode 'dst_reg' and 'src_reg' registers into x86-64 opcode 'byte' */ 20962306a36Sopenharmony_cistatic u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) 21062306a36Sopenharmony_ci{ 21162306a36Sopenharmony_ci return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); 21262306a36Sopenharmony_ci} 21362306a36Sopenharmony_ci 21462306a36Sopenharmony_ci/* Some 1-byte opcodes for binary ALU operations */ 21562306a36Sopenharmony_cistatic u8 simple_alu_opcodes[] = { 21662306a36Sopenharmony_ci [BPF_ADD] = 0x01, 21762306a36Sopenharmony_ci [BPF_SUB] = 0x29, 21862306a36Sopenharmony_ci [BPF_AND] = 0x21, 21962306a36Sopenharmony_ci [BPF_OR] = 0x09, 22062306a36Sopenharmony_ci [BPF_XOR] = 0x31, 22162306a36Sopenharmony_ci [BPF_LSH] = 0xE0, 22262306a36Sopenharmony_ci [BPF_RSH] = 0xE8, 22362306a36Sopenharmony_ci [BPF_ARSH] = 0xF8, 22462306a36Sopenharmony_ci}; 22562306a36Sopenharmony_ci 22662306a36Sopenharmony_cistatic void jit_fill_hole(void *area, unsigned int size) 22762306a36Sopenharmony_ci{ 22862306a36Sopenharmony_ci /* Fill whole space with INT3 instructions */ 22962306a36Sopenharmony_ci memset(area, 0xcc, size); 23062306a36Sopenharmony_ci} 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ciint bpf_arch_text_invalidate(void *dst, size_t len) 23362306a36Sopenharmony_ci{ 23462306a36Sopenharmony_ci return IS_ERR_OR_NULL(text_poke_set(dst, 0xcc, len)); 23562306a36Sopenharmony_ci} 23662306a36Sopenharmony_ci 23762306a36Sopenharmony_cistruct jit_context { 23862306a36Sopenharmony_ci int cleanup_addr; 
/* Epilogue code offset */ 23962306a36Sopenharmony_ci 24062306a36Sopenharmony_ci /* 24162306a36Sopenharmony_ci * Program specific offsets of labels in the code; these rely on the 24262306a36Sopenharmony_ci * JIT doing at least 2 passes, recording the position on the first 24362306a36Sopenharmony_ci * pass, only to generate the correct offset on the second pass. 24462306a36Sopenharmony_ci */ 24562306a36Sopenharmony_ci int tail_call_direct_label; 24662306a36Sopenharmony_ci int tail_call_indirect_label; 24762306a36Sopenharmony_ci}; 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci/* Maximum number of bytes emitted while JITing one eBPF insn */ 25062306a36Sopenharmony_ci#define BPF_MAX_INSN_SIZE 128 25162306a36Sopenharmony_ci#define BPF_INSN_SAFETY 64 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_ci/* Number of bytes emit_patch() needs to generate instructions */ 25462306a36Sopenharmony_ci#define X86_PATCH_SIZE 5 25562306a36Sopenharmony_ci/* Number of bytes that will be skipped on tailcall */ 25662306a36Sopenharmony_ci#define X86_TAIL_CALL_OFFSET (11 + ENDBR_INSN_SIZE) 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_cistatic void push_callee_regs(u8 **pprog, bool *callee_regs_used) 25962306a36Sopenharmony_ci{ 26062306a36Sopenharmony_ci u8 *prog = *pprog; 26162306a36Sopenharmony_ci 26262306a36Sopenharmony_ci if (callee_regs_used[0]) 26362306a36Sopenharmony_ci EMIT1(0x53); /* push rbx */ 26462306a36Sopenharmony_ci if (callee_regs_used[1]) 26562306a36Sopenharmony_ci EMIT2(0x41, 0x55); /* push r13 */ 26662306a36Sopenharmony_ci if (callee_regs_used[2]) 26762306a36Sopenharmony_ci EMIT2(0x41, 0x56); /* push r14 */ 26862306a36Sopenharmony_ci if (callee_regs_used[3]) 26962306a36Sopenharmony_ci EMIT2(0x41, 0x57); /* push r15 */ 27062306a36Sopenharmony_ci *pprog = prog; 27162306a36Sopenharmony_ci} 27262306a36Sopenharmony_ci 27362306a36Sopenharmony_cistatic void pop_callee_regs(u8 **pprog, bool *callee_regs_used) 27462306a36Sopenharmony_ci{ 27562306a36Sopenharmony_ci 
u8 *prog = *pprog; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci if (callee_regs_used[3]) 27862306a36Sopenharmony_ci EMIT2(0x41, 0x5F); /* pop r15 */ 27962306a36Sopenharmony_ci if (callee_regs_used[2]) 28062306a36Sopenharmony_ci EMIT2(0x41, 0x5E); /* pop r14 */ 28162306a36Sopenharmony_ci if (callee_regs_used[1]) 28262306a36Sopenharmony_ci EMIT2(0x41, 0x5D); /* pop r13 */ 28362306a36Sopenharmony_ci if (callee_regs_used[0]) 28462306a36Sopenharmony_ci EMIT1(0x5B); /* pop rbx */ 28562306a36Sopenharmony_ci *pprog = prog; 28662306a36Sopenharmony_ci} 28762306a36Sopenharmony_ci 28862306a36Sopenharmony_ci/* 28962306a36Sopenharmony_ci * Emit x86-64 prologue code for BPF program. 29062306a36Sopenharmony_ci * bpf_tail_call helper will skip the first X86_TAIL_CALL_OFFSET bytes 29162306a36Sopenharmony_ci * while jumping to another program 29262306a36Sopenharmony_ci */ 29362306a36Sopenharmony_cistatic void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf, 29462306a36Sopenharmony_ci bool tail_call_reachable, bool is_subprog) 29562306a36Sopenharmony_ci{ 29662306a36Sopenharmony_ci u8 *prog = *pprog; 29762306a36Sopenharmony_ci 29862306a36Sopenharmony_ci /* BPF trampoline can be made to work without these nops, 29962306a36Sopenharmony_ci * but let's waste 5 bytes for now and optimize later 30062306a36Sopenharmony_ci */ 30162306a36Sopenharmony_ci EMIT_ENDBR(); 30262306a36Sopenharmony_ci memcpy(prog, x86_nops[5], X86_PATCH_SIZE); 30362306a36Sopenharmony_ci prog += X86_PATCH_SIZE; 30462306a36Sopenharmony_ci if (!ebpf_from_cbpf) { 30562306a36Sopenharmony_ci if (tail_call_reachable && !is_subprog) 30662306a36Sopenharmony_ci EMIT2(0x31, 0xC0); /* xor eax, eax */ 30762306a36Sopenharmony_ci else 30862306a36Sopenharmony_ci EMIT2(0x66, 0x90); /* nop2 */ 30962306a36Sopenharmony_ci } 31062306a36Sopenharmony_ci EMIT1(0x55); /* push rbp */ 31162306a36Sopenharmony_ci EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ 31262306a36Sopenharmony_ci 31362306a36Sopenharmony_ci /* 
X86_TAIL_CALL_OFFSET is here */ 31462306a36Sopenharmony_ci EMIT_ENDBR(); 31562306a36Sopenharmony_ci 31662306a36Sopenharmony_ci /* sub rsp, rounded_stack_depth */ 31762306a36Sopenharmony_ci if (stack_depth) 31862306a36Sopenharmony_ci EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8)); 31962306a36Sopenharmony_ci if (tail_call_reachable) 32062306a36Sopenharmony_ci EMIT1(0x50); /* push rax */ 32162306a36Sopenharmony_ci *pprog = prog; 32262306a36Sopenharmony_ci} 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_cistatic int emit_patch(u8 **pprog, void *func, void *ip, u8 opcode) 32562306a36Sopenharmony_ci{ 32662306a36Sopenharmony_ci u8 *prog = *pprog; 32762306a36Sopenharmony_ci s64 offset; 32862306a36Sopenharmony_ci 32962306a36Sopenharmony_ci offset = func - (ip + X86_PATCH_SIZE); 33062306a36Sopenharmony_ci if (!is_simm32(offset)) { 33162306a36Sopenharmony_ci pr_err("Target call %p is out of range\n", func); 33262306a36Sopenharmony_ci return -ERANGE; 33362306a36Sopenharmony_ci } 33462306a36Sopenharmony_ci EMIT1_off32(opcode, offset); 33562306a36Sopenharmony_ci *pprog = prog; 33662306a36Sopenharmony_ci return 0; 33762306a36Sopenharmony_ci} 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_cistatic int emit_call(u8 **pprog, void *func, void *ip) 34062306a36Sopenharmony_ci{ 34162306a36Sopenharmony_ci return emit_patch(pprog, func, ip, 0xE8); 34262306a36Sopenharmony_ci} 34362306a36Sopenharmony_ci 34462306a36Sopenharmony_cistatic int emit_rsb_call(u8 **pprog, void *func, void *ip) 34562306a36Sopenharmony_ci{ 34662306a36Sopenharmony_ci OPTIMIZER_HIDE_VAR(func); 34762306a36Sopenharmony_ci x86_call_depth_emit_accounting(pprog, func); 34862306a36Sopenharmony_ci return emit_patch(pprog, func, ip, 0xE8); 34962306a36Sopenharmony_ci} 35062306a36Sopenharmony_ci 35162306a36Sopenharmony_cistatic int emit_jump(u8 **pprog, void *func, void *ip) 35262306a36Sopenharmony_ci{ 35362306a36Sopenharmony_ci return emit_patch(pprog, func, ip, 0xE9); 35462306a36Sopenharmony_ci} 
35562306a36Sopenharmony_ci 35662306a36Sopenharmony_cistatic int __bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, 35762306a36Sopenharmony_ci void *old_addr, void *new_addr) 35862306a36Sopenharmony_ci{ 35962306a36Sopenharmony_ci const u8 *nop_insn = x86_nops[5]; 36062306a36Sopenharmony_ci u8 old_insn[X86_PATCH_SIZE]; 36162306a36Sopenharmony_ci u8 new_insn[X86_PATCH_SIZE]; 36262306a36Sopenharmony_ci u8 *prog; 36362306a36Sopenharmony_ci int ret; 36462306a36Sopenharmony_ci 36562306a36Sopenharmony_ci memcpy(old_insn, nop_insn, X86_PATCH_SIZE); 36662306a36Sopenharmony_ci if (old_addr) { 36762306a36Sopenharmony_ci prog = old_insn; 36862306a36Sopenharmony_ci ret = t == BPF_MOD_CALL ? 36962306a36Sopenharmony_ci emit_call(&prog, old_addr, ip) : 37062306a36Sopenharmony_ci emit_jump(&prog, old_addr, ip); 37162306a36Sopenharmony_ci if (ret) 37262306a36Sopenharmony_ci return ret; 37362306a36Sopenharmony_ci } 37462306a36Sopenharmony_ci 37562306a36Sopenharmony_ci memcpy(new_insn, nop_insn, X86_PATCH_SIZE); 37662306a36Sopenharmony_ci if (new_addr) { 37762306a36Sopenharmony_ci prog = new_insn; 37862306a36Sopenharmony_ci ret = t == BPF_MOD_CALL ? 
37962306a36Sopenharmony_ci emit_call(&prog, new_addr, ip) : 38062306a36Sopenharmony_ci emit_jump(&prog, new_addr, ip); 38162306a36Sopenharmony_ci if (ret) 38262306a36Sopenharmony_ci return ret; 38362306a36Sopenharmony_ci } 38462306a36Sopenharmony_ci 38562306a36Sopenharmony_ci ret = -EBUSY; 38662306a36Sopenharmony_ci mutex_lock(&text_mutex); 38762306a36Sopenharmony_ci if (memcmp(ip, old_insn, X86_PATCH_SIZE)) 38862306a36Sopenharmony_ci goto out; 38962306a36Sopenharmony_ci ret = 1; 39062306a36Sopenharmony_ci if (memcmp(ip, new_insn, X86_PATCH_SIZE)) { 39162306a36Sopenharmony_ci text_poke_bp(ip, new_insn, X86_PATCH_SIZE, NULL); 39262306a36Sopenharmony_ci ret = 0; 39362306a36Sopenharmony_ci } 39462306a36Sopenharmony_ciout: 39562306a36Sopenharmony_ci mutex_unlock(&text_mutex); 39662306a36Sopenharmony_ci return ret; 39762306a36Sopenharmony_ci} 39862306a36Sopenharmony_ci 39962306a36Sopenharmony_ciint bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t, 40062306a36Sopenharmony_ci void *old_addr, void *new_addr) 40162306a36Sopenharmony_ci{ 40262306a36Sopenharmony_ci if (!is_kernel_text((long)ip) && 40362306a36Sopenharmony_ci !is_bpf_text_address((long)ip)) 40462306a36Sopenharmony_ci /* BPF poking in modules is not supported */ 40562306a36Sopenharmony_ci return -EINVAL; 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_ci /* 40862306a36Sopenharmony_ci * See emit_prologue(), for IBT builds the trampoline hook is preceded 40962306a36Sopenharmony_ci * with an ENDBR instruction. 
41062306a36Sopenharmony_ci */ 41162306a36Sopenharmony_ci if (is_endbr(*(u32 *)ip)) 41262306a36Sopenharmony_ci ip += ENDBR_INSN_SIZE; 41362306a36Sopenharmony_ci 41462306a36Sopenharmony_ci return __bpf_arch_text_poke(ip, t, old_addr, new_addr); 41562306a36Sopenharmony_ci} 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci#define EMIT_LFENCE() EMIT3(0x0F, 0xAE, 0xE8) 41862306a36Sopenharmony_ci 41962306a36Sopenharmony_cistatic void emit_indirect_jump(u8 **pprog, int reg, u8 *ip) 42062306a36Sopenharmony_ci{ 42162306a36Sopenharmony_ci u8 *prog = *pprog; 42262306a36Sopenharmony_ci 42362306a36Sopenharmony_ci if (cpu_feature_enabled(X86_FEATURE_RETPOLINE_LFENCE)) { 42462306a36Sopenharmony_ci EMIT_LFENCE(); 42562306a36Sopenharmony_ci EMIT2(0xFF, 0xE0 + reg); 42662306a36Sopenharmony_ci } else if (cpu_feature_enabled(X86_FEATURE_RETPOLINE)) { 42762306a36Sopenharmony_ci OPTIMIZER_HIDE_VAR(reg); 42862306a36Sopenharmony_ci if (cpu_feature_enabled(X86_FEATURE_CALL_DEPTH)) 42962306a36Sopenharmony_ci emit_jump(&prog, &__x86_indirect_jump_thunk_array[reg], ip); 43062306a36Sopenharmony_ci else 43162306a36Sopenharmony_ci emit_jump(&prog, &__x86_indirect_thunk_array[reg], ip); 43262306a36Sopenharmony_ci } else { 43362306a36Sopenharmony_ci EMIT2(0xFF, 0xE0 + reg); /* jmp *%\reg */ 43462306a36Sopenharmony_ci if (IS_ENABLED(CONFIG_RETPOLINE) || IS_ENABLED(CONFIG_SLS)) 43562306a36Sopenharmony_ci EMIT1(0xCC); /* int3 */ 43662306a36Sopenharmony_ci } 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci *pprog = prog; 43962306a36Sopenharmony_ci} 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_cistatic void emit_return(u8 **pprog, u8 *ip) 44262306a36Sopenharmony_ci{ 44362306a36Sopenharmony_ci u8 *prog = *pprog; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci if (cpu_feature_enabled(X86_FEATURE_RETHUNK)) { 44662306a36Sopenharmony_ci emit_jump(&prog, x86_return_thunk, ip); 44762306a36Sopenharmony_ci } else { 44862306a36Sopenharmony_ci EMIT1(0xC3); /* ret */ 44962306a36Sopenharmony_ci 
if (IS_ENABLED(CONFIG_SLS)) 45062306a36Sopenharmony_ci EMIT1(0xCC); /* int3 */ 45162306a36Sopenharmony_ci } 45262306a36Sopenharmony_ci 45362306a36Sopenharmony_ci *pprog = prog; 45462306a36Sopenharmony_ci} 45562306a36Sopenharmony_ci 45662306a36Sopenharmony_ci/* 45762306a36Sopenharmony_ci * Generate the following code: 45862306a36Sopenharmony_ci * 45962306a36Sopenharmony_ci * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ... 46062306a36Sopenharmony_ci * if (index >= array->map.max_entries) 46162306a36Sopenharmony_ci * goto out; 46262306a36Sopenharmony_ci * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) 46362306a36Sopenharmony_ci * goto out; 46462306a36Sopenharmony_ci * prog = array->ptrs[index]; 46562306a36Sopenharmony_ci * if (prog == NULL) 46662306a36Sopenharmony_ci * goto out; 46762306a36Sopenharmony_ci * goto *(prog->bpf_func + prologue_size); 46862306a36Sopenharmony_ci * out: 46962306a36Sopenharmony_ci */ 47062306a36Sopenharmony_cistatic void emit_bpf_tail_call_indirect(u8 **pprog, bool *callee_regs_used, 47162306a36Sopenharmony_ci u32 stack_depth, u8 *ip, 47262306a36Sopenharmony_ci struct jit_context *ctx) 47362306a36Sopenharmony_ci{ 47462306a36Sopenharmony_ci int tcc_off = -4 - round_up(stack_depth, 8); 47562306a36Sopenharmony_ci u8 *prog = *pprog, *start = *pprog; 47662306a36Sopenharmony_ci int offset; 47762306a36Sopenharmony_ci 47862306a36Sopenharmony_ci /* 47962306a36Sopenharmony_ci * rdi - pointer to ctx 48062306a36Sopenharmony_ci * rsi - pointer to bpf_array 48162306a36Sopenharmony_ci * rdx - index in bpf_array 48262306a36Sopenharmony_ci */ 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci /* 48562306a36Sopenharmony_ci * if (index >= array->map.max_entries) 48662306a36Sopenharmony_ci * goto out; 48762306a36Sopenharmony_ci */ 48862306a36Sopenharmony_ci EMIT2(0x89, 0xD2); /* mov edx, edx */ 48962306a36Sopenharmony_ci EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ 49062306a36Sopenharmony_ci offsetof(struct bpf_array, 
map.max_entries)); 49162306a36Sopenharmony_ci 49262306a36Sopenharmony_ci offset = ctx->tail_call_indirect_label - (prog + 2 - start); 49362306a36Sopenharmony_ci EMIT2(X86_JBE, offset); /* jbe out */ 49462306a36Sopenharmony_ci 49562306a36Sopenharmony_ci /* 49662306a36Sopenharmony_ci * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) 49762306a36Sopenharmony_ci * goto out; 49862306a36Sopenharmony_ci */ 49962306a36Sopenharmony_ci EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ 50062306a36Sopenharmony_ci EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci offset = ctx->tail_call_indirect_label - (prog + 2 - start); 50362306a36Sopenharmony_ci EMIT2(X86_JAE, offset); /* jae out */ 50462306a36Sopenharmony_ci EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ 50562306a36Sopenharmony_ci EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ 50662306a36Sopenharmony_ci 50762306a36Sopenharmony_ci /* prog = array->ptrs[index]; */ 50862306a36Sopenharmony_ci EMIT4_off32(0x48, 0x8B, 0x8C, 0xD6, /* mov rcx, [rsi + rdx * 8 + offsetof(...)] */ 50962306a36Sopenharmony_ci offsetof(struct bpf_array, ptrs)); 51062306a36Sopenharmony_ci 51162306a36Sopenharmony_ci /* 51262306a36Sopenharmony_ci * if (prog == NULL) 51362306a36Sopenharmony_ci * goto out; 51462306a36Sopenharmony_ci */ 51562306a36Sopenharmony_ci EMIT3(0x48, 0x85, 0xC9); /* test rcx,rcx */ 51662306a36Sopenharmony_ci 51762306a36Sopenharmony_ci offset = ctx->tail_call_indirect_label - (prog + 2 - start); 51862306a36Sopenharmony_ci EMIT2(X86_JE, offset); /* je out */ 51962306a36Sopenharmony_ci 52062306a36Sopenharmony_ci pop_callee_regs(&prog, callee_regs_used); 52162306a36Sopenharmony_ci 52262306a36Sopenharmony_ci EMIT1(0x58); /* pop rax */ 52362306a36Sopenharmony_ci if (stack_depth) 52462306a36Sopenharmony_ci EMIT3_off32(0x48, 0x81, 0xC4, /* add rsp, sd */ 52562306a36Sopenharmony_ci round_up(stack_depth, 8)); 
52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci /* goto *(prog->bpf_func + X86_TAIL_CALL_OFFSET); */ 52862306a36Sopenharmony_ci EMIT4(0x48, 0x8B, 0x49, /* mov rcx, qword ptr [rcx + 32] */ 52962306a36Sopenharmony_ci offsetof(struct bpf_prog, bpf_func)); 53062306a36Sopenharmony_ci EMIT4(0x48, 0x83, 0xC1, /* add rcx, X86_TAIL_CALL_OFFSET */ 53162306a36Sopenharmony_ci X86_TAIL_CALL_OFFSET); 53262306a36Sopenharmony_ci /* 53362306a36Sopenharmony_ci * Now we're ready to jump into next BPF program 53462306a36Sopenharmony_ci * rdi == ctx (1st arg) 53562306a36Sopenharmony_ci * rcx == prog->bpf_func + X86_TAIL_CALL_OFFSET 53662306a36Sopenharmony_ci */ 53762306a36Sopenharmony_ci emit_indirect_jump(&prog, 1 /* rcx */, ip + (prog - start)); 53862306a36Sopenharmony_ci 53962306a36Sopenharmony_ci /* out: */ 54062306a36Sopenharmony_ci ctx->tail_call_indirect_label = prog - start; 54162306a36Sopenharmony_ci *pprog = prog; 54262306a36Sopenharmony_ci} 54362306a36Sopenharmony_ci 54462306a36Sopenharmony_cistatic void emit_bpf_tail_call_direct(struct bpf_jit_poke_descriptor *poke, 54562306a36Sopenharmony_ci u8 **pprog, u8 *ip, 54662306a36Sopenharmony_ci bool *callee_regs_used, u32 stack_depth, 54762306a36Sopenharmony_ci struct jit_context *ctx) 54862306a36Sopenharmony_ci{ 54962306a36Sopenharmony_ci int tcc_off = -4 - round_up(stack_depth, 8); 55062306a36Sopenharmony_ci u8 *prog = *pprog, *start = *pprog; 55162306a36Sopenharmony_ci int offset; 55262306a36Sopenharmony_ci 55362306a36Sopenharmony_ci /* 55462306a36Sopenharmony_ci * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT) 55562306a36Sopenharmony_ci * goto out; 55662306a36Sopenharmony_ci */ 55762306a36Sopenharmony_ci EMIT2_off32(0x8B, 0x85, tcc_off); /* mov eax, dword ptr [rbp - tcc_off] */ 55862306a36Sopenharmony_ci EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ 55962306a36Sopenharmony_ci 56062306a36Sopenharmony_ci offset = ctx->tail_call_direct_label - (prog + 2 - start); 56162306a36Sopenharmony_ci 
EMIT2(X86_JAE, offset); /* jae out */ 56262306a36Sopenharmony_ci EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ 56362306a36Sopenharmony_ci EMIT2_off32(0x89, 0x85, tcc_off); /* mov dword ptr [rbp - tcc_off], eax */ 56462306a36Sopenharmony_ci 56562306a36Sopenharmony_ci poke->tailcall_bypass = ip + (prog - start); 56662306a36Sopenharmony_ci poke->adj_off = X86_TAIL_CALL_OFFSET; 56762306a36Sopenharmony_ci poke->tailcall_target = ip + ctx->tail_call_direct_label - X86_PATCH_SIZE; 56862306a36Sopenharmony_ci poke->bypass_addr = (u8 *)poke->tailcall_target + X86_PATCH_SIZE; 56962306a36Sopenharmony_ci 57062306a36Sopenharmony_ci emit_jump(&prog, (u8 *)poke->tailcall_target + X86_PATCH_SIZE, 57162306a36Sopenharmony_ci poke->tailcall_bypass); 57262306a36Sopenharmony_ci 57362306a36Sopenharmony_ci pop_callee_regs(&prog, callee_regs_used); 57462306a36Sopenharmony_ci EMIT1(0x58); /* pop rax */ 57562306a36Sopenharmony_ci if (stack_depth) 57662306a36Sopenharmony_ci EMIT3_off32(0x48, 0x81, 0xC4, round_up(stack_depth, 8)); 57762306a36Sopenharmony_ci 57862306a36Sopenharmony_ci memcpy(prog, x86_nops[5], X86_PATCH_SIZE); 57962306a36Sopenharmony_ci prog += X86_PATCH_SIZE; 58062306a36Sopenharmony_ci 58162306a36Sopenharmony_ci /* out: */ 58262306a36Sopenharmony_ci ctx->tail_call_direct_label = prog - start; 58362306a36Sopenharmony_ci 58462306a36Sopenharmony_ci *pprog = prog; 58562306a36Sopenharmony_ci} 58662306a36Sopenharmony_ci 58762306a36Sopenharmony_cistatic void bpf_tail_call_direct_fixup(struct bpf_prog *prog) 58862306a36Sopenharmony_ci{ 58962306a36Sopenharmony_ci struct bpf_jit_poke_descriptor *poke; 59062306a36Sopenharmony_ci struct bpf_array *array; 59162306a36Sopenharmony_ci struct bpf_prog *target; 59262306a36Sopenharmony_ci int i, ret; 59362306a36Sopenharmony_ci 59462306a36Sopenharmony_ci for (i = 0; i < prog->aux->size_poke_tab; i++) { 59562306a36Sopenharmony_ci poke = &prog->aux->poke_tab[i]; 59662306a36Sopenharmony_ci if (poke->aux && poke->aux != prog->aux) 
59762306a36Sopenharmony_ci continue; 59862306a36Sopenharmony_ci 59962306a36Sopenharmony_ci WARN_ON_ONCE(READ_ONCE(poke->tailcall_target_stable)); 60062306a36Sopenharmony_ci 60162306a36Sopenharmony_ci if (poke->reason != BPF_POKE_REASON_TAIL_CALL) 60262306a36Sopenharmony_ci continue; 60362306a36Sopenharmony_ci 60462306a36Sopenharmony_ci array = container_of(poke->tail_call.map, struct bpf_array, map); 60562306a36Sopenharmony_ci mutex_lock(&array->aux->poke_mutex); 60662306a36Sopenharmony_ci target = array->ptrs[poke->tail_call.key]; 60762306a36Sopenharmony_ci if (target) { 60862306a36Sopenharmony_ci ret = __bpf_arch_text_poke(poke->tailcall_target, 60962306a36Sopenharmony_ci BPF_MOD_JUMP, NULL, 61062306a36Sopenharmony_ci (u8 *)target->bpf_func + 61162306a36Sopenharmony_ci poke->adj_off); 61262306a36Sopenharmony_ci BUG_ON(ret < 0); 61362306a36Sopenharmony_ci ret = __bpf_arch_text_poke(poke->tailcall_bypass, 61462306a36Sopenharmony_ci BPF_MOD_JUMP, 61562306a36Sopenharmony_ci (u8 *)poke->tailcall_target + 61662306a36Sopenharmony_ci X86_PATCH_SIZE, NULL); 61762306a36Sopenharmony_ci BUG_ON(ret < 0); 61862306a36Sopenharmony_ci } 61962306a36Sopenharmony_ci WRITE_ONCE(poke->tailcall_target_stable, true); 62062306a36Sopenharmony_ci mutex_unlock(&array->aux->poke_mutex); 62162306a36Sopenharmony_ci } 62262306a36Sopenharmony_ci} 62362306a36Sopenharmony_ci 62462306a36Sopenharmony_cistatic void emit_mov_imm32(u8 **pprog, bool sign_propagate, 62562306a36Sopenharmony_ci u32 dst_reg, const u32 imm32) 62662306a36Sopenharmony_ci{ 62762306a36Sopenharmony_ci u8 *prog = *pprog; 62862306a36Sopenharmony_ci u8 b1, b2, b3; 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci /* 63162306a36Sopenharmony_ci * Optimization: if imm32 is positive, use 'mov %eax, imm32' 63262306a36Sopenharmony_ci * (which zero-extends imm32) to save 2 bytes. 
63362306a36Sopenharmony_ci */ 63462306a36Sopenharmony_ci if (sign_propagate && (s32)imm32 < 0) { 63562306a36Sopenharmony_ci /* 'mov %rax, imm32' sign extends imm32 */ 63662306a36Sopenharmony_ci b1 = add_1mod(0x48, dst_reg); 63762306a36Sopenharmony_ci b2 = 0xC7; 63862306a36Sopenharmony_ci b3 = 0xC0; 63962306a36Sopenharmony_ci EMIT3_off32(b1, b2, add_1reg(b3, dst_reg), imm32); 64062306a36Sopenharmony_ci goto done; 64162306a36Sopenharmony_ci } 64262306a36Sopenharmony_ci 64362306a36Sopenharmony_ci /* 64462306a36Sopenharmony_ci * Optimization: if imm32 is zero, use 'xor %eax, %eax' 64562306a36Sopenharmony_ci * to save 3 bytes. 64662306a36Sopenharmony_ci */ 64762306a36Sopenharmony_ci if (imm32 == 0) { 64862306a36Sopenharmony_ci if (is_ereg(dst_reg)) 64962306a36Sopenharmony_ci EMIT1(add_2mod(0x40, dst_reg, dst_reg)); 65062306a36Sopenharmony_ci b2 = 0x31; /* xor */ 65162306a36Sopenharmony_ci b3 = 0xC0; 65262306a36Sopenharmony_ci EMIT2(b2, add_2reg(b3, dst_reg, dst_reg)); 65362306a36Sopenharmony_ci goto done; 65462306a36Sopenharmony_ci } 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ci /* mov %eax, imm32 */ 65762306a36Sopenharmony_ci if (is_ereg(dst_reg)) 65862306a36Sopenharmony_ci EMIT1(add_1mod(0x40, dst_reg)); 65962306a36Sopenharmony_ci EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); 66062306a36Sopenharmony_cidone: 66162306a36Sopenharmony_ci *pprog = prog; 66262306a36Sopenharmony_ci} 66362306a36Sopenharmony_ci 66462306a36Sopenharmony_cistatic void emit_mov_imm64(u8 **pprog, u32 dst_reg, 66562306a36Sopenharmony_ci const u32 imm32_hi, const u32 imm32_lo) 66662306a36Sopenharmony_ci{ 66762306a36Sopenharmony_ci u8 *prog = *pprog; 66862306a36Sopenharmony_ci 66962306a36Sopenharmony_ci if (is_uimm32(((u64)imm32_hi << 32) | (u32)imm32_lo)) { 67062306a36Sopenharmony_ci /* 67162306a36Sopenharmony_ci * For emitting plain u32, where sign bit must not be 67262306a36Sopenharmony_ci * propagated LLVM tends to load imm64 over mov32 67362306a36Sopenharmony_ci * directly, so save couple 
of bytes by just doing 67462306a36Sopenharmony_ci * 'mov %eax, imm32' instead. 67562306a36Sopenharmony_ci */ 67662306a36Sopenharmony_ci emit_mov_imm32(&prog, false, dst_reg, imm32_lo); 67762306a36Sopenharmony_ci } else { 67862306a36Sopenharmony_ci /* movabsq rax, imm64 */ 67962306a36Sopenharmony_ci EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); 68062306a36Sopenharmony_ci EMIT(imm32_lo, 4); 68162306a36Sopenharmony_ci EMIT(imm32_hi, 4); 68262306a36Sopenharmony_ci } 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci *pprog = prog; 68562306a36Sopenharmony_ci} 68662306a36Sopenharmony_ci 68762306a36Sopenharmony_cistatic void emit_mov_reg(u8 **pprog, bool is64, u32 dst_reg, u32 src_reg) 68862306a36Sopenharmony_ci{ 68962306a36Sopenharmony_ci u8 *prog = *pprog; 69062306a36Sopenharmony_ci 69162306a36Sopenharmony_ci if (is64) { 69262306a36Sopenharmony_ci /* mov dst, src */ 69362306a36Sopenharmony_ci EMIT_mov(dst_reg, src_reg); 69462306a36Sopenharmony_ci } else { 69562306a36Sopenharmony_ci /* mov32 dst, src */ 69662306a36Sopenharmony_ci if (is_ereg(dst_reg) || is_ereg(src_reg)) 69762306a36Sopenharmony_ci EMIT1(add_2mod(0x40, dst_reg, src_reg)); 69862306a36Sopenharmony_ci EMIT2(0x89, add_2reg(0xC0, dst_reg, src_reg)); 69962306a36Sopenharmony_ci } 70062306a36Sopenharmony_ci 70162306a36Sopenharmony_ci *pprog = prog; 70262306a36Sopenharmony_ci} 70362306a36Sopenharmony_ci 70462306a36Sopenharmony_cistatic void emit_movsx_reg(u8 **pprog, int num_bits, bool is64, u32 dst_reg, 70562306a36Sopenharmony_ci u32 src_reg) 70662306a36Sopenharmony_ci{ 70762306a36Sopenharmony_ci u8 *prog = *pprog; 70862306a36Sopenharmony_ci 70962306a36Sopenharmony_ci if (is64) { 71062306a36Sopenharmony_ci /* movs[b,w,l]q dst, src */ 71162306a36Sopenharmony_ci if (num_bits == 8) 71262306a36Sopenharmony_ci EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbe, 71362306a36Sopenharmony_ci add_2reg(0xC0, src_reg, dst_reg)); 71462306a36Sopenharmony_ci else if (num_bits == 16) 71562306a36Sopenharmony_ci 
EMIT4(add_2mod(0x48, src_reg, dst_reg), 0x0f, 0xbf, 71662306a36Sopenharmony_ci add_2reg(0xC0, src_reg, dst_reg)); 71762306a36Sopenharmony_ci else if (num_bits == 32) 71862306a36Sopenharmony_ci EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x63, 71962306a36Sopenharmony_ci add_2reg(0xC0, src_reg, dst_reg)); 72062306a36Sopenharmony_ci } else { 72162306a36Sopenharmony_ci /* movs[b,w]l dst, src */ 72262306a36Sopenharmony_ci if (num_bits == 8) { 72362306a36Sopenharmony_ci EMIT4(add_2mod(0x40, src_reg, dst_reg), 0x0f, 0xbe, 72462306a36Sopenharmony_ci add_2reg(0xC0, src_reg, dst_reg)); 72562306a36Sopenharmony_ci } else if (num_bits == 16) { 72662306a36Sopenharmony_ci if (is_ereg(dst_reg) || is_ereg(src_reg)) 72762306a36Sopenharmony_ci EMIT1(add_2mod(0x40, src_reg, dst_reg)); 72862306a36Sopenharmony_ci EMIT3(add_2mod(0x0f, src_reg, dst_reg), 0xbf, 72962306a36Sopenharmony_ci add_2reg(0xC0, src_reg, dst_reg)); 73062306a36Sopenharmony_ci } 73162306a36Sopenharmony_ci } 73262306a36Sopenharmony_ci 73362306a36Sopenharmony_ci *pprog = prog; 73462306a36Sopenharmony_ci} 73562306a36Sopenharmony_ci 73662306a36Sopenharmony_ci/* Emit the suffix (ModR/M etc) for addressing *(ptr_reg + off) and val_reg */ 73762306a36Sopenharmony_cistatic void emit_insn_suffix(u8 **pprog, u32 ptr_reg, u32 val_reg, int off) 73862306a36Sopenharmony_ci{ 73962306a36Sopenharmony_ci u8 *prog = *pprog; 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci if (is_imm8(off)) { 74262306a36Sopenharmony_ci /* 1-byte signed displacement. 
74362306a36Sopenharmony_ci * 74462306a36Sopenharmony_ci * If off == 0 we could skip this and save one extra byte, but 74562306a36Sopenharmony_ci * special case of x86 R13 which always needs an offset is not 74662306a36Sopenharmony_ci * worth the hassle 74762306a36Sopenharmony_ci */ 74862306a36Sopenharmony_ci EMIT2(add_2reg(0x40, ptr_reg, val_reg), off); 74962306a36Sopenharmony_ci } else { 75062306a36Sopenharmony_ci /* 4-byte signed displacement */ 75162306a36Sopenharmony_ci EMIT1_off32(add_2reg(0x80, ptr_reg, val_reg), off); 75262306a36Sopenharmony_ci } 75362306a36Sopenharmony_ci *pprog = prog; 75462306a36Sopenharmony_ci} 75562306a36Sopenharmony_ci 75662306a36Sopenharmony_ci/* 75762306a36Sopenharmony_ci * Emit a REX byte if it will be necessary to address these registers 75862306a36Sopenharmony_ci */ 75962306a36Sopenharmony_cistatic void maybe_emit_mod(u8 **pprog, u32 dst_reg, u32 src_reg, bool is64) 76062306a36Sopenharmony_ci{ 76162306a36Sopenharmony_ci u8 *prog = *pprog; 76262306a36Sopenharmony_ci 76362306a36Sopenharmony_ci if (is64) 76462306a36Sopenharmony_ci EMIT1(add_2mod(0x48, dst_reg, src_reg)); 76562306a36Sopenharmony_ci else if (is_ereg(dst_reg) || is_ereg(src_reg)) 76662306a36Sopenharmony_ci EMIT1(add_2mod(0x40, dst_reg, src_reg)); 76762306a36Sopenharmony_ci *pprog = prog; 76862306a36Sopenharmony_ci} 76962306a36Sopenharmony_ci 77062306a36Sopenharmony_ci/* 77162306a36Sopenharmony_ci * Similar version of maybe_emit_mod() for a single register 77262306a36Sopenharmony_ci */ 77362306a36Sopenharmony_cistatic void maybe_emit_1mod(u8 **pprog, u32 reg, bool is64) 77462306a36Sopenharmony_ci{ 77562306a36Sopenharmony_ci u8 *prog = *pprog; 77662306a36Sopenharmony_ci 77762306a36Sopenharmony_ci if (is64) 77862306a36Sopenharmony_ci EMIT1(add_1mod(0x48, reg)); 77962306a36Sopenharmony_ci else if (is_ereg(reg)) 78062306a36Sopenharmony_ci EMIT1(add_1mod(0x40, reg)); 78162306a36Sopenharmony_ci *pprog = prog; 78262306a36Sopenharmony_ci} 78362306a36Sopenharmony_ci 
78462306a36Sopenharmony_ci/* LDX: dst_reg = *(u8*)(src_reg + off) */ 78562306a36Sopenharmony_cistatic void emit_ldx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) 78662306a36Sopenharmony_ci{ 78762306a36Sopenharmony_ci u8 *prog = *pprog; 78862306a36Sopenharmony_ci 78962306a36Sopenharmony_ci switch (size) { 79062306a36Sopenharmony_ci case BPF_B: 79162306a36Sopenharmony_ci /* Emit 'movzx rax, byte ptr [rax + off]' */ 79262306a36Sopenharmony_ci EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB6); 79362306a36Sopenharmony_ci break; 79462306a36Sopenharmony_ci case BPF_H: 79562306a36Sopenharmony_ci /* Emit 'movzx rax, word ptr [rax + off]' */ 79662306a36Sopenharmony_ci EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xB7); 79762306a36Sopenharmony_ci break; 79862306a36Sopenharmony_ci case BPF_W: 79962306a36Sopenharmony_ci /* Emit 'mov eax, dword ptr [rax+0x14]' */ 80062306a36Sopenharmony_ci if (is_ereg(dst_reg) || is_ereg(src_reg)) 80162306a36Sopenharmony_ci EMIT2(add_2mod(0x40, src_reg, dst_reg), 0x8B); 80262306a36Sopenharmony_ci else 80362306a36Sopenharmony_ci EMIT1(0x8B); 80462306a36Sopenharmony_ci break; 80562306a36Sopenharmony_ci case BPF_DW: 80662306a36Sopenharmony_ci /* Emit 'mov rax, qword ptr [rax+0x14]' */ 80762306a36Sopenharmony_ci EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x8B); 80862306a36Sopenharmony_ci break; 80962306a36Sopenharmony_ci } 81062306a36Sopenharmony_ci emit_insn_suffix(&prog, src_reg, dst_reg, off); 81162306a36Sopenharmony_ci *pprog = prog; 81262306a36Sopenharmony_ci} 81362306a36Sopenharmony_ci 81462306a36Sopenharmony_ci/* LDSX: dst_reg = *(s8*)(src_reg + off) */ 81562306a36Sopenharmony_cistatic void emit_ldsx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) 81662306a36Sopenharmony_ci{ 81762306a36Sopenharmony_ci u8 *prog = *pprog; 81862306a36Sopenharmony_ci 81962306a36Sopenharmony_ci switch (size) { 82062306a36Sopenharmony_ci case BPF_B: 82162306a36Sopenharmony_ci /* Emit 'movsx rax, byte ptr [rax + off]' */ 
82262306a36Sopenharmony_ci EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBE); 82362306a36Sopenharmony_ci break; 82462306a36Sopenharmony_ci case BPF_H: 82562306a36Sopenharmony_ci /* Emit 'movsx rax, word ptr [rax + off]' */ 82662306a36Sopenharmony_ci EMIT3(add_2mod(0x48, src_reg, dst_reg), 0x0F, 0xBF); 82762306a36Sopenharmony_ci break; 82862306a36Sopenharmony_ci case BPF_W: 82962306a36Sopenharmony_ci /* Emit 'movsx rax, dword ptr [rax+0x14]' */ 83062306a36Sopenharmony_ci EMIT2(add_2mod(0x48, src_reg, dst_reg), 0x63); 83162306a36Sopenharmony_ci break; 83262306a36Sopenharmony_ci } 83362306a36Sopenharmony_ci emit_insn_suffix(&prog, src_reg, dst_reg, off); 83462306a36Sopenharmony_ci *pprog = prog; 83562306a36Sopenharmony_ci} 83662306a36Sopenharmony_ci 83762306a36Sopenharmony_ci/* STX: *(u8*)(dst_reg + off) = src_reg */ 83862306a36Sopenharmony_cistatic void emit_stx(u8 **pprog, u32 size, u32 dst_reg, u32 src_reg, int off) 83962306a36Sopenharmony_ci{ 84062306a36Sopenharmony_ci u8 *prog = *pprog; 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_ci switch (size) { 84362306a36Sopenharmony_ci case BPF_B: 84462306a36Sopenharmony_ci /* Emit 'mov byte ptr [rax + off], al' */ 84562306a36Sopenharmony_ci if (is_ereg(dst_reg) || is_ereg_8l(src_reg)) 84662306a36Sopenharmony_ci /* Add extra byte for eregs or SIL,DIL,BPL in src_reg */ 84762306a36Sopenharmony_ci EMIT2(add_2mod(0x40, dst_reg, src_reg), 0x88); 84862306a36Sopenharmony_ci else 84962306a36Sopenharmony_ci EMIT1(0x88); 85062306a36Sopenharmony_ci break; 85162306a36Sopenharmony_ci case BPF_H: 85262306a36Sopenharmony_ci if (is_ereg(dst_reg) || is_ereg(src_reg)) 85362306a36Sopenharmony_ci EMIT3(0x66, add_2mod(0x40, dst_reg, src_reg), 0x89); 85462306a36Sopenharmony_ci else 85562306a36Sopenharmony_ci EMIT2(0x66, 0x89); 85662306a36Sopenharmony_ci break; 85762306a36Sopenharmony_ci case BPF_W: 85862306a36Sopenharmony_ci if (is_ereg(dst_reg) || is_ereg(src_reg)) 85962306a36Sopenharmony_ci EMIT2(add_2mod(0x40, dst_reg, src_reg), 
0x89); 86062306a36Sopenharmony_ci else 86162306a36Sopenharmony_ci EMIT1(0x89); 86262306a36Sopenharmony_ci break; 86362306a36Sopenharmony_ci case BPF_DW: 86462306a36Sopenharmony_ci EMIT2(add_2mod(0x48, dst_reg, src_reg), 0x89); 86562306a36Sopenharmony_ci break; 86662306a36Sopenharmony_ci } 86762306a36Sopenharmony_ci emit_insn_suffix(&prog, dst_reg, src_reg, off); 86862306a36Sopenharmony_ci *pprog = prog; 86962306a36Sopenharmony_ci} 87062306a36Sopenharmony_ci 87162306a36Sopenharmony_cistatic int emit_atomic(u8 **pprog, u8 atomic_op, 87262306a36Sopenharmony_ci u32 dst_reg, u32 src_reg, s16 off, u8 bpf_size) 87362306a36Sopenharmony_ci{ 87462306a36Sopenharmony_ci u8 *prog = *pprog; 87562306a36Sopenharmony_ci 87662306a36Sopenharmony_ci EMIT1(0xF0); /* lock prefix */ 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci maybe_emit_mod(&prog, dst_reg, src_reg, bpf_size == BPF_DW); 87962306a36Sopenharmony_ci 88062306a36Sopenharmony_ci /* emit opcode */ 88162306a36Sopenharmony_ci switch (atomic_op) { 88262306a36Sopenharmony_ci case BPF_ADD: 88362306a36Sopenharmony_ci case BPF_AND: 88462306a36Sopenharmony_ci case BPF_OR: 88562306a36Sopenharmony_ci case BPF_XOR: 88662306a36Sopenharmony_ci /* lock *(u32/u64*)(dst_reg + off) <op>= src_reg */ 88762306a36Sopenharmony_ci EMIT1(simple_alu_opcodes[atomic_op]); 88862306a36Sopenharmony_ci break; 88962306a36Sopenharmony_ci case BPF_ADD | BPF_FETCH: 89062306a36Sopenharmony_ci /* src_reg = atomic_fetch_add(dst_reg + off, src_reg); */ 89162306a36Sopenharmony_ci EMIT2(0x0F, 0xC1); 89262306a36Sopenharmony_ci break; 89362306a36Sopenharmony_ci case BPF_XCHG: 89462306a36Sopenharmony_ci /* src_reg = atomic_xchg(dst_reg + off, src_reg); */ 89562306a36Sopenharmony_ci EMIT1(0x87); 89662306a36Sopenharmony_ci break; 89762306a36Sopenharmony_ci case BPF_CMPXCHG: 89862306a36Sopenharmony_ci /* r0 = atomic_cmpxchg(dst_reg + off, r0, src_reg); */ 89962306a36Sopenharmony_ci EMIT2(0x0F, 0xB1); 90062306a36Sopenharmony_ci break; 90162306a36Sopenharmony_ci 
default: 90262306a36Sopenharmony_ci pr_err("bpf_jit: unknown atomic opcode %02x\n", atomic_op); 90362306a36Sopenharmony_ci return -EFAULT; 90462306a36Sopenharmony_ci } 90562306a36Sopenharmony_ci 90662306a36Sopenharmony_ci emit_insn_suffix(&prog, dst_reg, src_reg, off); 90762306a36Sopenharmony_ci 90862306a36Sopenharmony_ci *pprog = prog; 90962306a36Sopenharmony_ci return 0; 91062306a36Sopenharmony_ci} 91162306a36Sopenharmony_ci 91262306a36Sopenharmony_cibool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs) 91362306a36Sopenharmony_ci{ 91462306a36Sopenharmony_ci u32 reg = x->fixup >> 8; 91562306a36Sopenharmony_ci 91662306a36Sopenharmony_ci /* jump over faulting load and clear dest register */ 91762306a36Sopenharmony_ci *(unsigned long *)((void *)regs + reg) = 0; 91862306a36Sopenharmony_ci regs->ip += x->fixup & 0xff; 91962306a36Sopenharmony_ci return true; 92062306a36Sopenharmony_ci} 92162306a36Sopenharmony_ci 92262306a36Sopenharmony_cistatic void detect_reg_usage(struct bpf_insn *insn, int insn_cnt, 92362306a36Sopenharmony_ci bool *regs_used, bool *tail_call_seen) 92462306a36Sopenharmony_ci{ 92562306a36Sopenharmony_ci int i; 92662306a36Sopenharmony_ci 92762306a36Sopenharmony_ci for (i = 1; i <= insn_cnt; i++, insn++) { 92862306a36Sopenharmony_ci if (insn->code == (BPF_JMP | BPF_TAIL_CALL)) 92962306a36Sopenharmony_ci *tail_call_seen = true; 93062306a36Sopenharmony_ci if (insn->dst_reg == BPF_REG_6 || insn->src_reg == BPF_REG_6) 93162306a36Sopenharmony_ci regs_used[0] = true; 93262306a36Sopenharmony_ci if (insn->dst_reg == BPF_REG_7 || insn->src_reg == BPF_REG_7) 93362306a36Sopenharmony_ci regs_used[1] = true; 93462306a36Sopenharmony_ci if (insn->dst_reg == BPF_REG_8 || insn->src_reg == BPF_REG_8) 93562306a36Sopenharmony_ci regs_used[2] = true; 93662306a36Sopenharmony_ci if (insn->dst_reg == BPF_REG_9 || insn->src_reg == BPF_REG_9) 93762306a36Sopenharmony_ci regs_used[3] = true; 93862306a36Sopenharmony_ci } 93962306a36Sopenharmony_ci} 
94062306a36Sopenharmony_ci 94162306a36Sopenharmony_cistatic void emit_nops(u8 **pprog, int len) 94262306a36Sopenharmony_ci{ 94362306a36Sopenharmony_ci u8 *prog = *pprog; 94462306a36Sopenharmony_ci int i, noplen; 94562306a36Sopenharmony_ci 94662306a36Sopenharmony_ci while (len > 0) { 94762306a36Sopenharmony_ci noplen = len; 94862306a36Sopenharmony_ci 94962306a36Sopenharmony_ci if (noplen > ASM_NOP_MAX) 95062306a36Sopenharmony_ci noplen = ASM_NOP_MAX; 95162306a36Sopenharmony_ci 95262306a36Sopenharmony_ci for (i = 0; i < noplen; i++) 95362306a36Sopenharmony_ci EMIT1(x86_nops[noplen][i]); 95462306a36Sopenharmony_ci len -= noplen; 95562306a36Sopenharmony_ci } 95662306a36Sopenharmony_ci 95762306a36Sopenharmony_ci *pprog = prog; 95862306a36Sopenharmony_ci} 95962306a36Sopenharmony_ci 96062306a36Sopenharmony_ci/* emit the 3-byte VEX prefix 96162306a36Sopenharmony_ci * 96262306a36Sopenharmony_ci * r: same as rex.r, extra bit for ModRM reg field 96362306a36Sopenharmony_ci * x: same as rex.x, extra bit for SIB index field 96462306a36Sopenharmony_ci * b: same as rex.b, extra bit for ModRM r/m, or SIB base 96562306a36Sopenharmony_ci * m: opcode map select, encoding escape bytes e.g. 
0x0f38 96662306a36Sopenharmony_ci * w: same as rex.w (32 bit or 64 bit) or opcode specific 96762306a36Sopenharmony_ci * src_reg2: additional source reg (encoded as BPF reg) 96862306a36Sopenharmony_ci * l: vector length (128 bit or 256 bit) or reserved 96962306a36Sopenharmony_ci * pp: opcode prefix (none, 0x66, 0xf2 or 0xf3) 97062306a36Sopenharmony_ci */ 97162306a36Sopenharmony_cistatic void emit_3vex(u8 **pprog, bool r, bool x, bool b, u8 m, 97262306a36Sopenharmony_ci bool w, u8 src_reg2, bool l, u8 pp) 97362306a36Sopenharmony_ci{ 97462306a36Sopenharmony_ci u8 *prog = *pprog; 97562306a36Sopenharmony_ci const u8 b0 = 0xc4; /* first byte of 3-byte VEX prefix */ 97662306a36Sopenharmony_ci u8 b1, b2; 97762306a36Sopenharmony_ci u8 vvvv = reg2hex[src_reg2]; 97862306a36Sopenharmony_ci 97962306a36Sopenharmony_ci /* reg2hex gives only the lower 3 bit of vvvv */ 98062306a36Sopenharmony_ci if (is_ereg(src_reg2)) 98162306a36Sopenharmony_ci vvvv |= 1 << 3; 98262306a36Sopenharmony_ci 98362306a36Sopenharmony_ci /* 98462306a36Sopenharmony_ci * 2nd byte of 3-byte VEX prefix 98562306a36Sopenharmony_ci * ~ means bit inverted encoding 98662306a36Sopenharmony_ci * 98762306a36Sopenharmony_ci * 7 0 98862306a36Sopenharmony_ci * +---+---+---+---+---+---+---+---+ 98962306a36Sopenharmony_ci * |~R |~X |~B | m | 99062306a36Sopenharmony_ci * +---+---+---+---+---+---+---+---+ 99162306a36Sopenharmony_ci */ 99262306a36Sopenharmony_ci b1 = (!r << 7) | (!x << 6) | (!b << 5) | (m & 0x1f); 99362306a36Sopenharmony_ci /* 99462306a36Sopenharmony_ci * 3rd byte of 3-byte VEX prefix 99562306a36Sopenharmony_ci * 99662306a36Sopenharmony_ci * 7 0 99762306a36Sopenharmony_ci * +---+---+---+---+---+---+---+---+ 99862306a36Sopenharmony_ci * | W | ~vvvv | L | pp | 99962306a36Sopenharmony_ci * +---+---+---+---+---+---+---+---+ 100062306a36Sopenharmony_ci */ 100162306a36Sopenharmony_ci b2 = (w << 7) | ((~vvvv & 0xf) << 3) | (l << 2) | (pp & 3); 100262306a36Sopenharmony_ci 100362306a36Sopenharmony_ci EMIT3(b0, b1, 
b2); 100462306a36Sopenharmony_ci *pprog = prog; 100562306a36Sopenharmony_ci} 100662306a36Sopenharmony_ci 100762306a36Sopenharmony_ci/* emit BMI2 shift instruction */ 100862306a36Sopenharmony_cistatic void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op) 100962306a36Sopenharmony_ci{ 101062306a36Sopenharmony_ci u8 *prog = *pprog; 101162306a36Sopenharmony_ci bool r = is_ereg(dst_reg); 101262306a36Sopenharmony_ci u8 m = 2; /* escape code 0f38 */ 101362306a36Sopenharmony_ci 101462306a36Sopenharmony_ci emit_3vex(&prog, r, false, r, m, is64, src_reg, false, op); 101562306a36Sopenharmony_ci EMIT2(0xf7, add_2reg(0xC0, dst_reg, dst_reg)); 101662306a36Sopenharmony_ci *pprog = prog; 101762306a36Sopenharmony_ci} 101862306a36Sopenharmony_ci 101962306a36Sopenharmony_ci#define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp))) 102062306a36Sopenharmony_ci 102162306a36Sopenharmony_ci/* mov rax, qword ptr [rbp - rounded_stack_depth - 8] */ 102262306a36Sopenharmony_ci#define RESTORE_TAIL_CALL_CNT(stack) \ 102362306a36Sopenharmony_ci EMIT3_off32(0x48, 0x8B, 0x85, -round_up(stack, 8) - 8) 102462306a36Sopenharmony_ci 102562306a36Sopenharmony_cistatic int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image, 102662306a36Sopenharmony_ci int oldproglen, struct jit_context *ctx, bool jmp_padding) 102762306a36Sopenharmony_ci{ 102862306a36Sopenharmony_ci bool tail_call_reachable = bpf_prog->aux->tail_call_reachable; 102962306a36Sopenharmony_ci struct bpf_insn *insn = bpf_prog->insnsi; 103062306a36Sopenharmony_ci bool callee_regs_used[4] = {}; 103162306a36Sopenharmony_ci int insn_cnt = bpf_prog->len; 103262306a36Sopenharmony_ci bool tail_call_seen = false; 103362306a36Sopenharmony_ci bool seen_exit = false; 103462306a36Sopenharmony_ci u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY]; 103562306a36Sopenharmony_ci int i, excnt = 0; 103662306a36Sopenharmony_ci int ilen, proglen = 0; 103762306a36Sopenharmony_ci u8 *prog = temp; 103862306a36Sopenharmony_ci int 
err; 103962306a36Sopenharmony_ci 104062306a36Sopenharmony_ci detect_reg_usage(insn, insn_cnt, callee_regs_used, 104162306a36Sopenharmony_ci &tail_call_seen); 104262306a36Sopenharmony_ci 104362306a36Sopenharmony_ci /* tail call's presence in current prog implies it is reachable */ 104462306a36Sopenharmony_ci tail_call_reachable |= tail_call_seen; 104562306a36Sopenharmony_ci 104662306a36Sopenharmony_ci emit_prologue(&prog, bpf_prog->aux->stack_depth, 104762306a36Sopenharmony_ci bpf_prog_was_classic(bpf_prog), tail_call_reachable, 104862306a36Sopenharmony_ci bpf_prog->aux->func_idx != 0); 104962306a36Sopenharmony_ci push_callee_regs(&prog, callee_regs_used); 105062306a36Sopenharmony_ci 105162306a36Sopenharmony_ci ilen = prog - temp; 105262306a36Sopenharmony_ci if (rw_image) 105362306a36Sopenharmony_ci memcpy(rw_image + proglen, temp, ilen); 105462306a36Sopenharmony_ci proglen += ilen; 105562306a36Sopenharmony_ci addrs[0] = proglen; 105662306a36Sopenharmony_ci prog = temp; 105762306a36Sopenharmony_ci 105862306a36Sopenharmony_ci for (i = 1; i <= insn_cnt; i++, insn++) { 105962306a36Sopenharmony_ci const s32 imm32 = insn->imm; 106062306a36Sopenharmony_ci u32 dst_reg = insn->dst_reg; 106162306a36Sopenharmony_ci u32 src_reg = insn->src_reg; 106262306a36Sopenharmony_ci u8 b2 = 0, b3 = 0; 106362306a36Sopenharmony_ci u8 *start_of_ldx; 106462306a36Sopenharmony_ci s64 jmp_offset; 106562306a36Sopenharmony_ci s16 insn_off; 106662306a36Sopenharmony_ci u8 jmp_cond; 106762306a36Sopenharmony_ci u8 *func; 106862306a36Sopenharmony_ci int nops; 106962306a36Sopenharmony_ci 107062306a36Sopenharmony_ci switch (insn->code) { 107162306a36Sopenharmony_ci /* ALU */ 107262306a36Sopenharmony_ci case BPF_ALU | BPF_ADD | BPF_X: 107362306a36Sopenharmony_ci case BPF_ALU | BPF_SUB | BPF_X: 107462306a36Sopenharmony_ci case BPF_ALU | BPF_AND | BPF_X: 107562306a36Sopenharmony_ci case BPF_ALU | BPF_OR | BPF_X: 107662306a36Sopenharmony_ci case BPF_ALU | BPF_XOR | BPF_X: 107762306a36Sopenharmony_ci case 
BPF_ALU64 | BPF_ADD | BPF_X: 107862306a36Sopenharmony_ci case BPF_ALU64 | BPF_SUB | BPF_X: 107962306a36Sopenharmony_ci case BPF_ALU64 | BPF_AND | BPF_X: 108062306a36Sopenharmony_ci case BPF_ALU64 | BPF_OR | BPF_X: 108162306a36Sopenharmony_ci case BPF_ALU64 | BPF_XOR | BPF_X: 108262306a36Sopenharmony_ci maybe_emit_mod(&prog, dst_reg, src_reg, 108362306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64); 108462306a36Sopenharmony_ci b2 = simple_alu_opcodes[BPF_OP(insn->code)]; 108562306a36Sopenharmony_ci EMIT2(b2, add_2reg(0xC0, dst_reg, src_reg)); 108662306a36Sopenharmony_ci break; 108762306a36Sopenharmony_ci 108862306a36Sopenharmony_ci case BPF_ALU64 | BPF_MOV | BPF_X: 108962306a36Sopenharmony_ci case BPF_ALU | BPF_MOV | BPF_X: 109062306a36Sopenharmony_ci if (insn->off == 0) 109162306a36Sopenharmony_ci emit_mov_reg(&prog, 109262306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64, 109362306a36Sopenharmony_ci dst_reg, src_reg); 109462306a36Sopenharmony_ci else 109562306a36Sopenharmony_ci emit_movsx_reg(&prog, insn->off, 109662306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64, 109762306a36Sopenharmony_ci dst_reg, src_reg); 109862306a36Sopenharmony_ci break; 109962306a36Sopenharmony_ci 110062306a36Sopenharmony_ci /* neg dst */ 110162306a36Sopenharmony_ci case BPF_ALU | BPF_NEG: 110262306a36Sopenharmony_ci case BPF_ALU64 | BPF_NEG: 110362306a36Sopenharmony_ci maybe_emit_1mod(&prog, dst_reg, 110462306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64); 110562306a36Sopenharmony_ci EMIT2(0xF7, add_1reg(0xD8, dst_reg)); 110662306a36Sopenharmony_ci break; 110762306a36Sopenharmony_ci 110862306a36Sopenharmony_ci case BPF_ALU | BPF_ADD | BPF_K: 110962306a36Sopenharmony_ci case BPF_ALU | BPF_SUB | BPF_K: 111062306a36Sopenharmony_ci case BPF_ALU | BPF_AND | BPF_K: 111162306a36Sopenharmony_ci case BPF_ALU | BPF_OR | BPF_K: 111262306a36Sopenharmony_ci case BPF_ALU | BPF_XOR | BPF_K: 111362306a36Sopenharmony_ci case BPF_ALU64 | BPF_ADD | BPF_K: 
111462306a36Sopenharmony_ci case BPF_ALU64 | BPF_SUB | BPF_K: 111562306a36Sopenharmony_ci case BPF_ALU64 | BPF_AND | BPF_K: 111662306a36Sopenharmony_ci case BPF_ALU64 | BPF_OR | BPF_K: 111762306a36Sopenharmony_ci case BPF_ALU64 | BPF_XOR | BPF_K: 111862306a36Sopenharmony_ci maybe_emit_1mod(&prog, dst_reg, 111962306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64); 112062306a36Sopenharmony_ci 112162306a36Sopenharmony_ci /* 112262306a36Sopenharmony_ci * b3 holds 'normal' opcode, b2 short form only valid 112362306a36Sopenharmony_ci * in case dst is eax/rax. 112462306a36Sopenharmony_ci */ 112562306a36Sopenharmony_ci switch (BPF_OP(insn->code)) { 112662306a36Sopenharmony_ci case BPF_ADD: 112762306a36Sopenharmony_ci b3 = 0xC0; 112862306a36Sopenharmony_ci b2 = 0x05; 112962306a36Sopenharmony_ci break; 113062306a36Sopenharmony_ci case BPF_SUB: 113162306a36Sopenharmony_ci b3 = 0xE8; 113262306a36Sopenharmony_ci b2 = 0x2D; 113362306a36Sopenharmony_ci break; 113462306a36Sopenharmony_ci case BPF_AND: 113562306a36Sopenharmony_ci b3 = 0xE0; 113662306a36Sopenharmony_ci b2 = 0x25; 113762306a36Sopenharmony_ci break; 113862306a36Sopenharmony_ci case BPF_OR: 113962306a36Sopenharmony_ci b3 = 0xC8; 114062306a36Sopenharmony_ci b2 = 0x0D; 114162306a36Sopenharmony_ci break; 114262306a36Sopenharmony_ci case BPF_XOR: 114362306a36Sopenharmony_ci b3 = 0xF0; 114462306a36Sopenharmony_ci b2 = 0x35; 114562306a36Sopenharmony_ci break; 114662306a36Sopenharmony_ci } 114762306a36Sopenharmony_ci 114862306a36Sopenharmony_ci if (is_imm8(imm32)) 114962306a36Sopenharmony_ci EMIT3(0x83, add_1reg(b3, dst_reg), imm32); 115062306a36Sopenharmony_ci else if (is_axreg(dst_reg)) 115162306a36Sopenharmony_ci EMIT1_off32(b2, imm32); 115262306a36Sopenharmony_ci else 115362306a36Sopenharmony_ci EMIT2_off32(0x81, add_1reg(b3, dst_reg), imm32); 115462306a36Sopenharmony_ci break; 115562306a36Sopenharmony_ci 115662306a36Sopenharmony_ci case BPF_ALU64 | BPF_MOV | BPF_K: 115762306a36Sopenharmony_ci case BPF_ALU | BPF_MOV 
| BPF_K: 115862306a36Sopenharmony_ci emit_mov_imm32(&prog, BPF_CLASS(insn->code) == BPF_ALU64, 115962306a36Sopenharmony_ci dst_reg, imm32); 116062306a36Sopenharmony_ci break; 116162306a36Sopenharmony_ci 116262306a36Sopenharmony_ci case BPF_LD | BPF_IMM | BPF_DW: 116362306a36Sopenharmony_ci emit_mov_imm64(&prog, dst_reg, insn[1].imm, insn[0].imm); 116462306a36Sopenharmony_ci insn++; 116562306a36Sopenharmony_ci i++; 116662306a36Sopenharmony_ci break; 116762306a36Sopenharmony_ci 116862306a36Sopenharmony_ci /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ 116962306a36Sopenharmony_ci case BPF_ALU | BPF_MOD | BPF_X: 117062306a36Sopenharmony_ci case BPF_ALU | BPF_DIV | BPF_X: 117162306a36Sopenharmony_ci case BPF_ALU | BPF_MOD | BPF_K: 117262306a36Sopenharmony_ci case BPF_ALU | BPF_DIV | BPF_K: 117362306a36Sopenharmony_ci case BPF_ALU64 | BPF_MOD | BPF_X: 117462306a36Sopenharmony_ci case BPF_ALU64 | BPF_DIV | BPF_X: 117562306a36Sopenharmony_ci case BPF_ALU64 | BPF_MOD | BPF_K: 117662306a36Sopenharmony_ci case BPF_ALU64 | BPF_DIV | BPF_K: { 117762306a36Sopenharmony_ci bool is64 = BPF_CLASS(insn->code) == BPF_ALU64; 117862306a36Sopenharmony_ci 117962306a36Sopenharmony_ci if (dst_reg != BPF_REG_0) 118062306a36Sopenharmony_ci EMIT1(0x50); /* push rax */ 118162306a36Sopenharmony_ci if (dst_reg != BPF_REG_3) 118262306a36Sopenharmony_ci EMIT1(0x52); /* push rdx */ 118362306a36Sopenharmony_ci 118462306a36Sopenharmony_ci if (BPF_SRC(insn->code) == BPF_X) { 118562306a36Sopenharmony_ci if (src_reg == BPF_REG_0 || 118662306a36Sopenharmony_ci src_reg == BPF_REG_3) { 118762306a36Sopenharmony_ci /* mov r11, src_reg */ 118862306a36Sopenharmony_ci EMIT_mov(AUX_REG, src_reg); 118962306a36Sopenharmony_ci src_reg = AUX_REG; 119062306a36Sopenharmony_ci } 119162306a36Sopenharmony_ci } else { 119262306a36Sopenharmony_ci /* mov r11, imm32 */ 119362306a36Sopenharmony_ci EMIT3_off32(0x49, 0xC7, 0xC3, imm32); 119462306a36Sopenharmony_ci src_reg = AUX_REG; 119562306a36Sopenharmony_ci } 
119662306a36Sopenharmony_ci 119762306a36Sopenharmony_ci if (dst_reg != BPF_REG_0) 119862306a36Sopenharmony_ci /* mov rax, dst_reg */ 119962306a36Sopenharmony_ci emit_mov_reg(&prog, is64, BPF_REG_0, dst_reg); 120062306a36Sopenharmony_ci 120162306a36Sopenharmony_ci if (insn->off == 0) { 120262306a36Sopenharmony_ci /* 120362306a36Sopenharmony_ci * xor edx, edx 120462306a36Sopenharmony_ci * equivalent to 'xor rdx, rdx', but one byte less 120562306a36Sopenharmony_ci */ 120662306a36Sopenharmony_ci EMIT2(0x31, 0xd2); 120762306a36Sopenharmony_ci 120862306a36Sopenharmony_ci /* div src_reg */ 120962306a36Sopenharmony_ci maybe_emit_1mod(&prog, src_reg, is64); 121062306a36Sopenharmony_ci EMIT2(0xF7, add_1reg(0xF0, src_reg)); 121162306a36Sopenharmony_ci } else { 121262306a36Sopenharmony_ci if (BPF_CLASS(insn->code) == BPF_ALU) 121362306a36Sopenharmony_ci EMIT1(0x99); /* cdq */ 121462306a36Sopenharmony_ci else 121562306a36Sopenharmony_ci EMIT2(0x48, 0x99); /* cqo */ 121662306a36Sopenharmony_ci 121762306a36Sopenharmony_ci /* idiv src_reg */ 121862306a36Sopenharmony_ci maybe_emit_1mod(&prog, src_reg, is64); 121962306a36Sopenharmony_ci EMIT2(0xF7, add_1reg(0xF8, src_reg)); 122062306a36Sopenharmony_ci } 122162306a36Sopenharmony_ci 122262306a36Sopenharmony_ci if (BPF_OP(insn->code) == BPF_MOD && 122362306a36Sopenharmony_ci dst_reg != BPF_REG_3) 122462306a36Sopenharmony_ci /* mov dst_reg, rdx */ 122562306a36Sopenharmony_ci emit_mov_reg(&prog, is64, dst_reg, BPF_REG_3); 122662306a36Sopenharmony_ci else if (BPF_OP(insn->code) == BPF_DIV && 122762306a36Sopenharmony_ci dst_reg != BPF_REG_0) 122862306a36Sopenharmony_ci /* mov dst_reg, rax */ 122962306a36Sopenharmony_ci emit_mov_reg(&prog, is64, dst_reg, BPF_REG_0); 123062306a36Sopenharmony_ci 123162306a36Sopenharmony_ci if (dst_reg != BPF_REG_3) 123262306a36Sopenharmony_ci EMIT1(0x5A); /* pop rdx */ 123362306a36Sopenharmony_ci if (dst_reg != BPF_REG_0) 123462306a36Sopenharmony_ci EMIT1(0x58); /* pop rax */ 123562306a36Sopenharmony_ci 
break; 123662306a36Sopenharmony_ci } 123762306a36Sopenharmony_ci 123862306a36Sopenharmony_ci case BPF_ALU | BPF_MUL | BPF_K: 123962306a36Sopenharmony_ci case BPF_ALU64 | BPF_MUL | BPF_K: 124062306a36Sopenharmony_ci maybe_emit_mod(&prog, dst_reg, dst_reg, 124162306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64); 124262306a36Sopenharmony_ci 124362306a36Sopenharmony_ci if (is_imm8(imm32)) 124462306a36Sopenharmony_ci /* imul dst_reg, dst_reg, imm8 */ 124562306a36Sopenharmony_ci EMIT3(0x6B, add_2reg(0xC0, dst_reg, dst_reg), 124662306a36Sopenharmony_ci imm32); 124762306a36Sopenharmony_ci else 124862306a36Sopenharmony_ci /* imul dst_reg, dst_reg, imm32 */ 124962306a36Sopenharmony_ci EMIT2_off32(0x69, 125062306a36Sopenharmony_ci add_2reg(0xC0, dst_reg, dst_reg), 125162306a36Sopenharmony_ci imm32); 125262306a36Sopenharmony_ci break; 125362306a36Sopenharmony_ci 125462306a36Sopenharmony_ci case BPF_ALU | BPF_MUL | BPF_X: 125562306a36Sopenharmony_ci case BPF_ALU64 | BPF_MUL | BPF_X: 125662306a36Sopenharmony_ci maybe_emit_mod(&prog, src_reg, dst_reg, 125762306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64); 125862306a36Sopenharmony_ci 125962306a36Sopenharmony_ci /* imul dst_reg, src_reg */ 126062306a36Sopenharmony_ci EMIT3(0x0F, 0xAF, add_2reg(0xC0, src_reg, dst_reg)); 126162306a36Sopenharmony_ci break; 126262306a36Sopenharmony_ci 126362306a36Sopenharmony_ci /* Shifts */ 126462306a36Sopenharmony_ci case BPF_ALU | BPF_LSH | BPF_K: 126562306a36Sopenharmony_ci case BPF_ALU | BPF_RSH | BPF_K: 126662306a36Sopenharmony_ci case BPF_ALU | BPF_ARSH | BPF_K: 126762306a36Sopenharmony_ci case BPF_ALU64 | BPF_LSH | BPF_K: 126862306a36Sopenharmony_ci case BPF_ALU64 | BPF_RSH | BPF_K: 126962306a36Sopenharmony_ci case BPF_ALU64 | BPF_ARSH | BPF_K: 127062306a36Sopenharmony_ci maybe_emit_1mod(&prog, dst_reg, 127162306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64); 127262306a36Sopenharmony_ci 127362306a36Sopenharmony_ci b3 = simple_alu_opcodes[BPF_OP(insn->code)]; 
127462306a36Sopenharmony_ci if (imm32 == 1) 127562306a36Sopenharmony_ci EMIT2(0xD1, add_1reg(b3, dst_reg)); 127662306a36Sopenharmony_ci else 127762306a36Sopenharmony_ci EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); 127862306a36Sopenharmony_ci break; 127962306a36Sopenharmony_ci 128062306a36Sopenharmony_ci case BPF_ALU | BPF_LSH | BPF_X: 128162306a36Sopenharmony_ci case BPF_ALU | BPF_RSH | BPF_X: 128262306a36Sopenharmony_ci case BPF_ALU | BPF_ARSH | BPF_X: 128362306a36Sopenharmony_ci case BPF_ALU64 | BPF_LSH | BPF_X: 128462306a36Sopenharmony_ci case BPF_ALU64 | BPF_RSH | BPF_X: 128562306a36Sopenharmony_ci case BPF_ALU64 | BPF_ARSH | BPF_X: 128662306a36Sopenharmony_ci /* BMI2 shifts aren't better when shift count is already in rcx */ 128762306a36Sopenharmony_ci if (boot_cpu_has(X86_FEATURE_BMI2) && src_reg != BPF_REG_4) { 128862306a36Sopenharmony_ci /* shrx/sarx/shlx dst_reg, dst_reg, src_reg */ 128962306a36Sopenharmony_ci bool w = (BPF_CLASS(insn->code) == BPF_ALU64); 129062306a36Sopenharmony_ci u8 op; 129162306a36Sopenharmony_ci 129262306a36Sopenharmony_ci switch (BPF_OP(insn->code)) { 129362306a36Sopenharmony_ci case BPF_LSH: 129462306a36Sopenharmony_ci op = 1; /* prefix 0x66 */ 129562306a36Sopenharmony_ci break; 129662306a36Sopenharmony_ci case BPF_RSH: 129762306a36Sopenharmony_ci op = 3; /* prefix 0xf2 */ 129862306a36Sopenharmony_ci break; 129962306a36Sopenharmony_ci case BPF_ARSH: 130062306a36Sopenharmony_ci op = 2; /* prefix 0xf3 */ 130162306a36Sopenharmony_ci break; 130262306a36Sopenharmony_ci } 130362306a36Sopenharmony_ci 130462306a36Sopenharmony_ci emit_shiftx(&prog, dst_reg, src_reg, w, op); 130562306a36Sopenharmony_ci 130662306a36Sopenharmony_ci break; 130762306a36Sopenharmony_ci } 130862306a36Sopenharmony_ci 130962306a36Sopenharmony_ci if (src_reg != BPF_REG_4) { /* common case */ 131062306a36Sopenharmony_ci /* Check for bad case when dst_reg == rcx */ 131162306a36Sopenharmony_ci if (dst_reg == BPF_REG_4) { 131262306a36Sopenharmony_ci /* mov r11, dst_reg */ 
131362306a36Sopenharmony_ci EMIT_mov(AUX_REG, dst_reg); 131462306a36Sopenharmony_ci dst_reg = AUX_REG; 131562306a36Sopenharmony_ci } else { 131662306a36Sopenharmony_ci EMIT1(0x51); /* push rcx */ 131762306a36Sopenharmony_ci } 131862306a36Sopenharmony_ci /* mov rcx, src_reg */ 131962306a36Sopenharmony_ci EMIT_mov(BPF_REG_4, src_reg); 132062306a36Sopenharmony_ci } 132162306a36Sopenharmony_ci 132262306a36Sopenharmony_ci /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ 132362306a36Sopenharmony_ci maybe_emit_1mod(&prog, dst_reg, 132462306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_ALU64); 132562306a36Sopenharmony_ci 132662306a36Sopenharmony_ci b3 = simple_alu_opcodes[BPF_OP(insn->code)]; 132762306a36Sopenharmony_ci EMIT2(0xD3, add_1reg(b3, dst_reg)); 132862306a36Sopenharmony_ci 132962306a36Sopenharmony_ci if (src_reg != BPF_REG_4) { 133062306a36Sopenharmony_ci if (insn->dst_reg == BPF_REG_4) 133162306a36Sopenharmony_ci /* mov dst_reg, r11 */ 133262306a36Sopenharmony_ci EMIT_mov(insn->dst_reg, AUX_REG); 133362306a36Sopenharmony_ci else 133462306a36Sopenharmony_ci EMIT1(0x59); /* pop rcx */ 133562306a36Sopenharmony_ci } 133662306a36Sopenharmony_ci 133762306a36Sopenharmony_ci break; 133862306a36Sopenharmony_ci 133962306a36Sopenharmony_ci case BPF_ALU | BPF_END | BPF_FROM_BE: 134062306a36Sopenharmony_ci case BPF_ALU64 | BPF_END | BPF_FROM_LE: 134162306a36Sopenharmony_ci switch (imm32) { 134262306a36Sopenharmony_ci case 16: 134362306a36Sopenharmony_ci /* Emit 'ror %ax, 8' to swap lower 2 bytes */ 134462306a36Sopenharmony_ci EMIT1(0x66); 134562306a36Sopenharmony_ci if (is_ereg(dst_reg)) 134662306a36Sopenharmony_ci EMIT1(0x41); 134762306a36Sopenharmony_ci EMIT3(0xC1, add_1reg(0xC8, dst_reg), 8); 134862306a36Sopenharmony_ci 134962306a36Sopenharmony_ci /* Emit 'movzwl eax, ax' */ 135062306a36Sopenharmony_ci if (is_ereg(dst_reg)) 135162306a36Sopenharmony_ci EMIT3(0x45, 0x0F, 0xB7); 135262306a36Sopenharmony_ci else 135362306a36Sopenharmony_ci EMIT2(0x0F, 0xB7); 
135462306a36Sopenharmony_ci EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); 135562306a36Sopenharmony_ci break; 135662306a36Sopenharmony_ci case 32: 135762306a36Sopenharmony_ci /* Emit 'bswap eax' to swap lower 4 bytes */ 135862306a36Sopenharmony_ci if (is_ereg(dst_reg)) 135962306a36Sopenharmony_ci EMIT2(0x41, 0x0F); 136062306a36Sopenharmony_ci else 136162306a36Sopenharmony_ci EMIT1(0x0F); 136262306a36Sopenharmony_ci EMIT1(add_1reg(0xC8, dst_reg)); 136362306a36Sopenharmony_ci break; 136462306a36Sopenharmony_ci case 64: 136562306a36Sopenharmony_ci /* Emit 'bswap rax' to swap 8 bytes */ 136662306a36Sopenharmony_ci EMIT3(add_1mod(0x48, dst_reg), 0x0F, 136762306a36Sopenharmony_ci add_1reg(0xC8, dst_reg)); 136862306a36Sopenharmony_ci break; 136962306a36Sopenharmony_ci } 137062306a36Sopenharmony_ci break; 137162306a36Sopenharmony_ci 137262306a36Sopenharmony_ci case BPF_ALU | BPF_END | BPF_FROM_LE: 137362306a36Sopenharmony_ci switch (imm32) { 137462306a36Sopenharmony_ci case 16: 137562306a36Sopenharmony_ci /* 137662306a36Sopenharmony_ci * Emit 'movzwl eax, ax' to zero extend 16-bit 137762306a36Sopenharmony_ci * into 64 bit 137862306a36Sopenharmony_ci */ 137962306a36Sopenharmony_ci if (is_ereg(dst_reg)) 138062306a36Sopenharmony_ci EMIT3(0x45, 0x0F, 0xB7); 138162306a36Sopenharmony_ci else 138262306a36Sopenharmony_ci EMIT2(0x0F, 0xB7); 138362306a36Sopenharmony_ci EMIT1(add_2reg(0xC0, dst_reg, dst_reg)); 138462306a36Sopenharmony_ci break; 138562306a36Sopenharmony_ci case 32: 138662306a36Sopenharmony_ci /* Emit 'mov eax, eax' to clear upper 32-bits */ 138762306a36Sopenharmony_ci if (is_ereg(dst_reg)) 138862306a36Sopenharmony_ci EMIT1(0x45); 138962306a36Sopenharmony_ci EMIT2(0x89, add_2reg(0xC0, dst_reg, dst_reg)); 139062306a36Sopenharmony_ci break; 139162306a36Sopenharmony_ci case 64: 139262306a36Sopenharmony_ci /* nop */ 139362306a36Sopenharmony_ci break; 139462306a36Sopenharmony_ci } 139562306a36Sopenharmony_ci break; 139662306a36Sopenharmony_ci 139762306a36Sopenharmony_ci /* 
speculation barrier */ 139862306a36Sopenharmony_ci case BPF_ST | BPF_NOSPEC: 139962306a36Sopenharmony_ci EMIT_LFENCE(); 140062306a36Sopenharmony_ci break; 140162306a36Sopenharmony_ci 140262306a36Sopenharmony_ci /* ST: *(u8*)(dst_reg + off) = imm */ 140362306a36Sopenharmony_ci case BPF_ST | BPF_MEM | BPF_B: 140462306a36Sopenharmony_ci if (is_ereg(dst_reg)) 140562306a36Sopenharmony_ci EMIT2(0x41, 0xC6); 140662306a36Sopenharmony_ci else 140762306a36Sopenharmony_ci EMIT1(0xC6); 140862306a36Sopenharmony_ci goto st; 140962306a36Sopenharmony_ci case BPF_ST | BPF_MEM | BPF_H: 141062306a36Sopenharmony_ci if (is_ereg(dst_reg)) 141162306a36Sopenharmony_ci EMIT3(0x66, 0x41, 0xC7); 141262306a36Sopenharmony_ci else 141362306a36Sopenharmony_ci EMIT2(0x66, 0xC7); 141462306a36Sopenharmony_ci goto st; 141562306a36Sopenharmony_ci case BPF_ST | BPF_MEM | BPF_W: 141662306a36Sopenharmony_ci if (is_ereg(dst_reg)) 141762306a36Sopenharmony_ci EMIT2(0x41, 0xC7); 141862306a36Sopenharmony_ci else 141962306a36Sopenharmony_ci EMIT1(0xC7); 142062306a36Sopenharmony_ci goto st; 142162306a36Sopenharmony_ci case BPF_ST | BPF_MEM | BPF_DW: 142262306a36Sopenharmony_ci EMIT2(add_1mod(0x48, dst_reg), 0xC7); 142362306a36Sopenharmony_ci 142462306a36Sopenharmony_cist: if (is_imm8(insn->off)) 142562306a36Sopenharmony_ci EMIT2(add_1reg(0x40, dst_reg), insn->off); 142662306a36Sopenharmony_ci else 142762306a36Sopenharmony_ci EMIT1_off32(add_1reg(0x80, dst_reg), insn->off); 142862306a36Sopenharmony_ci 142962306a36Sopenharmony_ci EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(insn->code))); 143062306a36Sopenharmony_ci break; 143162306a36Sopenharmony_ci 143262306a36Sopenharmony_ci /* STX: *(u8*)(dst_reg + off) = src_reg */ 143362306a36Sopenharmony_ci case BPF_STX | BPF_MEM | BPF_B: 143462306a36Sopenharmony_ci case BPF_STX | BPF_MEM | BPF_H: 143562306a36Sopenharmony_ci case BPF_STX | BPF_MEM | BPF_W: 143662306a36Sopenharmony_ci case BPF_STX | BPF_MEM | BPF_DW: 143762306a36Sopenharmony_ci emit_stx(&prog, 
BPF_SIZE(insn->code), dst_reg, src_reg, insn->off); 143862306a36Sopenharmony_ci break; 143962306a36Sopenharmony_ci 144062306a36Sopenharmony_ci /* LDX: dst_reg = *(u8*)(src_reg + off) */ 144162306a36Sopenharmony_ci case BPF_LDX | BPF_MEM | BPF_B: 144262306a36Sopenharmony_ci case BPF_LDX | BPF_PROBE_MEM | BPF_B: 144362306a36Sopenharmony_ci case BPF_LDX | BPF_MEM | BPF_H: 144462306a36Sopenharmony_ci case BPF_LDX | BPF_PROBE_MEM | BPF_H: 144562306a36Sopenharmony_ci case BPF_LDX | BPF_MEM | BPF_W: 144662306a36Sopenharmony_ci case BPF_LDX | BPF_PROBE_MEM | BPF_W: 144762306a36Sopenharmony_ci case BPF_LDX | BPF_MEM | BPF_DW: 144862306a36Sopenharmony_ci case BPF_LDX | BPF_PROBE_MEM | BPF_DW: 144962306a36Sopenharmony_ci /* LDXS: dst_reg = *(s8*)(src_reg + off) */ 145062306a36Sopenharmony_ci case BPF_LDX | BPF_MEMSX | BPF_B: 145162306a36Sopenharmony_ci case BPF_LDX | BPF_MEMSX | BPF_H: 145262306a36Sopenharmony_ci case BPF_LDX | BPF_MEMSX | BPF_W: 145362306a36Sopenharmony_ci case BPF_LDX | BPF_PROBE_MEMSX | BPF_B: 145462306a36Sopenharmony_ci case BPF_LDX | BPF_PROBE_MEMSX | BPF_H: 145562306a36Sopenharmony_ci case BPF_LDX | BPF_PROBE_MEMSX | BPF_W: 145662306a36Sopenharmony_ci insn_off = insn->off; 145762306a36Sopenharmony_ci 145862306a36Sopenharmony_ci if (BPF_MODE(insn->code) == BPF_PROBE_MEM || 145962306a36Sopenharmony_ci BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { 146062306a36Sopenharmony_ci /* Conservatively check that src_reg + insn->off is a kernel address: 146162306a36Sopenharmony_ci * src_reg + insn->off >= TASK_SIZE_MAX + PAGE_SIZE 146262306a36Sopenharmony_ci * src_reg is used as scratch for src_reg += insn->off and restored 146362306a36Sopenharmony_ci * after emit_ldx if necessary 146462306a36Sopenharmony_ci */ 146562306a36Sopenharmony_ci 146662306a36Sopenharmony_ci u64 limit = TASK_SIZE_MAX + PAGE_SIZE; 146762306a36Sopenharmony_ci u8 *end_of_jmp; 146862306a36Sopenharmony_ci 146962306a36Sopenharmony_ci /* At end of these emitted checks, insn->off will have been added 
147062306a36Sopenharmony_ci * to src_reg, so no need to do relative load with insn->off offset 147162306a36Sopenharmony_ci */ 147262306a36Sopenharmony_ci insn_off = 0; 147362306a36Sopenharmony_ci 147462306a36Sopenharmony_ci /* movabsq r11, limit */ 147562306a36Sopenharmony_ci EMIT2(add_1mod(0x48, AUX_REG), add_1reg(0xB8, AUX_REG)); 147662306a36Sopenharmony_ci EMIT((u32)limit, 4); 147762306a36Sopenharmony_ci EMIT(limit >> 32, 4); 147862306a36Sopenharmony_ci 147962306a36Sopenharmony_ci if (insn->off) { 148062306a36Sopenharmony_ci /* add src_reg, insn->off */ 148162306a36Sopenharmony_ci maybe_emit_1mod(&prog, src_reg, true); 148262306a36Sopenharmony_ci EMIT2_off32(0x81, add_1reg(0xC0, src_reg), insn->off); 148362306a36Sopenharmony_ci } 148462306a36Sopenharmony_ci 148562306a36Sopenharmony_ci /* cmp src_reg, r11 */ 148662306a36Sopenharmony_ci maybe_emit_mod(&prog, src_reg, AUX_REG, true); 148762306a36Sopenharmony_ci EMIT2(0x39, add_2reg(0xC0, src_reg, AUX_REG)); 148862306a36Sopenharmony_ci 148962306a36Sopenharmony_ci /* if unsigned '>=', goto load */ 149062306a36Sopenharmony_ci EMIT2(X86_JAE, 0); 149162306a36Sopenharmony_ci end_of_jmp = prog; 149262306a36Sopenharmony_ci 149362306a36Sopenharmony_ci /* xor dst_reg, dst_reg */ 149462306a36Sopenharmony_ci emit_mov_imm32(&prog, false, dst_reg, 0); 149562306a36Sopenharmony_ci /* jmp byte_after_ldx */ 149662306a36Sopenharmony_ci EMIT2(0xEB, 0); 149762306a36Sopenharmony_ci 149862306a36Sopenharmony_ci /* populate jmp_offset for JAE above to jump to start_of_ldx */ 149962306a36Sopenharmony_ci start_of_ldx = prog; 150062306a36Sopenharmony_ci end_of_jmp[-1] = start_of_ldx - end_of_jmp; 150162306a36Sopenharmony_ci } 150262306a36Sopenharmony_ci if (BPF_MODE(insn->code) == BPF_PROBE_MEMSX || 150362306a36Sopenharmony_ci BPF_MODE(insn->code) == BPF_MEMSX) 150462306a36Sopenharmony_ci emit_ldsx(&prog, BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); 150562306a36Sopenharmony_ci else 150662306a36Sopenharmony_ci emit_ldx(&prog, 
BPF_SIZE(insn->code), dst_reg, src_reg, insn_off); 150762306a36Sopenharmony_ci if (BPF_MODE(insn->code) == BPF_PROBE_MEM || 150862306a36Sopenharmony_ci BPF_MODE(insn->code) == BPF_PROBE_MEMSX) { 150962306a36Sopenharmony_ci struct exception_table_entry *ex; 151062306a36Sopenharmony_ci u8 *_insn = image + proglen + (start_of_ldx - temp); 151162306a36Sopenharmony_ci s64 delta; 151262306a36Sopenharmony_ci 151362306a36Sopenharmony_ci /* populate jmp_offset for JMP above */ 151462306a36Sopenharmony_ci start_of_ldx[-1] = prog - start_of_ldx; 151562306a36Sopenharmony_ci 151662306a36Sopenharmony_ci if (insn->off && src_reg != dst_reg) { 151762306a36Sopenharmony_ci /* sub src_reg, insn->off 151862306a36Sopenharmony_ci * Restore src_reg after "add src_reg, insn->off" in prev 151962306a36Sopenharmony_ci * if statement. But if src_reg == dst_reg, emit_ldx 152062306a36Sopenharmony_ci * above already clobbered src_reg, so no need to restore. 152162306a36Sopenharmony_ci * If add src_reg, insn->off was unnecessary, no need to 152262306a36Sopenharmony_ci * restore either. 
152362306a36Sopenharmony_ci */ 152462306a36Sopenharmony_ci maybe_emit_1mod(&prog, src_reg, true); 152562306a36Sopenharmony_ci EMIT2_off32(0x81, add_1reg(0xE8, src_reg), insn->off); 152662306a36Sopenharmony_ci } 152762306a36Sopenharmony_ci 152862306a36Sopenharmony_ci if (!bpf_prog->aux->extable) 152962306a36Sopenharmony_ci break; 153062306a36Sopenharmony_ci 153162306a36Sopenharmony_ci if (excnt >= bpf_prog->aux->num_exentries) { 153262306a36Sopenharmony_ci pr_err("ex gen bug\n"); 153362306a36Sopenharmony_ci return -EFAULT; 153462306a36Sopenharmony_ci } 153562306a36Sopenharmony_ci ex = &bpf_prog->aux->extable[excnt++]; 153662306a36Sopenharmony_ci 153762306a36Sopenharmony_ci delta = _insn - (u8 *)&ex->insn; 153862306a36Sopenharmony_ci if (!is_simm32(delta)) { 153962306a36Sopenharmony_ci pr_err("extable->insn doesn't fit into 32-bit\n"); 154062306a36Sopenharmony_ci return -EFAULT; 154162306a36Sopenharmony_ci } 154262306a36Sopenharmony_ci /* switch ex to rw buffer for writes */ 154362306a36Sopenharmony_ci ex = (void *)rw_image + ((void *)ex - (void *)image); 154462306a36Sopenharmony_ci 154562306a36Sopenharmony_ci ex->insn = delta; 154662306a36Sopenharmony_ci 154762306a36Sopenharmony_ci ex->data = EX_TYPE_BPF; 154862306a36Sopenharmony_ci 154962306a36Sopenharmony_ci if (dst_reg > BPF_REG_9) { 155062306a36Sopenharmony_ci pr_err("verifier error\n"); 155162306a36Sopenharmony_ci return -EFAULT; 155262306a36Sopenharmony_ci } 155362306a36Sopenharmony_ci /* 155462306a36Sopenharmony_ci * Compute size of x86 insn and its target dest x86 register. 155562306a36Sopenharmony_ci * ex_handler_bpf() will use lower 8 bits to adjust 155662306a36Sopenharmony_ci * pt_regs->ip to jump over this x86 instruction 155762306a36Sopenharmony_ci * and upper bits to figure out which pt_regs to zero out. 155862306a36Sopenharmony_ci * End result: x86 insn "mov rbx, qword ptr [rax+0x14]" 155962306a36Sopenharmony_ci * of 4 bytes will be ignored and rbx will be zero inited. 
156062306a36Sopenharmony_ci */ 156162306a36Sopenharmony_ci ex->fixup = (prog - start_of_ldx) | (reg2pt_regs[dst_reg] << 8); 156262306a36Sopenharmony_ci } 156362306a36Sopenharmony_ci break; 156462306a36Sopenharmony_ci 156562306a36Sopenharmony_ci case BPF_STX | BPF_ATOMIC | BPF_W: 156662306a36Sopenharmony_ci case BPF_STX | BPF_ATOMIC | BPF_DW: 156762306a36Sopenharmony_ci if (insn->imm == (BPF_AND | BPF_FETCH) || 156862306a36Sopenharmony_ci insn->imm == (BPF_OR | BPF_FETCH) || 156962306a36Sopenharmony_ci insn->imm == (BPF_XOR | BPF_FETCH)) { 157062306a36Sopenharmony_ci bool is64 = BPF_SIZE(insn->code) == BPF_DW; 157162306a36Sopenharmony_ci u32 real_src_reg = src_reg; 157262306a36Sopenharmony_ci u32 real_dst_reg = dst_reg; 157362306a36Sopenharmony_ci u8 *branch_target; 157462306a36Sopenharmony_ci 157562306a36Sopenharmony_ci /* 157662306a36Sopenharmony_ci * Can't be implemented with a single x86 insn. 157762306a36Sopenharmony_ci * Need to do a CMPXCHG loop. 157862306a36Sopenharmony_ci */ 157962306a36Sopenharmony_ci 158062306a36Sopenharmony_ci /* Will need RAX as a CMPXCHG operand so save R0 */ 158162306a36Sopenharmony_ci emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0); 158262306a36Sopenharmony_ci if (src_reg == BPF_REG_0) 158362306a36Sopenharmony_ci real_src_reg = BPF_REG_AX; 158462306a36Sopenharmony_ci if (dst_reg == BPF_REG_0) 158562306a36Sopenharmony_ci real_dst_reg = BPF_REG_AX; 158662306a36Sopenharmony_ci 158762306a36Sopenharmony_ci branch_target = prog; 158862306a36Sopenharmony_ci /* Load old value */ 158962306a36Sopenharmony_ci emit_ldx(&prog, BPF_SIZE(insn->code), 159062306a36Sopenharmony_ci BPF_REG_0, real_dst_reg, insn->off); 159162306a36Sopenharmony_ci /* 159262306a36Sopenharmony_ci * Perform the (commutative) operation locally, 159362306a36Sopenharmony_ci * put the result in the AUX_REG. 
159462306a36Sopenharmony_ci */ 159562306a36Sopenharmony_ci emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0); 159662306a36Sopenharmony_ci maybe_emit_mod(&prog, AUX_REG, real_src_reg, is64); 159762306a36Sopenharmony_ci EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)], 159862306a36Sopenharmony_ci add_2reg(0xC0, AUX_REG, real_src_reg)); 159962306a36Sopenharmony_ci /* Attempt to swap in new value */ 160062306a36Sopenharmony_ci err = emit_atomic(&prog, BPF_CMPXCHG, 160162306a36Sopenharmony_ci real_dst_reg, AUX_REG, 160262306a36Sopenharmony_ci insn->off, 160362306a36Sopenharmony_ci BPF_SIZE(insn->code)); 160462306a36Sopenharmony_ci if (WARN_ON(err)) 160562306a36Sopenharmony_ci return err; 160662306a36Sopenharmony_ci /* 160762306a36Sopenharmony_ci * ZF tells us whether we won the race. If it's 160862306a36Sopenharmony_ci * cleared we need to try again. 160962306a36Sopenharmony_ci */ 161062306a36Sopenharmony_ci EMIT2(X86_JNE, -(prog - branch_target) - 2); 161162306a36Sopenharmony_ci /* Return the pre-modification value */ 161262306a36Sopenharmony_ci emit_mov_reg(&prog, is64, real_src_reg, BPF_REG_0); 161362306a36Sopenharmony_ci /* Restore R0 after clobbering RAX */ 161462306a36Sopenharmony_ci emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX); 161562306a36Sopenharmony_ci break; 161662306a36Sopenharmony_ci } 161762306a36Sopenharmony_ci 161862306a36Sopenharmony_ci err = emit_atomic(&prog, insn->imm, dst_reg, src_reg, 161962306a36Sopenharmony_ci insn->off, BPF_SIZE(insn->code)); 162062306a36Sopenharmony_ci if (err) 162162306a36Sopenharmony_ci return err; 162262306a36Sopenharmony_ci break; 162362306a36Sopenharmony_ci 162462306a36Sopenharmony_ci /* call */ 162562306a36Sopenharmony_ci case BPF_JMP | BPF_CALL: { 162662306a36Sopenharmony_ci int offs; 162762306a36Sopenharmony_ci 162862306a36Sopenharmony_ci func = (u8 *) __bpf_call_base + imm32; 162962306a36Sopenharmony_ci if (tail_call_reachable) { 163062306a36Sopenharmony_ci RESTORE_TAIL_CALL_CNT(bpf_prog->aux->stack_depth); 
163162306a36Sopenharmony_ci if (!imm32) 163262306a36Sopenharmony_ci return -EINVAL; 163362306a36Sopenharmony_ci offs = 7 + x86_call_depth_emit_accounting(&prog, func); 163462306a36Sopenharmony_ci } else { 163562306a36Sopenharmony_ci if (!imm32) 163662306a36Sopenharmony_ci return -EINVAL; 163762306a36Sopenharmony_ci offs = x86_call_depth_emit_accounting(&prog, func); 163862306a36Sopenharmony_ci } 163962306a36Sopenharmony_ci if (emit_call(&prog, func, image + addrs[i - 1] + offs)) 164062306a36Sopenharmony_ci return -EINVAL; 164162306a36Sopenharmony_ci break; 164262306a36Sopenharmony_ci } 164362306a36Sopenharmony_ci 164462306a36Sopenharmony_ci case BPF_JMP | BPF_TAIL_CALL: 164562306a36Sopenharmony_ci if (imm32) 164662306a36Sopenharmony_ci emit_bpf_tail_call_direct(&bpf_prog->aux->poke_tab[imm32 - 1], 164762306a36Sopenharmony_ci &prog, image + addrs[i - 1], 164862306a36Sopenharmony_ci callee_regs_used, 164962306a36Sopenharmony_ci bpf_prog->aux->stack_depth, 165062306a36Sopenharmony_ci ctx); 165162306a36Sopenharmony_ci else 165262306a36Sopenharmony_ci emit_bpf_tail_call_indirect(&prog, 165362306a36Sopenharmony_ci callee_regs_used, 165462306a36Sopenharmony_ci bpf_prog->aux->stack_depth, 165562306a36Sopenharmony_ci image + addrs[i - 1], 165662306a36Sopenharmony_ci ctx); 165762306a36Sopenharmony_ci break; 165862306a36Sopenharmony_ci 165962306a36Sopenharmony_ci /* cond jump */ 166062306a36Sopenharmony_ci case BPF_JMP | BPF_JEQ | BPF_X: 166162306a36Sopenharmony_ci case BPF_JMP | BPF_JNE | BPF_X: 166262306a36Sopenharmony_ci case BPF_JMP | BPF_JGT | BPF_X: 166362306a36Sopenharmony_ci case BPF_JMP | BPF_JLT | BPF_X: 166462306a36Sopenharmony_ci case BPF_JMP | BPF_JGE | BPF_X: 166562306a36Sopenharmony_ci case BPF_JMP | BPF_JLE | BPF_X: 166662306a36Sopenharmony_ci case BPF_JMP | BPF_JSGT | BPF_X: 166762306a36Sopenharmony_ci case BPF_JMP | BPF_JSLT | BPF_X: 166862306a36Sopenharmony_ci case BPF_JMP | BPF_JSGE | BPF_X: 166962306a36Sopenharmony_ci case BPF_JMP | BPF_JSLE | BPF_X: 
167062306a36Sopenharmony_ci case BPF_JMP32 | BPF_JEQ | BPF_X: 167162306a36Sopenharmony_ci case BPF_JMP32 | BPF_JNE | BPF_X: 167262306a36Sopenharmony_ci case BPF_JMP32 | BPF_JGT | BPF_X: 167362306a36Sopenharmony_ci case BPF_JMP32 | BPF_JLT | BPF_X: 167462306a36Sopenharmony_ci case BPF_JMP32 | BPF_JGE | BPF_X: 167562306a36Sopenharmony_ci case BPF_JMP32 | BPF_JLE | BPF_X: 167662306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSGT | BPF_X: 167762306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSLT | BPF_X: 167862306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSGE | BPF_X: 167962306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSLE | BPF_X: 168062306a36Sopenharmony_ci /* cmp dst_reg, src_reg */ 168162306a36Sopenharmony_ci maybe_emit_mod(&prog, dst_reg, src_reg, 168262306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_JMP); 168362306a36Sopenharmony_ci EMIT2(0x39, add_2reg(0xC0, dst_reg, src_reg)); 168462306a36Sopenharmony_ci goto emit_cond_jmp; 168562306a36Sopenharmony_ci 168662306a36Sopenharmony_ci case BPF_JMP | BPF_JSET | BPF_X: 168762306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSET | BPF_X: 168862306a36Sopenharmony_ci /* test dst_reg, src_reg */ 168962306a36Sopenharmony_ci maybe_emit_mod(&prog, dst_reg, src_reg, 169062306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_JMP); 169162306a36Sopenharmony_ci EMIT2(0x85, add_2reg(0xC0, dst_reg, src_reg)); 169262306a36Sopenharmony_ci goto emit_cond_jmp; 169362306a36Sopenharmony_ci 169462306a36Sopenharmony_ci case BPF_JMP | BPF_JSET | BPF_K: 169562306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSET | BPF_K: 169662306a36Sopenharmony_ci /* test dst_reg, imm32 */ 169762306a36Sopenharmony_ci maybe_emit_1mod(&prog, dst_reg, 169862306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_JMP); 169962306a36Sopenharmony_ci EMIT2_off32(0xF7, add_1reg(0xC0, dst_reg), imm32); 170062306a36Sopenharmony_ci goto emit_cond_jmp; 170162306a36Sopenharmony_ci 170262306a36Sopenharmony_ci case BPF_JMP | BPF_JEQ | BPF_K: 170362306a36Sopenharmony_ci case BPF_JMP | BPF_JNE | BPF_K: 
170462306a36Sopenharmony_ci case BPF_JMP | BPF_JGT | BPF_K: 170562306a36Sopenharmony_ci case BPF_JMP | BPF_JLT | BPF_K: 170662306a36Sopenharmony_ci case BPF_JMP | BPF_JGE | BPF_K: 170762306a36Sopenharmony_ci case BPF_JMP | BPF_JLE | BPF_K: 170862306a36Sopenharmony_ci case BPF_JMP | BPF_JSGT | BPF_K: 170962306a36Sopenharmony_ci case BPF_JMP | BPF_JSLT | BPF_K: 171062306a36Sopenharmony_ci case BPF_JMP | BPF_JSGE | BPF_K: 171162306a36Sopenharmony_ci case BPF_JMP | BPF_JSLE | BPF_K: 171262306a36Sopenharmony_ci case BPF_JMP32 | BPF_JEQ | BPF_K: 171362306a36Sopenharmony_ci case BPF_JMP32 | BPF_JNE | BPF_K: 171462306a36Sopenharmony_ci case BPF_JMP32 | BPF_JGT | BPF_K: 171562306a36Sopenharmony_ci case BPF_JMP32 | BPF_JLT | BPF_K: 171662306a36Sopenharmony_ci case BPF_JMP32 | BPF_JGE | BPF_K: 171762306a36Sopenharmony_ci case BPF_JMP32 | BPF_JLE | BPF_K: 171862306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSGT | BPF_K: 171962306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSLT | BPF_K: 172062306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSGE | BPF_K: 172162306a36Sopenharmony_ci case BPF_JMP32 | BPF_JSLE | BPF_K: 172262306a36Sopenharmony_ci /* test dst_reg, dst_reg to save one extra byte */ 172362306a36Sopenharmony_ci if (imm32 == 0) { 172462306a36Sopenharmony_ci maybe_emit_mod(&prog, dst_reg, dst_reg, 172562306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_JMP); 172662306a36Sopenharmony_ci EMIT2(0x85, add_2reg(0xC0, dst_reg, dst_reg)); 172762306a36Sopenharmony_ci goto emit_cond_jmp; 172862306a36Sopenharmony_ci } 172962306a36Sopenharmony_ci 173062306a36Sopenharmony_ci /* cmp dst_reg, imm8/32 */ 173162306a36Sopenharmony_ci maybe_emit_1mod(&prog, dst_reg, 173262306a36Sopenharmony_ci BPF_CLASS(insn->code) == BPF_JMP); 173362306a36Sopenharmony_ci 173462306a36Sopenharmony_ci if (is_imm8(imm32)) 173562306a36Sopenharmony_ci EMIT3(0x83, add_1reg(0xF8, dst_reg), imm32); 173662306a36Sopenharmony_ci else 173762306a36Sopenharmony_ci EMIT2_off32(0x81, add_1reg(0xF8, dst_reg), imm32); 
173862306a36Sopenharmony_ci 173962306a36Sopenharmony_ciemit_cond_jmp: /* Convert BPF opcode to x86 */ 174062306a36Sopenharmony_ci switch (BPF_OP(insn->code)) { 174162306a36Sopenharmony_ci case BPF_JEQ: 174262306a36Sopenharmony_ci jmp_cond = X86_JE; 174362306a36Sopenharmony_ci break; 174462306a36Sopenharmony_ci case BPF_JSET: 174562306a36Sopenharmony_ci case BPF_JNE: 174662306a36Sopenharmony_ci jmp_cond = X86_JNE; 174762306a36Sopenharmony_ci break; 174862306a36Sopenharmony_ci case BPF_JGT: 174962306a36Sopenharmony_ci /* GT is unsigned '>', JA in x86 */ 175062306a36Sopenharmony_ci jmp_cond = X86_JA; 175162306a36Sopenharmony_ci break; 175262306a36Sopenharmony_ci case BPF_JLT: 175362306a36Sopenharmony_ci /* LT is unsigned '<', JB in x86 */ 175462306a36Sopenharmony_ci jmp_cond = X86_JB; 175562306a36Sopenharmony_ci break; 175662306a36Sopenharmony_ci case BPF_JGE: 175762306a36Sopenharmony_ci /* GE is unsigned '>=', JAE in x86 */ 175862306a36Sopenharmony_ci jmp_cond = X86_JAE; 175962306a36Sopenharmony_ci break; 176062306a36Sopenharmony_ci case BPF_JLE: 176162306a36Sopenharmony_ci /* LE is unsigned '<=', JBE in x86 */ 176262306a36Sopenharmony_ci jmp_cond = X86_JBE; 176362306a36Sopenharmony_ci break; 176462306a36Sopenharmony_ci case BPF_JSGT: 176562306a36Sopenharmony_ci /* Signed '>', GT in x86 */ 176662306a36Sopenharmony_ci jmp_cond = X86_JG; 176762306a36Sopenharmony_ci break; 176862306a36Sopenharmony_ci case BPF_JSLT: 176962306a36Sopenharmony_ci /* Signed '<', LT in x86 */ 177062306a36Sopenharmony_ci jmp_cond = X86_JL; 177162306a36Sopenharmony_ci break; 177262306a36Sopenharmony_ci case BPF_JSGE: 177362306a36Sopenharmony_ci /* Signed '>=', GE in x86 */ 177462306a36Sopenharmony_ci jmp_cond = X86_JGE; 177562306a36Sopenharmony_ci break; 177662306a36Sopenharmony_ci case BPF_JSLE: 177762306a36Sopenharmony_ci /* Signed '<=', LE in x86 */ 177862306a36Sopenharmony_ci jmp_cond = X86_JLE; 177962306a36Sopenharmony_ci break; 178062306a36Sopenharmony_ci default: /* to silence GCC 
warning */ 178162306a36Sopenharmony_ci return -EFAULT; 178262306a36Sopenharmony_ci } 178362306a36Sopenharmony_ci jmp_offset = addrs[i + insn->off] - addrs[i]; 178462306a36Sopenharmony_ci if (is_imm8(jmp_offset)) { 178562306a36Sopenharmony_ci if (jmp_padding) { 178662306a36Sopenharmony_ci /* To keep the jmp_offset valid, the extra bytes are 178762306a36Sopenharmony_ci * padded before the jump insn, so we subtract the 178862306a36Sopenharmony_ci * 2 bytes of jmp_cond insn from INSN_SZ_DIFF. 178962306a36Sopenharmony_ci * 179062306a36Sopenharmony_ci * If the previous pass already emits an imm8 179162306a36Sopenharmony_ci * jmp_cond, then this BPF insn won't shrink, so 179262306a36Sopenharmony_ci * "nops" is 0. 179362306a36Sopenharmony_ci * 179462306a36Sopenharmony_ci * On the other hand, if the previous pass emits an 179562306a36Sopenharmony_ci * imm32 jmp_cond, the extra 4 bytes(*) is padded to 179662306a36Sopenharmony_ci * keep the image from shrinking further. 179762306a36Sopenharmony_ci * 179862306a36Sopenharmony_ci * (*) imm32 jmp_cond is 6 bytes, and imm8 jmp_cond 179962306a36Sopenharmony_ci * is 2 bytes, so the size difference is 4 bytes. 
180062306a36Sopenharmony_ci */ 180162306a36Sopenharmony_ci nops = INSN_SZ_DIFF - 2; 180262306a36Sopenharmony_ci if (nops != 0 && nops != 4) { 180362306a36Sopenharmony_ci pr_err("unexpected jmp_cond padding: %d bytes\n", 180462306a36Sopenharmony_ci nops); 180562306a36Sopenharmony_ci return -EFAULT; 180662306a36Sopenharmony_ci } 180762306a36Sopenharmony_ci emit_nops(&prog, nops); 180862306a36Sopenharmony_ci } 180962306a36Sopenharmony_ci EMIT2(jmp_cond, jmp_offset); 181062306a36Sopenharmony_ci } else if (is_simm32(jmp_offset)) { 181162306a36Sopenharmony_ci EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset); 181262306a36Sopenharmony_ci } else { 181362306a36Sopenharmony_ci pr_err("cond_jmp gen bug %llx\n", jmp_offset); 181462306a36Sopenharmony_ci return -EFAULT; 181562306a36Sopenharmony_ci } 181662306a36Sopenharmony_ci 181762306a36Sopenharmony_ci break; 181862306a36Sopenharmony_ci 181962306a36Sopenharmony_ci case BPF_JMP | BPF_JA: 182062306a36Sopenharmony_ci case BPF_JMP32 | BPF_JA: 182162306a36Sopenharmony_ci if (BPF_CLASS(insn->code) == BPF_JMP) { 182262306a36Sopenharmony_ci if (insn->off == -1) 182362306a36Sopenharmony_ci /* -1 jmp instructions will always jump 182462306a36Sopenharmony_ci * backwards two bytes. Explicitly handling 182562306a36Sopenharmony_ci * this case avoids wasting too many passes 182662306a36Sopenharmony_ci * when there are long sequences of replaced 182762306a36Sopenharmony_ci * dead code. 
182862306a36Sopenharmony_ci */ 182962306a36Sopenharmony_ci jmp_offset = -2; 183062306a36Sopenharmony_ci else 183162306a36Sopenharmony_ci jmp_offset = addrs[i + insn->off] - addrs[i]; 183262306a36Sopenharmony_ci } else { 183362306a36Sopenharmony_ci if (insn->imm == -1) 183462306a36Sopenharmony_ci jmp_offset = -2; 183562306a36Sopenharmony_ci else 183662306a36Sopenharmony_ci jmp_offset = addrs[i + insn->imm] - addrs[i]; 183762306a36Sopenharmony_ci } 183862306a36Sopenharmony_ci 183962306a36Sopenharmony_ci if (!jmp_offset) { 184062306a36Sopenharmony_ci /* 184162306a36Sopenharmony_ci * If jmp_padding is enabled, the extra nops will 184262306a36Sopenharmony_ci * be inserted. Otherwise, optimize out nop jumps. 184362306a36Sopenharmony_ci */ 184462306a36Sopenharmony_ci if (jmp_padding) { 184562306a36Sopenharmony_ci /* There are 3 possible conditions. 184662306a36Sopenharmony_ci * (1) This BPF_JA is already optimized out in 184762306a36Sopenharmony_ci * the previous run, so there is no need 184862306a36Sopenharmony_ci * to pad any extra byte (0 byte). 184962306a36Sopenharmony_ci * (2) The previous pass emits an imm8 jmp, 185062306a36Sopenharmony_ci * so we pad 2 bytes to match the previous 185162306a36Sopenharmony_ci * insn size. 185262306a36Sopenharmony_ci * (3) Similarly, the previous pass emits an 185362306a36Sopenharmony_ci * imm32 jmp, and 5 bytes is padded. 
185462306a36Sopenharmony_ci */ 185562306a36Sopenharmony_ci nops = INSN_SZ_DIFF; 185662306a36Sopenharmony_ci if (nops != 0 && nops != 2 && nops != 5) { 185762306a36Sopenharmony_ci pr_err("unexpected nop jump padding: %d bytes\n", 185862306a36Sopenharmony_ci nops); 185962306a36Sopenharmony_ci return -EFAULT; 186062306a36Sopenharmony_ci } 186162306a36Sopenharmony_ci emit_nops(&prog, nops); 186262306a36Sopenharmony_ci } 186362306a36Sopenharmony_ci break; 186462306a36Sopenharmony_ci } 186562306a36Sopenharmony_ciemit_jmp: 186662306a36Sopenharmony_ci if (is_imm8(jmp_offset)) { 186762306a36Sopenharmony_ci if (jmp_padding) { 186862306a36Sopenharmony_ci /* To avoid breaking jmp_offset, the extra bytes 186962306a36Sopenharmony_ci * are padded before the actual jmp insn, so 187062306a36Sopenharmony_ci * 2 bytes is subtracted from INSN_SZ_DIFF. 187162306a36Sopenharmony_ci * 187262306a36Sopenharmony_ci * If the previous pass already emits an imm8 187362306a36Sopenharmony_ci * jmp, there is nothing to pad (0 byte). 187462306a36Sopenharmony_ci * 187562306a36Sopenharmony_ci * If it emits an imm32 jmp (5 bytes) previously 187662306a36Sopenharmony_ci * and now an imm8 jmp (2 bytes), then we pad 187762306a36Sopenharmony_ci * (5 - 2 = 3) bytes to stop the image from 187862306a36Sopenharmony_ci * shrinking further. 
187962306a36Sopenharmony_ci */ 188062306a36Sopenharmony_ci nops = INSN_SZ_DIFF - 2; 188162306a36Sopenharmony_ci if (nops != 0 && nops != 3) { 188262306a36Sopenharmony_ci pr_err("unexpected jump padding: %d bytes\n", 188362306a36Sopenharmony_ci nops); 188462306a36Sopenharmony_ci return -EFAULT; 188562306a36Sopenharmony_ci } 188662306a36Sopenharmony_ci emit_nops(&prog, INSN_SZ_DIFF - 2); 188762306a36Sopenharmony_ci } 188862306a36Sopenharmony_ci EMIT2(0xEB, jmp_offset); 188962306a36Sopenharmony_ci } else if (is_simm32(jmp_offset)) { 189062306a36Sopenharmony_ci EMIT1_off32(0xE9, jmp_offset); 189162306a36Sopenharmony_ci } else { 189262306a36Sopenharmony_ci pr_err("jmp gen bug %llx\n", jmp_offset); 189362306a36Sopenharmony_ci return -EFAULT; 189462306a36Sopenharmony_ci } 189562306a36Sopenharmony_ci break; 189662306a36Sopenharmony_ci 189762306a36Sopenharmony_ci case BPF_JMP | BPF_EXIT: 189862306a36Sopenharmony_ci if (seen_exit) { 189962306a36Sopenharmony_ci jmp_offset = ctx->cleanup_addr - addrs[i]; 190062306a36Sopenharmony_ci goto emit_jmp; 190162306a36Sopenharmony_ci } 190262306a36Sopenharmony_ci seen_exit = true; 190362306a36Sopenharmony_ci /* Update cleanup_addr */ 190462306a36Sopenharmony_ci ctx->cleanup_addr = proglen; 190562306a36Sopenharmony_ci pop_callee_regs(&prog, callee_regs_used); 190662306a36Sopenharmony_ci EMIT1(0xC9); /* leave */ 190762306a36Sopenharmony_ci emit_return(&prog, image + addrs[i - 1] + (prog - temp)); 190862306a36Sopenharmony_ci break; 190962306a36Sopenharmony_ci 191062306a36Sopenharmony_ci default: 191162306a36Sopenharmony_ci /* 191262306a36Sopenharmony_ci * By design x86-64 JIT should support all BPF instructions. 191362306a36Sopenharmony_ci * This error will be seen if new instruction was added 191462306a36Sopenharmony_ci * to the interpreter, but not to the JIT, or if there is 191562306a36Sopenharmony_ci * junk in bpf_prog. 
191662306a36Sopenharmony_ci */ 191762306a36Sopenharmony_ci pr_err("bpf_jit: unknown opcode %02x\n", insn->code); 191862306a36Sopenharmony_ci return -EINVAL; 191962306a36Sopenharmony_ci } 192062306a36Sopenharmony_ci 192162306a36Sopenharmony_ci ilen = prog - temp; 192262306a36Sopenharmony_ci if (ilen > BPF_MAX_INSN_SIZE) { 192362306a36Sopenharmony_ci pr_err("bpf_jit: fatal insn size error\n"); 192462306a36Sopenharmony_ci return -EFAULT; 192562306a36Sopenharmony_ci } 192662306a36Sopenharmony_ci 192762306a36Sopenharmony_ci if (image) { 192862306a36Sopenharmony_ci /* 192962306a36Sopenharmony_ci * When populating the image, assert that: 193062306a36Sopenharmony_ci * 193162306a36Sopenharmony_ci * i) We do not write beyond the allocated space, and 193262306a36Sopenharmony_ci * ii) addrs[i] did not change from the prior run, in order 193362306a36Sopenharmony_ci * to validate assumptions made for computing branch 193462306a36Sopenharmony_ci * displacements. 193562306a36Sopenharmony_ci */ 193662306a36Sopenharmony_ci if (unlikely(proglen + ilen > oldproglen || 193762306a36Sopenharmony_ci proglen + ilen != addrs[i])) { 193862306a36Sopenharmony_ci pr_err("bpf_jit: fatal error\n"); 193962306a36Sopenharmony_ci return -EFAULT; 194062306a36Sopenharmony_ci } 194162306a36Sopenharmony_ci memcpy(rw_image + proglen, temp, ilen); 194262306a36Sopenharmony_ci } 194362306a36Sopenharmony_ci proglen += ilen; 194462306a36Sopenharmony_ci addrs[i] = proglen; 194562306a36Sopenharmony_ci prog = temp; 194662306a36Sopenharmony_ci } 194762306a36Sopenharmony_ci 194862306a36Sopenharmony_ci if (image && excnt != bpf_prog->aux->num_exentries) { 194962306a36Sopenharmony_ci pr_err("extable is not populated\n"); 195062306a36Sopenharmony_ci return -EFAULT; 195162306a36Sopenharmony_ci } 195262306a36Sopenharmony_ci return proglen; 195362306a36Sopenharmony_ci} 195462306a36Sopenharmony_ci 195562306a36Sopenharmony_cistatic void clean_stack_garbage(const struct btf_func_model *m, 195662306a36Sopenharmony_ci u8 
**pprog, int nr_stack_slots, 195762306a36Sopenharmony_ci int stack_size) 195862306a36Sopenharmony_ci{ 195962306a36Sopenharmony_ci int arg_size, off; 196062306a36Sopenharmony_ci u8 *prog; 196162306a36Sopenharmony_ci 196262306a36Sopenharmony_ci /* Generally speaking, the compiler will pass the arguments 196362306a36Sopenharmony_ci * on-stack with "push" instruction, which will take 8-byte 196462306a36Sopenharmony_ci * on the stack. In this case, there won't be garbage values 196562306a36Sopenharmony_ci * while we copy the arguments from origin stack frame to current 196662306a36Sopenharmony_ci * in BPF_DW. 196762306a36Sopenharmony_ci * 196862306a36Sopenharmony_ci * However, sometimes the compiler will only allocate 4-byte on 196962306a36Sopenharmony_ci * the stack for the arguments. For now, this case will only 197062306a36Sopenharmony_ci * happen if there is only one argument on-stack and its size 197162306a36Sopenharmony_ci * not more than 4 byte. In this case, there will be garbage 197262306a36Sopenharmony_ci * values on the upper 4-byte where we store the argument on 197362306a36Sopenharmony_ci * current stack frame. 197462306a36Sopenharmony_ci * 197562306a36Sopenharmony_ci * arguments on origin stack: 197662306a36Sopenharmony_ci * 197762306a36Sopenharmony_ci * stack_arg_1(4-byte) xxx(4-byte) 197862306a36Sopenharmony_ci * 197962306a36Sopenharmony_ci * what we copy: 198062306a36Sopenharmony_ci * 198162306a36Sopenharmony_ci * stack_arg_1(8-byte): stack_arg_1(origin) xxx 198262306a36Sopenharmony_ci * 198362306a36Sopenharmony_ci * and the xxx is the garbage values which we should clean here. 
198462306a36Sopenharmony_ci */ 198562306a36Sopenharmony_ci if (nr_stack_slots != 1) 198662306a36Sopenharmony_ci return; 198762306a36Sopenharmony_ci 198862306a36Sopenharmony_ci /* the size of the last argument */ 198962306a36Sopenharmony_ci arg_size = m->arg_size[m->nr_args - 1]; 199062306a36Sopenharmony_ci if (arg_size <= 4) { 199162306a36Sopenharmony_ci off = -(stack_size - 4); 199262306a36Sopenharmony_ci prog = *pprog; 199362306a36Sopenharmony_ci /* mov DWORD PTR [rbp + off], 0 */ 199462306a36Sopenharmony_ci if (!is_imm8(off)) 199562306a36Sopenharmony_ci EMIT2_off32(0xC7, 0x85, off); 199662306a36Sopenharmony_ci else 199762306a36Sopenharmony_ci EMIT3(0xC7, 0x45, off); 199862306a36Sopenharmony_ci EMIT(0, 4); 199962306a36Sopenharmony_ci *pprog = prog; 200062306a36Sopenharmony_ci } 200162306a36Sopenharmony_ci} 200262306a36Sopenharmony_ci 200362306a36Sopenharmony_ci/* get the count of the regs that are used to pass arguments */ 200462306a36Sopenharmony_cistatic int get_nr_used_regs(const struct btf_func_model *m) 200562306a36Sopenharmony_ci{ 200662306a36Sopenharmony_ci int i, arg_regs, nr_used_regs = 0; 200762306a36Sopenharmony_ci 200862306a36Sopenharmony_ci for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { 200962306a36Sopenharmony_ci arg_regs = (m->arg_size[i] + 7) / 8; 201062306a36Sopenharmony_ci if (nr_used_regs + arg_regs <= 6) 201162306a36Sopenharmony_ci nr_used_regs += arg_regs; 201262306a36Sopenharmony_ci 201362306a36Sopenharmony_ci if (nr_used_regs >= 6) 201462306a36Sopenharmony_ci break; 201562306a36Sopenharmony_ci } 201662306a36Sopenharmony_ci 201762306a36Sopenharmony_ci return nr_used_regs; 201862306a36Sopenharmony_ci} 201962306a36Sopenharmony_ci 202062306a36Sopenharmony_cistatic void save_args(const struct btf_func_model *m, u8 **prog, 202162306a36Sopenharmony_ci int stack_size, bool for_call_origin) 202262306a36Sopenharmony_ci{ 202362306a36Sopenharmony_ci int arg_regs, first_off = 0, nr_regs = 0, nr_stack_slots = 0; 
202462306a36Sopenharmony_ci int i, j; 202562306a36Sopenharmony_ci 202662306a36Sopenharmony_ci /* Store function arguments to stack. 202762306a36Sopenharmony_ci * For a function that accepts two pointers the sequence will be: 202862306a36Sopenharmony_ci * mov QWORD PTR [rbp-0x10],rdi 202962306a36Sopenharmony_ci * mov QWORD PTR [rbp-0x8],rsi 203062306a36Sopenharmony_ci */ 203162306a36Sopenharmony_ci for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { 203262306a36Sopenharmony_ci arg_regs = (m->arg_size[i] + 7) / 8; 203362306a36Sopenharmony_ci 203462306a36Sopenharmony_ci /* According to the research of Yonghong, struct members 203562306a36Sopenharmony_ci * should be all in register or all on the stack. 203662306a36Sopenharmony_ci * Meanwhile, the compiler will pass the argument on regs 203762306a36Sopenharmony_ci * if the remaining regs can hold the argument. 203862306a36Sopenharmony_ci * 203962306a36Sopenharmony_ci * Disorder of the args can happen. For example: 204062306a36Sopenharmony_ci * 204162306a36Sopenharmony_ci * struct foo_struct { 204262306a36Sopenharmony_ci * long a; 204362306a36Sopenharmony_ci * int b; 204462306a36Sopenharmony_ci * }; 204562306a36Sopenharmony_ci * int foo(char, char, char, char, char, struct foo_struct, 204662306a36Sopenharmony_ci * char); 204762306a36Sopenharmony_ci * 204862306a36Sopenharmony_ci * the arg1-5,arg7 will be passed by regs, and arg6 will 204962306a36Sopenharmony_ci * by stack. 205062306a36Sopenharmony_ci */ 205162306a36Sopenharmony_ci if (nr_regs + arg_regs > 6) { 205262306a36Sopenharmony_ci /* copy function arguments from origin stack frame 205362306a36Sopenharmony_ci * into current stack frame. 
205462306a36Sopenharmony_ci * 205562306a36Sopenharmony_ci * The starting address of the arguments on-stack 205662306a36Sopenharmony_ci * is: 205762306a36Sopenharmony_ci * rbp + 8(push rbp) + 205862306a36Sopenharmony_ci * 8(return addr of origin call) + 205962306a36Sopenharmony_ci * 8(return addr of the caller) 206062306a36Sopenharmony_ci * which means: rbp + 24 206162306a36Sopenharmony_ci */ 206262306a36Sopenharmony_ci for (j = 0; j < arg_regs; j++) { 206362306a36Sopenharmony_ci emit_ldx(prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 206462306a36Sopenharmony_ci nr_stack_slots * 8 + 0x18); 206562306a36Sopenharmony_ci emit_stx(prog, BPF_DW, BPF_REG_FP, BPF_REG_0, 206662306a36Sopenharmony_ci -stack_size); 206762306a36Sopenharmony_ci 206862306a36Sopenharmony_ci if (!nr_stack_slots) 206962306a36Sopenharmony_ci first_off = stack_size; 207062306a36Sopenharmony_ci stack_size -= 8; 207162306a36Sopenharmony_ci nr_stack_slots++; 207262306a36Sopenharmony_ci } 207362306a36Sopenharmony_ci } else { 207462306a36Sopenharmony_ci /* Only copy the arguments on-stack to current 207562306a36Sopenharmony_ci * 'stack_size' and ignore the regs, used to 207662306a36Sopenharmony_ci * prepare the arguments on-stack for orign call. 207762306a36Sopenharmony_ci */ 207862306a36Sopenharmony_ci if (for_call_origin) { 207962306a36Sopenharmony_ci nr_regs += arg_regs; 208062306a36Sopenharmony_ci continue; 208162306a36Sopenharmony_ci } 208262306a36Sopenharmony_ci 208362306a36Sopenharmony_ci /* copy the arguments from regs into stack */ 208462306a36Sopenharmony_ci for (j = 0; j < arg_regs; j++) { 208562306a36Sopenharmony_ci emit_stx(prog, BPF_DW, BPF_REG_FP, 208662306a36Sopenharmony_ci nr_regs == 5 ? 
X86_REG_R9 : BPF_REG_1 + nr_regs, 208762306a36Sopenharmony_ci -stack_size); 208862306a36Sopenharmony_ci stack_size -= 8; 208962306a36Sopenharmony_ci nr_regs++; 209062306a36Sopenharmony_ci } 209162306a36Sopenharmony_ci } 209262306a36Sopenharmony_ci } 209362306a36Sopenharmony_ci 209462306a36Sopenharmony_ci clean_stack_garbage(m, prog, nr_stack_slots, first_off); 209562306a36Sopenharmony_ci} 209662306a36Sopenharmony_ci 209762306a36Sopenharmony_cistatic void restore_regs(const struct btf_func_model *m, u8 **prog, 209862306a36Sopenharmony_ci int stack_size) 209962306a36Sopenharmony_ci{ 210062306a36Sopenharmony_ci int i, j, arg_regs, nr_regs = 0; 210162306a36Sopenharmony_ci 210262306a36Sopenharmony_ci /* Restore function arguments from stack. 210362306a36Sopenharmony_ci * For a function that accepts two pointers the sequence will be: 210462306a36Sopenharmony_ci * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] 210562306a36Sopenharmony_ci * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] 210662306a36Sopenharmony_ci * 210762306a36Sopenharmony_ci * The logic here is similar to what we do in save_args() 210862306a36Sopenharmony_ci */ 210962306a36Sopenharmony_ci for (i = 0; i < min_t(int, m->nr_args, MAX_BPF_FUNC_ARGS); i++) { 211062306a36Sopenharmony_ci arg_regs = (m->arg_size[i] + 7) / 8; 211162306a36Sopenharmony_ci if (nr_regs + arg_regs <= 6) { 211262306a36Sopenharmony_ci for (j = 0; j < arg_regs; j++) { 211362306a36Sopenharmony_ci emit_ldx(prog, BPF_DW, 211462306a36Sopenharmony_ci nr_regs == 5 ? 
X86_REG_R9 : BPF_REG_1 + nr_regs, 211562306a36Sopenharmony_ci BPF_REG_FP, 211662306a36Sopenharmony_ci -stack_size); 211762306a36Sopenharmony_ci stack_size -= 8; 211862306a36Sopenharmony_ci nr_regs++; 211962306a36Sopenharmony_ci } 212062306a36Sopenharmony_ci } else { 212162306a36Sopenharmony_ci stack_size -= 8 * arg_regs; 212262306a36Sopenharmony_ci } 212362306a36Sopenharmony_ci 212462306a36Sopenharmony_ci if (nr_regs >= 6) 212562306a36Sopenharmony_ci break; 212662306a36Sopenharmony_ci } 212762306a36Sopenharmony_ci} 212862306a36Sopenharmony_ci 212962306a36Sopenharmony_cistatic int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, 213062306a36Sopenharmony_ci struct bpf_tramp_link *l, int stack_size, 213162306a36Sopenharmony_ci int run_ctx_off, bool save_ret) 213262306a36Sopenharmony_ci{ 213362306a36Sopenharmony_ci u8 *prog = *pprog; 213462306a36Sopenharmony_ci u8 *jmp_insn; 213562306a36Sopenharmony_ci int ctx_cookie_off = offsetof(struct bpf_tramp_run_ctx, bpf_cookie); 213662306a36Sopenharmony_ci struct bpf_prog *p = l->link.prog; 213762306a36Sopenharmony_ci u64 cookie = l->cookie; 213862306a36Sopenharmony_ci 213962306a36Sopenharmony_ci /* mov rdi, cookie */ 214062306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_1, (long) cookie >> 32, (u32) (long) cookie); 214162306a36Sopenharmony_ci 214262306a36Sopenharmony_ci /* Prepare struct bpf_tramp_run_ctx. 214362306a36Sopenharmony_ci * 214462306a36Sopenharmony_ci * bpf_tramp_run_ctx is already preserved by 214562306a36Sopenharmony_ci * arch_prepare_bpf_trampoline(). 
214662306a36Sopenharmony_ci * 214762306a36Sopenharmony_ci * mov QWORD PTR [rbp - run_ctx_off + ctx_cookie_off], rdi 214862306a36Sopenharmony_ci */ 214962306a36Sopenharmony_ci emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_1, -run_ctx_off + ctx_cookie_off); 215062306a36Sopenharmony_ci 215162306a36Sopenharmony_ci /* arg1: mov rdi, progs[i] */ 215262306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); 215362306a36Sopenharmony_ci /* arg2: lea rsi, [rbp - ctx_cookie_off] */ 215462306a36Sopenharmony_ci if (!is_imm8(-run_ctx_off)) 215562306a36Sopenharmony_ci EMIT3_off32(0x48, 0x8D, 0xB5, -run_ctx_off); 215662306a36Sopenharmony_ci else 215762306a36Sopenharmony_ci EMIT4(0x48, 0x8D, 0x75, -run_ctx_off); 215862306a36Sopenharmony_ci 215962306a36Sopenharmony_ci if (emit_rsb_call(&prog, bpf_trampoline_enter(p), prog)) 216062306a36Sopenharmony_ci return -EINVAL; 216162306a36Sopenharmony_ci /* remember prog start time returned by __bpf_prog_enter */ 216262306a36Sopenharmony_ci emit_mov_reg(&prog, true, BPF_REG_6, BPF_REG_0); 216362306a36Sopenharmony_ci 216462306a36Sopenharmony_ci /* if (__bpf_prog_enter*(prog) == 0) 216562306a36Sopenharmony_ci * goto skip_exec_of_prog; 216662306a36Sopenharmony_ci */ 216762306a36Sopenharmony_ci EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ 216862306a36Sopenharmony_ci /* emit 2 nops that will be replaced with JE insn */ 216962306a36Sopenharmony_ci jmp_insn = prog; 217062306a36Sopenharmony_ci emit_nops(&prog, 2); 217162306a36Sopenharmony_ci 217262306a36Sopenharmony_ci /* arg1: lea rdi, [rbp - stack_size] */ 217362306a36Sopenharmony_ci if (!is_imm8(-stack_size)) 217462306a36Sopenharmony_ci EMIT3_off32(0x48, 0x8D, 0xBD, -stack_size); 217562306a36Sopenharmony_ci else 217662306a36Sopenharmony_ci EMIT4(0x48, 0x8D, 0x7D, -stack_size); 217762306a36Sopenharmony_ci /* arg2: progs[i]->insnsi for interpreter */ 217862306a36Sopenharmony_ci if (!p->jited) 217962306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_2, 
218062306a36Sopenharmony_ci (long) p->insnsi >> 32, 218162306a36Sopenharmony_ci (u32) (long) p->insnsi); 218262306a36Sopenharmony_ci /* call JITed bpf program or interpreter */ 218362306a36Sopenharmony_ci if (emit_rsb_call(&prog, p->bpf_func, prog)) 218462306a36Sopenharmony_ci return -EINVAL; 218562306a36Sopenharmony_ci 218662306a36Sopenharmony_ci /* 218762306a36Sopenharmony_ci * BPF_TRAMP_MODIFY_RETURN trampolines can modify the return 218862306a36Sopenharmony_ci * of the previous call which is then passed on the stack to 218962306a36Sopenharmony_ci * the next BPF program. 219062306a36Sopenharmony_ci * 219162306a36Sopenharmony_ci * BPF_TRAMP_FENTRY trampoline may need to return the return 219262306a36Sopenharmony_ci * value of BPF_PROG_TYPE_STRUCT_OPS prog. 219362306a36Sopenharmony_ci */ 219462306a36Sopenharmony_ci if (save_ret) 219562306a36Sopenharmony_ci emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); 219662306a36Sopenharmony_ci 219762306a36Sopenharmony_ci /* replace 2 nops with JE insn, since jmp target is known */ 219862306a36Sopenharmony_ci jmp_insn[0] = X86_JE; 219962306a36Sopenharmony_ci jmp_insn[1] = prog - jmp_insn - 2; 220062306a36Sopenharmony_ci 220162306a36Sopenharmony_ci /* arg1: mov rdi, progs[i] */ 220262306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p); 220362306a36Sopenharmony_ci /* arg2: mov rsi, rbx <- start time in nsec */ 220462306a36Sopenharmony_ci emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6); 220562306a36Sopenharmony_ci /* arg3: lea rdx, [rbp - run_ctx_off] */ 220662306a36Sopenharmony_ci if (!is_imm8(-run_ctx_off)) 220762306a36Sopenharmony_ci EMIT3_off32(0x48, 0x8D, 0x95, -run_ctx_off); 220862306a36Sopenharmony_ci else 220962306a36Sopenharmony_ci EMIT4(0x48, 0x8D, 0x55, -run_ctx_off); 221062306a36Sopenharmony_ci if (emit_rsb_call(&prog, bpf_trampoline_exit(p), prog)) 221162306a36Sopenharmony_ci return -EINVAL; 221262306a36Sopenharmony_ci 221362306a36Sopenharmony_ci *pprog = prog; 
221462306a36Sopenharmony_ci return 0; 221562306a36Sopenharmony_ci} 221662306a36Sopenharmony_ci 221762306a36Sopenharmony_cistatic void emit_align(u8 **pprog, u32 align) 221862306a36Sopenharmony_ci{ 221962306a36Sopenharmony_ci u8 *target, *prog = *pprog; 222062306a36Sopenharmony_ci 222162306a36Sopenharmony_ci target = PTR_ALIGN(prog, align); 222262306a36Sopenharmony_ci if (target != prog) 222362306a36Sopenharmony_ci emit_nops(&prog, target - prog); 222462306a36Sopenharmony_ci 222562306a36Sopenharmony_ci *pprog = prog; 222662306a36Sopenharmony_ci} 222762306a36Sopenharmony_ci 222862306a36Sopenharmony_cistatic int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond) 222962306a36Sopenharmony_ci{ 223062306a36Sopenharmony_ci u8 *prog = *pprog; 223162306a36Sopenharmony_ci s64 offset; 223262306a36Sopenharmony_ci 223362306a36Sopenharmony_ci offset = func - (ip + 2 + 4); 223462306a36Sopenharmony_ci if (!is_simm32(offset)) { 223562306a36Sopenharmony_ci pr_err("Target %p is out of range\n", func); 223662306a36Sopenharmony_ci return -EINVAL; 223762306a36Sopenharmony_ci } 223862306a36Sopenharmony_ci EMIT2_off32(0x0F, jmp_cond + 0x10, offset); 223962306a36Sopenharmony_ci *pprog = prog; 224062306a36Sopenharmony_ci return 0; 224162306a36Sopenharmony_ci} 224262306a36Sopenharmony_ci 224362306a36Sopenharmony_cistatic int invoke_bpf(const struct btf_func_model *m, u8 **pprog, 224462306a36Sopenharmony_ci struct bpf_tramp_links *tl, int stack_size, 224562306a36Sopenharmony_ci int run_ctx_off, bool save_ret) 224662306a36Sopenharmony_ci{ 224762306a36Sopenharmony_ci int i; 224862306a36Sopenharmony_ci u8 *prog = *pprog; 224962306a36Sopenharmony_ci 225062306a36Sopenharmony_ci for (i = 0; i < tl->nr_links; i++) { 225162306a36Sopenharmony_ci if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, 225262306a36Sopenharmony_ci run_ctx_off, save_ret)) 225362306a36Sopenharmony_ci return -EINVAL; 225462306a36Sopenharmony_ci } 225562306a36Sopenharmony_ci *pprog = prog; 
225662306a36Sopenharmony_ci return 0; 225762306a36Sopenharmony_ci} 225862306a36Sopenharmony_ci 225962306a36Sopenharmony_cistatic int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, 226062306a36Sopenharmony_ci struct bpf_tramp_links *tl, int stack_size, 226162306a36Sopenharmony_ci int run_ctx_off, u8 **branches) 226262306a36Sopenharmony_ci{ 226362306a36Sopenharmony_ci u8 *prog = *pprog; 226462306a36Sopenharmony_ci int i; 226562306a36Sopenharmony_ci 226662306a36Sopenharmony_ci /* The first fmod_ret program will receive a garbage return value. 226762306a36Sopenharmony_ci * Set this to 0 to avoid confusing the program. 226862306a36Sopenharmony_ci */ 226962306a36Sopenharmony_ci emit_mov_imm32(&prog, false, BPF_REG_0, 0); 227062306a36Sopenharmony_ci emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); 227162306a36Sopenharmony_ci for (i = 0; i < tl->nr_links; i++) { 227262306a36Sopenharmony_ci if (invoke_bpf_prog(m, &prog, tl->links[i], stack_size, run_ctx_off, true)) 227362306a36Sopenharmony_ci return -EINVAL; 227462306a36Sopenharmony_ci 227562306a36Sopenharmony_ci /* mod_ret prog stored return value into [rbp - 8]. Emit: 227662306a36Sopenharmony_ci * if (*(u64 *)(rbp - 8) != 0) 227762306a36Sopenharmony_ci * goto do_fexit; 227862306a36Sopenharmony_ci */ 227962306a36Sopenharmony_ci /* cmp QWORD PTR [rbp - 0x8], 0x0 */ 228062306a36Sopenharmony_ci EMIT4(0x48, 0x83, 0x7d, 0xf8); EMIT1(0x00); 228162306a36Sopenharmony_ci 228262306a36Sopenharmony_ci /* Save the location of the branch and Generate 6 nops 228362306a36Sopenharmony_ci * (4 bytes for an offset and 2 bytes for the jump) These nops 228462306a36Sopenharmony_ci * are replaced with a conditional jump once do_fexit (i.e. the 228562306a36Sopenharmony_ci * start of the fexit invocation) is finalized. 
228662306a36Sopenharmony_ci */ 228762306a36Sopenharmony_ci branches[i] = prog; 228862306a36Sopenharmony_ci emit_nops(&prog, 4 + 2); 228962306a36Sopenharmony_ci } 229062306a36Sopenharmony_ci 229162306a36Sopenharmony_ci *pprog = prog; 229262306a36Sopenharmony_ci return 0; 229362306a36Sopenharmony_ci} 229462306a36Sopenharmony_ci 229562306a36Sopenharmony_ci/* Example: 229662306a36Sopenharmony_ci * __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); 229762306a36Sopenharmony_ci * its 'struct btf_func_model' will be nr_args=2 229862306a36Sopenharmony_ci * The assembly code when eth_type_trans is executing after trampoline: 229962306a36Sopenharmony_ci * 230062306a36Sopenharmony_ci * push rbp 230162306a36Sopenharmony_ci * mov rbp, rsp 230262306a36Sopenharmony_ci * sub rsp, 16 // space for skb and dev 230362306a36Sopenharmony_ci * push rbx // temp regs to pass start time 230462306a36Sopenharmony_ci * mov qword ptr [rbp - 16], rdi // save skb pointer to stack 230562306a36Sopenharmony_ci * mov qword ptr [rbp - 8], rsi // save dev pointer to stack 230662306a36Sopenharmony_ci * call __bpf_prog_enter // rcu_read_lock and preempt_disable 230762306a36Sopenharmony_ci * mov rbx, rax // remember start time in bpf stats are enabled 230862306a36Sopenharmony_ci * lea rdi, [rbp - 16] // R1==ctx of bpf prog 230962306a36Sopenharmony_ci * call addr_of_jited_FENTRY_prog 231062306a36Sopenharmony_ci * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off 231162306a36Sopenharmony_ci * mov rsi, rbx // prog start time 231262306a36Sopenharmony_ci * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math 231362306a36Sopenharmony_ci * mov rdi, qword ptr [rbp - 16] // restore skb pointer from stack 231462306a36Sopenharmony_ci * mov rsi, qword ptr [rbp - 8] // restore dev pointer from stack 231562306a36Sopenharmony_ci * pop rbx 231662306a36Sopenharmony_ci * leave 231762306a36Sopenharmony_ci * ret 231862306a36Sopenharmony_ci * 
 * eth_type_trans has a 5-byte nop at the beginning. These 5 bytes will be
 * replaced with 'call generated_bpf_trampoline'. When it returns
 * eth_type_trans will continue executing with original skb and dev pointers.
 *
 * The assembly code when eth_type_trans is called from trampoline:
 *
 * push rbp
 * mov rbp, rsp
 * sub rsp, 24                     // space for skb, dev, return value
 * push rbx                        // temp regs to pass start time
 * mov qword ptr [rbp - 24], rdi   // save skb pointer to stack
 * mov qword ptr [rbp - 16], rsi   // save dev pointer to stack
 * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
 * mov rbx, rax                    // remember start time if bpf stats are enabled
 * lea rdi, [rbp - 24]             // R1==ctx of bpf prog
 * call addr_of_jited_FENTRY_prog  // bpf prog can access skb and dev
 * movabsq rdi, 64bit_addr_of_struct_bpf_prog  // unused if bpf stats are off
 * mov rsi, rbx                    // prog start time
 * call __bpf_prog_exit            // rcu_read_unlock, preempt_enable and stats math
 * mov rdi, qword ptr [rbp - 24]   // restore skb pointer from stack
 * mov rsi, qword ptr [rbp - 16]   // restore dev pointer from stack
 * call eth_type_trans+5           // execute body of eth_type_trans
 * mov qword ptr [rbp - 8], rax    // save return value
 * call __bpf_prog_enter           // rcu_read_lock and preempt_disable
 * mov rbx, rax                    // remember start time if bpf stats
are enabled 234462306a36Sopenharmony_ci * lea rdi, [rbp - 24] // R1==ctx of bpf prog 234562306a36Sopenharmony_ci * call addr_of_jited_FEXIT_prog // bpf prog can access skb, dev, return value 234662306a36Sopenharmony_ci * movabsq rdi, 64bit_addr_of_struct_bpf_prog // unused if bpf stats are off 234762306a36Sopenharmony_ci * mov rsi, rbx // prog start time 234862306a36Sopenharmony_ci * call __bpf_prog_exit // rcu_read_unlock, preempt_enable and stats math 234962306a36Sopenharmony_ci * mov rax, qword ptr [rbp - 8] // restore eth_type_trans's return value 235062306a36Sopenharmony_ci * pop rbx 235162306a36Sopenharmony_ci * leave 235262306a36Sopenharmony_ci * add rsp, 8 // skip eth_type_trans's frame 235362306a36Sopenharmony_ci * ret // return to its caller 235462306a36Sopenharmony_ci */ 235562306a36Sopenharmony_ciint arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, 235662306a36Sopenharmony_ci const struct btf_func_model *m, u32 flags, 235762306a36Sopenharmony_ci struct bpf_tramp_links *tlinks, 235862306a36Sopenharmony_ci void *func_addr) 235962306a36Sopenharmony_ci{ 236062306a36Sopenharmony_ci int i, ret, nr_regs = m->nr_args, stack_size = 0; 236162306a36Sopenharmony_ci int regs_off, nregs_off, ip_off, run_ctx_off, arg_stack_off, rbx_off; 236262306a36Sopenharmony_ci struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; 236362306a36Sopenharmony_ci struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; 236462306a36Sopenharmony_ci struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; 236562306a36Sopenharmony_ci void *orig_call = func_addr; 236662306a36Sopenharmony_ci u8 **branches = NULL; 236762306a36Sopenharmony_ci u8 *prog; 236862306a36Sopenharmony_ci bool save_ret; 236962306a36Sopenharmony_ci 237062306a36Sopenharmony_ci /* extra registers for struct arguments */ 237162306a36Sopenharmony_ci for (i = 0; i < m->nr_args; i++) 237262306a36Sopenharmony_ci if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) 
237362306a36Sopenharmony_ci nr_regs += (m->arg_size[i] + 7) / 8 - 1; 237462306a36Sopenharmony_ci 237562306a36Sopenharmony_ci /* x86-64 supports up to MAX_BPF_FUNC_ARGS arguments. 1-6 237662306a36Sopenharmony_ci * are passed through regs, the remains are through stack. 237762306a36Sopenharmony_ci */ 237862306a36Sopenharmony_ci if (nr_regs > MAX_BPF_FUNC_ARGS) 237962306a36Sopenharmony_ci return -ENOTSUPP; 238062306a36Sopenharmony_ci 238162306a36Sopenharmony_ci /* Generated trampoline stack layout: 238262306a36Sopenharmony_ci * 238362306a36Sopenharmony_ci * RBP + 8 [ return address ] 238462306a36Sopenharmony_ci * RBP + 0 [ RBP ] 238562306a36Sopenharmony_ci * 238662306a36Sopenharmony_ci * RBP - 8 [ return value ] BPF_TRAMP_F_CALL_ORIG or 238762306a36Sopenharmony_ci * BPF_TRAMP_F_RET_FENTRY_RET flags 238862306a36Sopenharmony_ci * 238962306a36Sopenharmony_ci * [ reg_argN ] always 239062306a36Sopenharmony_ci * [ ... ] 239162306a36Sopenharmony_ci * RBP - regs_off [ reg_arg1 ] program's ctx pointer 239262306a36Sopenharmony_ci * 239362306a36Sopenharmony_ci * RBP - nregs_off [ regs count ] always 239462306a36Sopenharmony_ci * 239562306a36Sopenharmony_ci * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag 239662306a36Sopenharmony_ci * 239762306a36Sopenharmony_ci * RBP - rbx_off [ rbx value ] always 239862306a36Sopenharmony_ci * 239962306a36Sopenharmony_ci * RBP - run_ctx_off [ bpf_tramp_run_ctx ] 240062306a36Sopenharmony_ci * 240162306a36Sopenharmony_ci * [ stack_argN ] BPF_TRAMP_F_CALL_ORIG 240262306a36Sopenharmony_ci * [ ... 
] 240362306a36Sopenharmony_ci * [ stack_arg2 ] 240462306a36Sopenharmony_ci * RBP - arg_stack_off [ stack_arg1 ] 240562306a36Sopenharmony_ci * RSP [ tail_call_cnt ] BPF_TRAMP_F_TAIL_CALL_CTX 240662306a36Sopenharmony_ci */ 240762306a36Sopenharmony_ci 240862306a36Sopenharmony_ci /* room for return value of orig_call or fentry prog */ 240962306a36Sopenharmony_ci save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); 241062306a36Sopenharmony_ci if (save_ret) 241162306a36Sopenharmony_ci stack_size += 8; 241262306a36Sopenharmony_ci 241362306a36Sopenharmony_ci stack_size += nr_regs * 8; 241462306a36Sopenharmony_ci regs_off = stack_size; 241562306a36Sopenharmony_ci 241662306a36Sopenharmony_ci /* regs count */ 241762306a36Sopenharmony_ci stack_size += 8; 241862306a36Sopenharmony_ci nregs_off = stack_size; 241962306a36Sopenharmony_ci 242062306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_IP_ARG) 242162306a36Sopenharmony_ci stack_size += 8; /* room for IP address argument */ 242262306a36Sopenharmony_ci 242362306a36Sopenharmony_ci ip_off = stack_size; 242462306a36Sopenharmony_ci 242562306a36Sopenharmony_ci stack_size += 8; 242662306a36Sopenharmony_ci rbx_off = stack_size; 242762306a36Sopenharmony_ci 242862306a36Sopenharmony_ci stack_size += (sizeof(struct bpf_tramp_run_ctx) + 7) & ~0x7; 242962306a36Sopenharmony_ci run_ctx_off = stack_size; 243062306a36Sopenharmony_ci 243162306a36Sopenharmony_ci if (nr_regs > 6 && (flags & BPF_TRAMP_F_CALL_ORIG)) { 243262306a36Sopenharmony_ci /* the space that used to pass arguments on-stack */ 243362306a36Sopenharmony_ci stack_size += (nr_regs - get_nr_used_regs(m)) * 8; 243462306a36Sopenharmony_ci /* make sure the stack pointer is 16-byte aligned if we 243562306a36Sopenharmony_ci * need pass arguments on stack, which means 243662306a36Sopenharmony_ci * [stack_size + 8(rbp) + 8(rip) + 8(origin rip)] 243762306a36Sopenharmony_ci * should be 16-byte aligned. 
Following code depend on 243862306a36Sopenharmony_ci * that stack_size is already 8-byte aligned. 243962306a36Sopenharmony_ci */ 244062306a36Sopenharmony_ci stack_size += (stack_size % 16) ? 0 : 8; 244162306a36Sopenharmony_ci } 244262306a36Sopenharmony_ci 244362306a36Sopenharmony_ci arg_stack_off = stack_size; 244462306a36Sopenharmony_ci 244562306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_SKIP_FRAME) { 244662306a36Sopenharmony_ci /* skip patched call instruction and point orig_call to actual 244762306a36Sopenharmony_ci * body of the kernel function. 244862306a36Sopenharmony_ci */ 244962306a36Sopenharmony_ci if (is_endbr(*(u32 *)orig_call)) 245062306a36Sopenharmony_ci orig_call += ENDBR_INSN_SIZE; 245162306a36Sopenharmony_ci orig_call += X86_PATCH_SIZE; 245262306a36Sopenharmony_ci } 245362306a36Sopenharmony_ci 245462306a36Sopenharmony_ci prog = image; 245562306a36Sopenharmony_ci 245662306a36Sopenharmony_ci EMIT_ENDBR(); 245762306a36Sopenharmony_ci /* 245862306a36Sopenharmony_ci * This is the direct-call trampoline, as such it needs accounting 245962306a36Sopenharmony_ci * for the __fentry__ call. 
246062306a36Sopenharmony_ci */ 246162306a36Sopenharmony_ci x86_call_depth_emit_accounting(&prog, NULL); 246262306a36Sopenharmony_ci EMIT1(0x55); /* push rbp */ 246362306a36Sopenharmony_ci EMIT3(0x48, 0x89, 0xE5); /* mov rbp, rsp */ 246462306a36Sopenharmony_ci if (!is_imm8(stack_size)) 246562306a36Sopenharmony_ci /* sub rsp, stack_size */ 246662306a36Sopenharmony_ci EMIT3_off32(0x48, 0x81, 0xEC, stack_size); 246762306a36Sopenharmony_ci else 246862306a36Sopenharmony_ci /* sub rsp, stack_size */ 246962306a36Sopenharmony_ci EMIT4(0x48, 0x83, 0xEC, stack_size); 247062306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 247162306a36Sopenharmony_ci EMIT1(0x50); /* push rax */ 247262306a36Sopenharmony_ci /* mov QWORD PTR [rbp - rbx_off], rbx */ 247362306a36Sopenharmony_ci emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_6, -rbx_off); 247462306a36Sopenharmony_ci 247562306a36Sopenharmony_ci /* Store number of argument registers of the traced function: 247662306a36Sopenharmony_ci * mov rax, nr_regs 247762306a36Sopenharmony_ci * mov QWORD PTR [rbp - nregs_off], rax 247862306a36Sopenharmony_ci */ 247962306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_regs); 248062306a36Sopenharmony_ci emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -nregs_off); 248162306a36Sopenharmony_ci 248262306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_IP_ARG) { 248362306a36Sopenharmony_ci /* Store IP address of the traced function: 248462306a36Sopenharmony_ci * movabsq rax, func_addr 248562306a36Sopenharmony_ci * mov QWORD PTR [rbp - ip_off], rax 248662306a36Sopenharmony_ci */ 248762306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_0, (long) func_addr >> 32, (u32) (long) func_addr); 248862306a36Sopenharmony_ci emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); 248962306a36Sopenharmony_ci } 249062306a36Sopenharmony_ci 249162306a36Sopenharmony_ci save_args(m, &prog, regs_off, false); 249262306a36Sopenharmony_ci 249362306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_CALL_ORIG) { 
249462306a36Sopenharmony_ci /* arg1: mov rdi, im */ 249562306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); 249662306a36Sopenharmony_ci if (emit_rsb_call(&prog, __bpf_tramp_enter, prog)) { 249762306a36Sopenharmony_ci ret = -EINVAL; 249862306a36Sopenharmony_ci goto cleanup; 249962306a36Sopenharmony_ci } 250062306a36Sopenharmony_ci } 250162306a36Sopenharmony_ci 250262306a36Sopenharmony_ci if (fentry->nr_links) 250362306a36Sopenharmony_ci if (invoke_bpf(m, &prog, fentry, regs_off, run_ctx_off, 250462306a36Sopenharmony_ci flags & BPF_TRAMP_F_RET_FENTRY_RET)) 250562306a36Sopenharmony_ci return -EINVAL; 250662306a36Sopenharmony_ci 250762306a36Sopenharmony_ci if (fmod_ret->nr_links) { 250862306a36Sopenharmony_ci branches = kcalloc(fmod_ret->nr_links, sizeof(u8 *), 250962306a36Sopenharmony_ci GFP_KERNEL); 251062306a36Sopenharmony_ci if (!branches) 251162306a36Sopenharmony_ci return -ENOMEM; 251262306a36Sopenharmony_ci 251362306a36Sopenharmony_ci if (invoke_bpf_mod_ret(m, &prog, fmod_ret, regs_off, 251462306a36Sopenharmony_ci run_ctx_off, branches)) { 251562306a36Sopenharmony_ci ret = -EINVAL; 251662306a36Sopenharmony_ci goto cleanup; 251762306a36Sopenharmony_ci } 251862306a36Sopenharmony_ci } 251962306a36Sopenharmony_ci 252062306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_CALL_ORIG) { 252162306a36Sopenharmony_ci restore_regs(m, &prog, regs_off); 252262306a36Sopenharmony_ci save_args(m, &prog, arg_stack_off, true); 252362306a36Sopenharmony_ci 252462306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 252562306a36Sopenharmony_ci /* Before calling the original function, restore the 252662306a36Sopenharmony_ci * tail_call_cnt from stack to rax. 
252762306a36Sopenharmony_ci */ 252862306a36Sopenharmony_ci RESTORE_TAIL_CALL_CNT(stack_size); 252962306a36Sopenharmony_ci 253062306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_ORIG_STACK) { 253162306a36Sopenharmony_ci emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, 8); 253262306a36Sopenharmony_ci EMIT2(0xff, 0xd3); /* call *rbx */ 253362306a36Sopenharmony_ci } else { 253462306a36Sopenharmony_ci /* call original function */ 253562306a36Sopenharmony_ci if (emit_rsb_call(&prog, orig_call, prog)) { 253662306a36Sopenharmony_ci ret = -EINVAL; 253762306a36Sopenharmony_ci goto cleanup; 253862306a36Sopenharmony_ci } 253962306a36Sopenharmony_ci } 254062306a36Sopenharmony_ci /* remember return value in a stack for bpf prog to access */ 254162306a36Sopenharmony_ci emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8); 254262306a36Sopenharmony_ci im->ip_after_call = prog; 254362306a36Sopenharmony_ci memcpy(prog, x86_nops[5], X86_PATCH_SIZE); 254462306a36Sopenharmony_ci prog += X86_PATCH_SIZE; 254562306a36Sopenharmony_ci } 254662306a36Sopenharmony_ci 254762306a36Sopenharmony_ci if (fmod_ret->nr_links) { 254862306a36Sopenharmony_ci /* From Intel 64 and IA-32 Architectures Optimization 254962306a36Sopenharmony_ci * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler 255062306a36Sopenharmony_ci * Coding Rule 11: All branch targets should be 16-byte 255162306a36Sopenharmony_ci * aligned. 255262306a36Sopenharmony_ci */ 255362306a36Sopenharmony_ci emit_align(&prog, 16); 255462306a36Sopenharmony_ci /* Update the branches saved in invoke_bpf_mod_ret with the 255562306a36Sopenharmony_ci * aligned address of do_fexit. 
255662306a36Sopenharmony_ci */ 255762306a36Sopenharmony_ci for (i = 0; i < fmod_ret->nr_links; i++) 255862306a36Sopenharmony_ci emit_cond_near_jump(&branches[i], prog, branches[i], 255962306a36Sopenharmony_ci X86_JNE); 256062306a36Sopenharmony_ci } 256162306a36Sopenharmony_ci 256262306a36Sopenharmony_ci if (fexit->nr_links) 256362306a36Sopenharmony_ci if (invoke_bpf(m, &prog, fexit, regs_off, run_ctx_off, false)) { 256462306a36Sopenharmony_ci ret = -EINVAL; 256562306a36Sopenharmony_ci goto cleanup; 256662306a36Sopenharmony_ci } 256762306a36Sopenharmony_ci 256862306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_RESTORE_REGS) 256962306a36Sopenharmony_ci restore_regs(m, &prog, regs_off); 257062306a36Sopenharmony_ci 257162306a36Sopenharmony_ci /* This needs to be done regardless. If there were fmod_ret programs, 257262306a36Sopenharmony_ci * the return value is only updated on the stack and still needs to be 257362306a36Sopenharmony_ci * restored to R0. 257462306a36Sopenharmony_ci */ 257562306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_CALL_ORIG) { 257662306a36Sopenharmony_ci im->ip_epilogue = prog; 257762306a36Sopenharmony_ci /* arg1: mov rdi, im */ 257862306a36Sopenharmony_ci emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im); 257962306a36Sopenharmony_ci if (emit_rsb_call(&prog, __bpf_tramp_exit, prog)) { 258062306a36Sopenharmony_ci ret = -EINVAL; 258162306a36Sopenharmony_ci goto cleanup; 258262306a36Sopenharmony_ci } 258362306a36Sopenharmony_ci } else if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) 258462306a36Sopenharmony_ci /* Before running the original function, restore the 258562306a36Sopenharmony_ci * tail_call_cnt from stack to rax. 
258662306a36Sopenharmony_ci */ 258762306a36Sopenharmony_ci RESTORE_TAIL_CALL_CNT(stack_size); 258862306a36Sopenharmony_ci 258962306a36Sopenharmony_ci /* restore return value of orig_call or fentry prog back into RAX */ 259062306a36Sopenharmony_ci if (save_ret) 259162306a36Sopenharmony_ci emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8); 259262306a36Sopenharmony_ci 259362306a36Sopenharmony_ci emit_ldx(&prog, BPF_DW, BPF_REG_6, BPF_REG_FP, -rbx_off); 259462306a36Sopenharmony_ci EMIT1(0xC9); /* leave */ 259562306a36Sopenharmony_ci if (flags & BPF_TRAMP_F_SKIP_FRAME) 259662306a36Sopenharmony_ci /* skip our return address and return to parent */ 259762306a36Sopenharmony_ci EMIT4(0x48, 0x83, 0xC4, 8); /* add rsp, 8 */ 259862306a36Sopenharmony_ci emit_return(&prog, prog); 259962306a36Sopenharmony_ci /* Make sure the trampoline generation logic doesn't overflow */ 260062306a36Sopenharmony_ci if (WARN_ON_ONCE(prog > (u8 *)image_end - BPF_INSN_SAFETY)) { 260162306a36Sopenharmony_ci ret = -EFAULT; 260262306a36Sopenharmony_ci goto cleanup; 260362306a36Sopenharmony_ci } 260462306a36Sopenharmony_ci ret = prog - (u8 *)image; 260562306a36Sopenharmony_ci 260662306a36Sopenharmony_cicleanup: 260762306a36Sopenharmony_ci kfree(branches); 260862306a36Sopenharmony_ci return ret; 260962306a36Sopenharmony_ci} 261062306a36Sopenharmony_ci 261162306a36Sopenharmony_cistatic int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf) 261262306a36Sopenharmony_ci{ 261362306a36Sopenharmony_ci u8 *jg_reloc, *prog = *pprog; 261462306a36Sopenharmony_ci int pivot, err, jg_bytes = 1; 261562306a36Sopenharmony_ci s64 jg_offset; 261662306a36Sopenharmony_ci 261762306a36Sopenharmony_ci if (a == b) { 261862306a36Sopenharmony_ci /* Leaf node of recursion, i.e. not a range of indices 261962306a36Sopenharmony_ci * anymore. 
262062306a36Sopenharmony_ci */ 262162306a36Sopenharmony_ci EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */ 262262306a36Sopenharmony_ci if (!is_simm32(progs[a])) 262362306a36Sopenharmony_ci return -1; 262462306a36Sopenharmony_ci EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), 262562306a36Sopenharmony_ci progs[a]); 262662306a36Sopenharmony_ci err = emit_cond_near_jump(&prog, /* je func */ 262762306a36Sopenharmony_ci (void *)progs[a], image + (prog - buf), 262862306a36Sopenharmony_ci X86_JE); 262962306a36Sopenharmony_ci if (err) 263062306a36Sopenharmony_ci return err; 263162306a36Sopenharmony_ci 263262306a36Sopenharmony_ci emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf)); 263362306a36Sopenharmony_ci 263462306a36Sopenharmony_ci *pprog = prog; 263562306a36Sopenharmony_ci return 0; 263662306a36Sopenharmony_ci } 263762306a36Sopenharmony_ci 263862306a36Sopenharmony_ci /* Not a leaf node, so we pivot, and recursively descend into 263962306a36Sopenharmony_ci * the lower and upper ranges. 264062306a36Sopenharmony_ci */ 264162306a36Sopenharmony_ci pivot = (b - a) / 2; 264262306a36Sopenharmony_ci EMIT1(add_1mod(0x48, BPF_REG_3)); /* cmp rdx,func */ 264362306a36Sopenharmony_ci if (!is_simm32(progs[a + pivot])) 264462306a36Sopenharmony_ci return -1; 264562306a36Sopenharmony_ci EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]); 264662306a36Sopenharmony_ci 264762306a36Sopenharmony_ci if (pivot > 2) { /* jg upper_part */ 264862306a36Sopenharmony_ci /* Require near jump. 
*/ 264962306a36Sopenharmony_ci jg_bytes = 4; 265062306a36Sopenharmony_ci EMIT2_off32(0x0F, X86_JG + 0x10, 0); 265162306a36Sopenharmony_ci } else { 265262306a36Sopenharmony_ci EMIT2(X86_JG, 0); 265362306a36Sopenharmony_ci } 265462306a36Sopenharmony_ci jg_reloc = prog; 265562306a36Sopenharmony_ci 265662306a36Sopenharmony_ci err = emit_bpf_dispatcher(&prog, a, a + pivot, /* emit lower_part */ 265762306a36Sopenharmony_ci progs, image, buf); 265862306a36Sopenharmony_ci if (err) 265962306a36Sopenharmony_ci return err; 266062306a36Sopenharmony_ci 266162306a36Sopenharmony_ci /* From Intel 64 and IA-32 Architectures Optimization 266262306a36Sopenharmony_ci * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler 266362306a36Sopenharmony_ci * Coding Rule 11: All branch targets should be 16-byte 266462306a36Sopenharmony_ci * aligned. 266562306a36Sopenharmony_ci */ 266662306a36Sopenharmony_ci emit_align(&prog, 16); 266762306a36Sopenharmony_ci jg_offset = prog - jg_reloc; 266862306a36Sopenharmony_ci emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); 266962306a36Sopenharmony_ci 267062306a36Sopenharmony_ci err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */ 267162306a36Sopenharmony_ci b, progs, image, buf); 267262306a36Sopenharmony_ci if (err) 267362306a36Sopenharmony_ci return err; 267462306a36Sopenharmony_ci 267562306a36Sopenharmony_ci *pprog = prog; 267662306a36Sopenharmony_ci return 0; 267762306a36Sopenharmony_ci} 267862306a36Sopenharmony_ci 267962306a36Sopenharmony_cistatic int cmp_ips(const void *a, const void *b) 268062306a36Sopenharmony_ci{ 268162306a36Sopenharmony_ci const s64 *ipa = a; 268262306a36Sopenharmony_ci const s64 *ipb = b; 268362306a36Sopenharmony_ci 268462306a36Sopenharmony_ci if (*ipa > *ipb) 268562306a36Sopenharmony_ci return 1; 268662306a36Sopenharmony_ci if (*ipa < *ipb) 268762306a36Sopenharmony_ci return -1; 268862306a36Sopenharmony_ci return 0; 268962306a36Sopenharmony_ci} 269062306a36Sopenharmony_ci 
269162306a36Sopenharmony_ciint arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs) 269262306a36Sopenharmony_ci{ 269362306a36Sopenharmony_ci u8 *prog = buf; 269462306a36Sopenharmony_ci 269562306a36Sopenharmony_ci sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL); 269662306a36Sopenharmony_ci return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf); 269762306a36Sopenharmony_ci} 269862306a36Sopenharmony_ci 269962306a36Sopenharmony_cistruct x64_jit_data { 270062306a36Sopenharmony_ci struct bpf_binary_header *rw_header; 270162306a36Sopenharmony_ci struct bpf_binary_header *header; 270262306a36Sopenharmony_ci int *addrs; 270362306a36Sopenharmony_ci u8 *image; 270462306a36Sopenharmony_ci int proglen; 270562306a36Sopenharmony_ci struct jit_context ctx; 270662306a36Sopenharmony_ci}; 270762306a36Sopenharmony_ci 270862306a36Sopenharmony_ci#define MAX_PASSES 20 270962306a36Sopenharmony_ci#define PADDING_PASSES (MAX_PASSES - 5) 271062306a36Sopenharmony_ci 271162306a36Sopenharmony_cistruct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) 271262306a36Sopenharmony_ci{ 271362306a36Sopenharmony_ci struct bpf_binary_header *rw_header = NULL; 271462306a36Sopenharmony_ci struct bpf_binary_header *header = NULL; 271562306a36Sopenharmony_ci struct bpf_prog *tmp, *orig_prog = prog; 271662306a36Sopenharmony_ci struct x64_jit_data *jit_data; 271762306a36Sopenharmony_ci int proglen, oldproglen = 0; 271862306a36Sopenharmony_ci struct jit_context ctx = {}; 271962306a36Sopenharmony_ci bool tmp_blinded = false; 272062306a36Sopenharmony_ci bool extra_pass = false; 272162306a36Sopenharmony_ci bool padding = false; 272262306a36Sopenharmony_ci u8 *rw_image = NULL; 272362306a36Sopenharmony_ci u8 *image = NULL; 272462306a36Sopenharmony_ci int *addrs; 272562306a36Sopenharmony_ci int pass; 272662306a36Sopenharmony_ci int i; 272762306a36Sopenharmony_ci 272862306a36Sopenharmony_ci if (!prog->jit_requested) 272962306a36Sopenharmony_ci return orig_prog; 
273062306a36Sopenharmony_ci 273162306a36Sopenharmony_ci tmp = bpf_jit_blind_constants(prog); 273262306a36Sopenharmony_ci /* 273362306a36Sopenharmony_ci * If blinding was requested and we failed during blinding, 273462306a36Sopenharmony_ci * we must fall back to the interpreter. 273562306a36Sopenharmony_ci */ 273662306a36Sopenharmony_ci if (IS_ERR(tmp)) 273762306a36Sopenharmony_ci return orig_prog; 273862306a36Sopenharmony_ci if (tmp != prog) { 273962306a36Sopenharmony_ci tmp_blinded = true; 274062306a36Sopenharmony_ci prog = tmp; 274162306a36Sopenharmony_ci } 274262306a36Sopenharmony_ci 274362306a36Sopenharmony_ci jit_data = prog->aux->jit_data; 274462306a36Sopenharmony_ci if (!jit_data) { 274562306a36Sopenharmony_ci jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL); 274662306a36Sopenharmony_ci if (!jit_data) { 274762306a36Sopenharmony_ci prog = orig_prog; 274862306a36Sopenharmony_ci goto out; 274962306a36Sopenharmony_ci } 275062306a36Sopenharmony_ci prog->aux->jit_data = jit_data; 275162306a36Sopenharmony_ci } 275262306a36Sopenharmony_ci addrs = jit_data->addrs; 275362306a36Sopenharmony_ci if (addrs) { 275462306a36Sopenharmony_ci ctx = jit_data->ctx; 275562306a36Sopenharmony_ci oldproglen = jit_data->proglen; 275662306a36Sopenharmony_ci image = jit_data->image; 275762306a36Sopenharmony_ci header = jit_data->header; 275862306a36Sopenharmony_ci rw_header = jit_data->rw_header; 275962306a36Sopenharmony_ci rw_image = (void *)rw_header + ((void *)image - (void *)header); 276062306a36Sopenharmony_ci extra_pass = true; 276162306a36Sopenharmony_ci padding = true; 276262306a36Sopenharmony_ci goto skip_init_addrs; 276362306a36Sopenharmony_ci } 276462306a36Sopenharmony_ci addrs = kvmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL); 276562306a36Sopenharmony_ci if (!addrs) { 276662306a36Sopenharmony_ci prog = orig_prog; 276762306a36Sopenharmony_ci goto out_addrs; 276862306a36Sopenharmony_ci } 276962306a36Sopenharmony_ci 277062306a36Sopenharmony_ci /* 
277162306a36Sopenharmony_ci * Before first pass, make a rough estimation of addrs[] 277262306a36Sopenharmony_ci * each BPF instruction is translated to less than 64 bytes 277362306a36Sopenharmony_ci */ 277462306a36Sopenharmony_ci for (proglen = 0, i = 0; i <= prog->len; i++) { 277562306a36Sopenharmony_ci proglen += 64; 277662306a36Sopenharmony_ci addrs[i] = proglen; 277762306a36Sopenharmony_ci } 277862306a36Sopenharmony_ci ctx.cleanup_addr = proglen; 277962306a36Sopenharmony_ciskip_init_addrs: 278062306a36Sopenharmony_ci 278162306a36Sopenharmony_ci /* 278262306a36Sopenharmony_ci * JITed image shrinks with every pass and the loop iterates 278362306a36Sopenharmony_ci * until the image stops shrinking. Very large BPF programs 278462306a36Sopenharmony_ci * may converge on the last pass. In such case do one more 278562306a36Sopenharmony_ci * pass to emit the final image. 278662306a36Sopenharmony_ci */ 278762306a36Sopenharmony_ci for (pass = 0; pass < MAX_PASSES || image; pass++) { 278862306a36Sopenharmony_ci if (!padding && pass >= PADDING_PASSES) 278962306a36Sopenharmony_ci padding = true; 279062306a36Sopenharmony_ci proglen = do_jit(prog, addrs, image, rw_image, oldproglen, &ctx, padding); 279162306a36Sopenharmony_ci if (proglen <= 0) { 279262306a36Sopenharmony_ciout_image: 279362306a36Sopenharmony_ci image = NULL; 279462306a36Sopenharmony_ci if (header) { 279562306a36Sopenharmony_ci bpf_arch_text_copy(&header->size, &rw_header->size, 279662306a36Sopenharmony_ci sizeof(rw_header->size)); 279762306a36Sopenharmony_ci bpf_jit_binary_pack_free(header, rw_header); 279862306a36Sopenharmony_ci } 279962306a36Sopenharmony_ci /* Fall back to interpreter mode */ 280062306a36Sopenharmony_ci prog = orig_prog; 280162306a36Sopenharmony_ci if (extra_pass) { 280262306a36Sopenharmony_ci prog->bpf_func = NULL; 280362306a36Sopenharmony_ci prog->jited = 0; 280462306a36Sopenharmony_ci prog->jited_len = 0; 280562306a36Sopenharmony_ci } 280662306a36Sopenharmony_ci goto out_addrs; 
280762306a36Sopenharmony_ci } 280862306a36Sopenharmony_ci if (image) { 280962306a36Sopenharmony_ci if (proglen != oldproglen) { 281062306a36Sopenharmony_ci pr_err("bpf_jit: proglen=%d != oldproglen=%d\n", 281162306a36Sopenharmony_ci proglen, oldproglen); 281262306a36Sopenharmony_ci goto out_image; 281362306a36Sopenharmony_ci } 281462306a36Sopenharmony_ci break; 281562306a36Sopenharmony_ci } 281662306a36Sopenharmony_ci if (proglen == oldproglen) { 281762306a36Sopenharmony_ci /* 281862306a36Sopenharmony_ci * The number of entries in extable is the number of BPF_LDX 281962306a36Sopenharmony_ci * insns that access kernel memory via "pointer to BTF type". 282062306a36Sopenharmony_ci * The verifier changed their opcode from LDX|MEM|size 282162306a36Sopenharmony_ci * to LDX|PROBE_MEM|size to make JITing easier. 282262306a36Sopenharmony_ci */ 282362306a36Sopenharmony_ci u32 align = __alignof__(struct exception_table_entry); 282462306a36Sopenharmony_ci u32 extable_size = prog->aux->num_exentries * 282562306a36Sopenharmony_ci sizeof(struct exception_table_entry); 282662306a36Sopenharmony_ci 282762306a36Sopenharmony_ci /* allocate module memory for x86 insns and extable */ 282862306a36Sopenharmony_ci header = bpf_jit_binary_pack_alloc(roundup(proglen, align) + extable_size, 282962306a36Sopenharmony_ci &image, align, &rw_header, &rw_image, 283062306a36Sopenharmony_ci jit_fill_hole); 283162306a36Sopenharmony_ci if (!header) { 283262306a36Sopenharmony_ci prog = orig_prog; 283362306a36Sopenharmony_ci goto out_addrs; 283462306a36Sopenharmony_ci } 283562306a36Sopenharmony_ci prog->aux->extable = (void *) image + roundup(proglen, align); 283662306a36Sopenharmony_ci } 283762306a36Sopenharmony_ci oldproglen = proglen; 283862306a36Sopenharmony_ci cond_resched(); 283962306a36Sopenharmony_ci } 284062306a36Sopenharmony_ci 284162306a36Sopenharmony_ci if (bpf_jit_enable > 1) 284262306a36Sopenharmony_ci bpf_jit_dump(prog->len, proglen, pass + 1, rw_image); 284362306a36Sopenharmony_ci 
284462306a36Sopenharmony_ci if (image) { 284562306a36Sopenharmony_ci if (!prog->is_func || extra_pass) { 284662306a36Sopenharmony_ci /* 284762306a36Sopenharmony_ci * bpf_jit_binary_pack_finalize fails in two scenarios: 284862306a36Sopenharmony_ci * 1) header is not pointing to proper module memory; 284962306a36Sopenharmony_ci * 2) the arch doesn't support bpf_arch_text_copy(). 285062306a36Sopenharmony_ci * 285162306a36Sopenharmony_ci * Both cases are serious bugs and justify WARN_ON. 285262306a36Sopenharmony_ci */ 285362306a36Sopenharmony_ci if (WARN_ON(bpf_jit_binary_pack_finalize(prog, header, rw_header))) { 285462306a36Sopenharmony_ci /* header has been freed */ 285562306a36Sopenharmony_ci header = NULL; 285662306a36Sopenharmony_ci goto out_image; 285762306a36Sopenharmony_ci } 285862306a36Sopenharmony_ci 285962306a36Sopenharmony_ci bpf_tail_call_direct_fixup(prog); 286062306a36Sopenharmony_ci } else { 286162306a36Sopenharmony_ci jit_data->addrs = addrs; 286262306a36Sopenharmony_ci jit_data->ctx = ctx; 286362306a36Sopenharmony_ci jit_data->proglen = proglen; 286462306a36Sopenharmony_ci jit_data->image = image; 286562306a36Sopenharmony_ci jit_data->header = header; 286662306a36Sopenharmony_ci jit_data->rw_header = rw_header; 286762306a36Sopenharmony_ci } 286862306a36Sopenharmony_ci prog->bpf_func = (void *)image; 286962306a36Sopenharmony_ci prog->jited = 1; 287062306a36Sopenharmony_ci prog->jited_len = proglen; 287162306a36Sopenharmony_ci } else { 287262306a36Sopenharmony_ci prog = orig_prog; 287362306a36Sopenharmony_ci } 287462306a36Sopenharmony_ci 287562306a36Sopenharmony_ci if (!image || !prog->is_func || extra_pass) { 287662306a36Sopenharmony_ci if (image) 287762306a36Sopenharmony_ci bpf_prog_fill_jited_linfo(prog, addrs + 1); 287862306a36Sopenharmony_ciout_addrs: 287962306a36Sopenharmony_ci kvfree(addrs); 288062306a36Sopenharmony_ci kfree(jit_data); 288162306a36Sopenharmony_ci prog->aux->jit_data = NULL; 288262306a36Sopenharmony_ci } 
288362306a36Sopenharmony_ciout: 288462306a36Sopenharmony_ci if (tmp_blinded) 288562306a36Sopenharmony_ci bpf_jit_prog_release_other(prog, prog == orig_prog ? 288662306a36Sopenharmony_ci tmp : orig_prog); 288762306a36Sopenharmony_ci return prog; 288862306a36Sopenharmony_ci} 288962306a36Sopenharmony_ci 289062306a36Sopenharmony_cibool bpf_jit_supports_kfunc_call(void) 289162306a36Sopenharmony_ci{ 289262306a36Sopenharmony_ci return true; 289362306a36Sopenharmony_ci} 289462306a36Sopenharmony_ci 289562306a36Sopenharmony_civoid *bpf_arch_text_copy(void *dst, void *src, size_t len) 289662306a36Sopenharmony_ci{ 289762306a36Sopenharmony_ci if (text_poke_copy(dst, src, len) == NULL) 289862306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 289962306a36Sopenharmony_ci return dst; 290062306a36Sopenharmony_ci} 290162306a36Sopenharmony_ci 290262306a36Sopenharmony_ci/* Indicate the JIT backend supports mixing bpf2bpf and tailcalls. */ 290362306a36Sopenharmony_cibool bpf_jit_supports_subprog_tailcalls(void) 290462306a36Sopenharmony_ci{ 290562306a36Sopenharmony_ci return true; 290662306a36Sopenharmony_ci} 290762306a36Sopenharmony_ci 290862306a36Sopenharmony_civoid bpf_jit_free(struct bpf_prog *prog) 290962306a36Sopenharmony_ci{ 291062306a36Sopenharmony_ci if (prog->jited) { 291162306a36Sopenharmony_ci struct x64_jit_data *jit_data = prog->aux->jit_data; 291262306a36Sopenharmony_ci struct bpf_binary_header *hdr; 291362306a36Sopenharmony_ci 291462306a36Sopenharmony_ci /* 291562306a36Sopenharmony_ci * If we fail the final pass of JIT (from jit_subprogs), 291662306a36Sopenharmony_ci * the program may not be finalized yet. Call finalize here 291762306a36Sopenharmony_ci * before freeing it. 
291862306a36Sopenharmony_ci */ 291962306a36Sopenharmony_ci if (jit_data) { 292062306a36Sopenharmony_ci bpf_jit_binary_pack_finalize(prog, jit_data->header, 292162306a36Sopenharmony_ci jit_data->rw_header); 292262306a36Sopenharmony_ci kvfree(jit_data->addrs); 292362306a36Sopenharmony_ci kfree(jit_data); 292462306a36Sopenharmony_ci } 292562306a36Sopenharmony_ci hdr = bpf_jit_binary_pack_hdr(prog); 292662306a36Sopenharmony_ci bpf_jit_binary_pack_free(hdr, NULL); 292762306a36Sopenharmony_ci WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(prog)); 292862306a36Sopenharmony_ci } 292962306a36Sopenharmony_ci 293062306a36Sopenharmony_ci bpf_prog_unlock_free(prog); 293162306a36Sopenharmony_ci} 293262306a36Sopenharmony_ci 293362306a36Sopenharmony_civoid bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke, 293462306a36Sopenharmony_ci struct bpf_prog *new, struct bpf_prog *old) 293562306a36Sopenharmony_ci{ 293662306a36Sopenharmony_ci u8 *old_addr, *new_addr, *old_bypass_addr; 293762306a36Sopenharmony_ci int ret; 293862306a36Sopenharmony_ci 293962306a36Sopenharmony_ci old_bypass_addr = old ? NULL : poke->bypass_addr; 294062306a36Sopenharmony_ci old_addr = old ? (u8 *)old->bpf_func + poke->adj_off : NULL; 294162306a36Sopenharmony_ci new_addr = new ? (u8 *)new->bpf_func + poke->adj_off : NULL; 294262306a36Sopenharmony_ci 294362306a36Sopenharmony_ci /* 294462306a36Sopenharmony_ci * On program loading or teardown, the program's kallsym entry 294562306a36Sopenharmony_ci * might not be in place, so we use __bpf_arch_text_poke to skip 294662306a36Sopenharmony_ci * the kallsyms check. 
294762306a36Sopenharmony_ci */ 294862306a36Sopenharmony_ci if (new) { 294962306a36Sopenharmony_ci ret = __bpf_arch_text_poke(poke->tailcall_target, 295062306a36Sopenharmony_ci BPF_MOD_JUMP, 295162306a36Sopenharmony_ci old_addr, new_addr); 295262306a36Sopenharmony_ci BUG_ON(ret < 0); 295362306a36Sopenharmony_ci if (!old) { 295462306a36Sopenharmony_ci ret = __bpf_arch_text_poke(poke->tailcall_bypass, 295562306a36Sopenharmony_ci BPF_MOD_JUMP, 295662306a36Sopenharmony_ci poke->bypass_addr, 295762306a36Sopenharmony_ci NULL); 295862306a36Sopenharmony_ci BUG_ON(ret < 0); 295962306a36Sopenharmony_ci } 296062306a36Sopenharmony_ci } else { 296162306a36Sopenharmony_ci ret = __bpf_arch_text_poke(poke->tailcall_bypass, 296262306a36Sopenharmony_ci BPF_MOD_JUMP, 296362306a36Sopenharmony_ci old_bypass_addr, 296462306a36Sopenharmony_ci poke->bypass_addr); 296562306a36Sopenharmony_ci BUG_ON(ret < 0); 296662306a36Sopenharmony_ci /* let other CPUs finish the execution of program 296762306a36Sopenharmony_ci * so that it will not possible to expose them 296862306a36Sopenharmony_ci * to invalid nop, stack unwind, nop state 296962306a36Sopenharmony_ci */ 297062306a36Sopenharmony_ci if (!ret) 297162306a36Sopenharmony_ci synchronize_rcu(); 297262306a36Sopenharmony_ci ret = __bpf_arch_text_poke(poke->tailcall_target, 297362306a36Sopenharmony_ci BPF_MOD_JUMP, 297462306a36Sopenharmony_ci old_addr, NULL); 297562306a36Sopenharmony_ci BUG_ON(ret < 0); 297662306a36Sopenharmony_ci } 297762306a36Sopenharmony_ci} 2978