// SPDX-License-Identifier: GPL-2.0
/*
 * Just-In-Time compiler for eBPF filters on IA32 (32bit x86)
 *
 * Author: Wang YanQing (udknight@gmail.com)
 * This code is based on code and ideas from:
 * Eric Dumazet (eric.dumazet@gmail.com)
 * and from:
 * Shubham Bansal <illusionist.neo@gmail.com>
 */

#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/if_vlan.h>
#include <asm/cacheflush.h>
#include <asm/set_memory.h>
#include <asm/nospec-branch.h>
#include <asm/asm-prototypes.h>
#include <linux/bpf.h>

/*
 * eBPF prog stack layout:
 *
 *                         high
 * original ESP =>        +-----+
 *                        |     | callee saved registers
 *                        +-----+
 *                        | ... | eBPF JIT scratch space
 * BPF_FP,IA32_EBP  =>    +-----+
 *                        | ... | eBPF prog stack
 *                        +-----+
 *                        |RSVD | JIT scratchpad
 * current ESP =>         +-----+
 *                        |     |
 *                        | ... | Function call stack
 *                        |     |
 *                        +-----+
 *                          low
 *
 * The callee saved registers:
 *
 *                                high
 * original ESP =>        +------------------+ \
 *                        |        ebp       | |
 * current EBP =>         +------------------+ } callee saved registers
 *                        |    ebx,esi,edi   | |
 *                        +------------------+ /
 *                                low
 */

static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
{
	if (len == 1)
		*ptr = bytes;
	else if (len == 2)
		*(u16 *)ptr = bytes;
	else {
		*(u32 *)ptr = bytes;
		barrier();
	}
	return ptr + len;
}

#define EMIT(bytes, len) \
	do { prog = emit_code(prog, bytes, len); cnt += len; } while (0)

#define EMIT1(b1)		EMIT(b1, 1)
#define EMIT2(b1, b2)		EMIT((b1) + ((b2) << 8), 2)
#define EMIT3(b1, b2, b3)	EMIT((b1) + ((b2) << 8) + ((b3) << 16), 3)
#define EMIT4(b1, b2, b3, b4)   \
	EMIT((b1) + ((b2) << 8) + ((b3) << 16) + ((b4) << 24), 4)

#define EMIT1_off32(b1, off) \
	do { EMIT1(b1); EMIT(off, 4); } while (0)
#define EMIT2_off32(b1, b2, off) \
	do { EMIT2(b1, b2); EMIT(off, 4); } while (0)
#define EMIT3_off32(b1, b2, b3, off) \
	do { EMIT3(b1, b2, b3); EMIT(off, 4); } while (0)
#define EMIT4_off32(b1, b2, b3, b4, off) \
	do { EMIT4(b1, b2, b3, b4); EMIT(off, 4); } while (0)

#define jmp_label(label, jmp_insn_len) (label - cnt - jmp_insn_len)

static bool is_imm8(int value)
{
	return value <= 127 && value >= -128;
}

static bool is_simm32(s64 value)
{
	return value == (s64) (s32) value;
}

#define STACK_OFFSET(k)	(k)
#define TCALL_CNT	(MAX_BPF_JIT_REG + 0)	/* Tail Call Count */

#define IA32_EAX	(0x0)
#define IA32_EBX	(0x3)
#define IA32_ECX	(0x1)
#define IA32_EDX	(0x2)
#define IA32_ESI	(0x6)
#define IA32_EDI	(0x7)
#define IA32_EBP	(0x5)
#define IA32_ESP	(0x4)

/*
 * List of x86 conditional jump opcodes (. + s8)
 * Add 0x10 (and an extra 0x0F prefix) to generate far jumps (. + s32)
 */
#define IA32_JB  0x72
#define IA32_JAE 0x73
#define IA32_JE  0x74
#define IA32_JNE 0x75
#define IA32_JBE 0x76
#define IA32_JA  0x77
#define IA32_JL  0x7C
#define IA32_JGE 0x7D
#define IA32_JLE 0x7E
#define IA32_JG  0x7F

#define COND_JMP_OPCODE_INVALID	(0xFF)

/*
 * Map eBPF registers to IA32 32-bit registers or stack scratch space.
 *
 * 1. All the registers, R0-R10, are mapped to scratch space on stack.
 * 2. We need two 64-bit temp registers to do complex operations on eBPF
 *    registers.
 * 3. For performance reasons, BPF_REG_AX, used for blinding constants,
 *    is mapped to a real hardware register pair, IA32_ESI and IA32_EDI.
 *
 * As the eBPF registers are all 64 bit wide and IA32 has only 32-bit
 * registers, we have to map each eBPF register to two IA32 32-bit
 * registers or two scratch stack slots, and build each 64-bit eBPF
 * register from that pair.
 *
 * We use IA32_EAX, IA32_EDX, IA32_ECX, IA32_EBX as temporary registers.
 */
static const u8 bpf2ia32[][2] = {
	/* Return value from in-kernel function, and exit value from eBPF */
	[BPF_REG_0] = {STACK_OFFSET(0), STACK_OFFSET(4)},

	/* The arguments from eBPF program to in-kernel function */
	/* Stored on stack scratch space */
	[BPF_REG_1] = {STACK_OFFSET(8), STACK_OFFSET(12)},
	[BPF_REG_2] = {STACK_OFFSET(16), STACK_OFFSET(20)},
	[BPF_REG_3] = {STACK_OFFSET(24), STACK_OFFSET(28)},
	[BPF_REG_4] = {STACK_OFFSET(32), STACK_OFFSET(36)},
	[BPF_REG_5] = {STACK_OFFSET(40), STACK_OFFSET(44)},

	/* Callee saved registers that in-kernel function will preserve */
	/* Stored on stack scratch space */
	[BPF_REG_6] = {STACK_OFFSET(48), STACK_OFFSET(52)},
	[BPF_REG_7] = {STACK_OFFSET(56), STACK_OFFSET(60)},
	[BPF_REG_8] = {STACK_OFFSET(64), STACK_OFFSET(68)},
	[BPF_REG_9] = {STACK_OFFSET(72), STACK_OFFSET(76)},

	/* Read only Frame Pointer to access Stack */
	[BPF_REG_FP] = {STACK_OFFSET(80), STACK_OFFSET(84)},

	/* Temporary register for blinding constants. */
	[BPF_REG_AX] = {IA32_ESI, IA32_EDI},

	/* Tail call count. Stored on stack scratch space. */
	[TCALL_CNT] = {STACK_OFFSET(88), STACK_OFFSET(92)},
};
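
/*
 * Illustrative example: with the mapping above, the 64-bit eBPF register
 * R1 lives in the two scratch slots [ebp+8] (low word) and [ebp+12]
 * (high word), while BPF_REG_AX lives in the ESI (low) / EDI (high)
 * register pair.
 */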

#define dst_lo	dst[0]
#define dst_hi	dst[1]
#define src_lo	src[0]
#define src_hi	src[1]

#define STACK_ALIGNMENT	8
/*
 * Stack space for BPF_REG_0 through BPF_REG_9, BPF_REG_FP and the
 * tail call count: twelve 64-bit slots, 96 bytes in total.
 */
#define SCRATCH_SIZE 96

/* Total stack size used in JITed code */
#define _STACK_SIZE	(stack_depth + SCRATCH_SIZE)

#define STACK_SIZE ALIGN(_STACK_SIZE, STACK_ALIGNMENT)

/* Get the offset of eBPF REGISTERs stored on scratch space. */
#define STACK_VAR(off) (off)

/* Encode 'dst_reg' register into IA32 opcode 'byte' */
static u8 add_1reg(u8 byte, u32 dst_reg)
{
	return byte + dst_reg;
}

/* Encode 'dst_reg' and 'src_reg' registers into IA32 opcode 'byte' */
static u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg)
{
	return byte + dst_reg + (src_reg << 3);
}
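
/*
 * Illustrative example: add_2reg(0xC0, IA32_ECX, IA32_EAX) yields the
 * ModRM byte 0xC1 (mod=11, reg=eax, rm=ecx), so EMIT2(0x89, ...) with
 * that byte encodes "mov ecx,eax" (opcode 89 /r stores the reg operand
 * into the r/m operand).
 */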

static void jit_fill_hole(void *area, unsigned int size)
{
	/* Fill whole space with int3 instructions */
	memset(area, 0xcc, size);
}

/* dst = imm (4 bytes) */
static inline void emit_ia32_mov_i(const u8 dst, const u32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk) {
		if (val == 0) {
			/* xor eax,eax */
			EMIT2(0x33, add_2reg(0xC0, IA32_EAX, IA32_EAX));
			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		} else {
			/* mov dword ptr [ebp+off],imm32 */
			EMIT3_off32(0xC7, add_1reg(0x40, IA32_EBP),
				    STACK_VAR(dst), val);
		}
	} else {
		if (val == 0)
			/* xor dst,dst */
			EMIT2(0x33, add_2reg(0xC0, dst, dst));
		else
			/* mov dst,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, dst),
				    val);
	}
	*pprog = prog;
}

/* dst = src (4 bytes) */
static inline void emit_ia32_mov_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));
	if (dstk)
		/* mov dword ptr [ebp+off],sreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, sreg), STACK_VAR(dst));
	else
		/* mov dst,sreg */
		EMIT2(0x89, add_2reg(0xC0, dst, sreg));

	*pprog = prog;
}

/* dst = src */
static inline void emit_ia32_mov_r64(const bool is64, const u8 dst[],
				     const u8 src[], bool dstk,
				     bool sstk, u8 **pprog,
				     const struct bpf_prog_aux *aux)
{
	emit_ia32_mov_r(dst_lo, src_lo, dstk, sstk, pprog);
	if (is64)
		/* complete 8 byte move */
		emit_ia32_mov_r(dst_hi, src_hi, dstk, sstk, pprog);
	else if (!aux->verifier_zext)
		/* zero out high 4 bytes */
		emit_ia32_mov_i(dst_hi, 0, dstk, pprog);
}

/* Sign extended move */
static inline void emit_ia32_mov_i64(const bool is64, const u8 dst[],
				     const u32 val, bool dstk, u8 **pprog)
{
	u32 hi = 0;

	if (is64 && (val & (1<<31)))
		hi = (u32)~0;
	emit_ia32_mov_i(dst_lo, val, dstk, pprog);
	emit_ia32_mov_i(dst_hi, hi, dstk, pprog);
}
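
/*
 * Illustrative example: a 64-bit "mov r1, -1" arrives with val =
 * 0xffffffff (bit 31 set), so hi becomes 0xffffffff as well and the
 * destination pair holds the sign-extended 64-bit value -1.
 */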

/*
 * ALU operation (32 bit)
 * dst = dst * src
 */
static inline void emit_ia32_mul_r(const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_ECX : src;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* mul sreg */
	EMIT2(0xF7, add_1reg(0xE0, sreg));

	if (dstk)
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov dst,eax */
		EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));

	*pprog = prog;
}

static inline void emit_ia32_to_le_r64(const u8 dst[], s32 val,
					 bool dstk, u8 **pprog,
					 const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk && val != 64) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/*
		 * Emit 'movzwl eax,ax' to zero extend 16-bit
		 * into 64 bit
		 */
		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));
		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* nop */
		break;
	}

	if (dstk && val != 64) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

static inline void emit_ia32_to_be_r64(const u8 dst[], s32 val,
				       bool dstk, u8 **pprog,
				       const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	switch (val) {
	case 16:
		/* Emit 'ror %ax, 8' to swap lower 2 bytes */
		EMIT1(0x66);
		EMIT3(0xC1, add_1reg(0xC8, dreg_lo), 8);

		EMIT2(0x0F, 0xB7);
		EMIT1(add_2reg(0xC0, dreg_lo, dreg_lo));

		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 32:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		if (!aux->verifier_zext)
			/* xor dreg_hi,dreg_hi */
			EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
		break;
	case 64:
		/* Emit 'bswap eax' to swap lower 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_lo));

		/* Emit 'bswap edx' to swap upper 4 bytes */
		EMIT1(0x0F);
		EMIT1(add_1reg(0xC8, dreg_hi));

		/* mov ecx,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, IA32_ECX, dreg_hi));
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* mov dreg_lo,ecx */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, IA32_ECX));

		break;
	}
	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
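
/*
 * Illustrative example: byte swapping 0x1122334455667788 (lo =
 * 0x55667788, hi = 0x11223344): the two bswaps give lo = 0x88776655
 * and hi = 0x44332211, and the register swap through ecx yields the
 * final value 0x8877665544332211.
 */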

/*
 * ALU operation (32 bit)
 * dst = dst (div|mod) src
 */
static inline void emit_ia32_div_mod_r(const u8 op, const u8 dst, const u8 src,
				       bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst));
	else
		/* mov eax,dst */
		EMIT2(0x8B, add_2reg(0xC0, dst, IA32_EAX));

	/* xor edx,edx */
	EMIT2(0x31, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* div ecx */
	EMIT2(0xF7, add_1reg(0xF0, IA32_ECX));

	if (op == BPF_MOD) {
		if (dstk)
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(dst));
		else
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EDX));
	} else {
		if (dstk)
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(dst));
		else
			EMIT2(0x89, add_2reg(0xC0, dst, IA32_EAX));
	}
	*pprog = prog;
}

/*
 * ALU operation (32 bit)
 * dst = dst (shift) src
 */
static inline void emit_ia32_shift_r(const u8 op, const u8 dst, const u8 src,
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 b2;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src));
	else if (src != IA32_ECX)
		/* mov ecx,src */
		EMIT2(0x8B, add_2reg(0xC0, src, IA32_ECX));

	switch (op) {
	case BPF_LSH:
		b2 = 0xE0; break;
	case BPF_RSH:
		b2 = 0xE8; break;
	case BPF_ARSH:
		b2 = 0xF8; break;
	default:
		return;
	}
	EMIT2(0xD3, add_1reg(b2, dreg));

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg), STACK_VAR(dst));
	*pprog = prog;
}

/*
 * ALU operation (32 bit)
 * dst = dst (op) src
 */
static inline void emit_ia32_alu_r(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const u8 src, bool dstk,
				   bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 sreg = sstk ? IA32_EAX : src;
	u8 dreg = dstk ? IA32_EDX : dst;

	if (sstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(src));

	if (dstk)
		/* mov edx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(dst));

	switch (BPF_OP(op)) {
	/* dst = dst + src */
	case BPF_ADD:
		if (hi && is64)
			EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		else
			EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst - src */
	case BPF_SUB:
		if (hi && is64)
			EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		else
			EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst | src */
	case BPF_OR:
		EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & src */
	case BPF_AND:
		EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ src */
	case BPF_XOR:
		EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}
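
/*
 * Illustrative note: for the high half of a 64-bit ADD or SUB the
 * emitter above uses adc (0x11) and sbb (0x19), so the carry/borrow
 * produced by the low-word add (0x01) or sub (0x29) propagates into
 * the upper 32 bits.
 */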

/* ALU operation (64 bit) */
static inline void emit_ia32_alu_r64(const bool is64, const u8 op,
				     const u8 dst[], const u8 src[],
				     bool dstk, bool sstk,
				     u8 **pprog, const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;

	emit_ia32_alu_r(is64, false, op, dst_lo, src_lo, dstk, sstk, &prog);
	if (is64)
		emit_ia32_alu_r(is64, true, op, dst_hi, src_hi, dstk, sstk,
				&prog);
	else if (!aux->verifier_zext)
		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
	*pprog = prog;
}

/*
 * ALU operation (32 bit)
 * dst = dst (op) val
 */
static inline void emit_ia32_alu_i(const bool is64, const bool hi, const u8 op,
				   const u8 dst, const s32 val, bool dstk,
				   u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg = dstk ? IA32_EAX : dst;
	u8 sreg = IA32_EDX;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(dst));

	if (!is_imm8(val))
		/* mov edx,imm32 */
		EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EDX), val);

	switch (op) {
	/* dst = dst + val */
	case BPF_ADD:
		if (hi && is64) {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xD0, dreg), val);
			else
				EMIT2(0x11, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xC0, dreg), val);
			else
				EMIT2(0x01, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst - val */
	case BPF_SUB:
		if (hi && is64) {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xD8, dreg), val);
			else
				EMIT2(0x19, add_2reg(0xC0, dreg, sreg));
		} else {
			if (is_imm8(val))
				EMIT3(0x83, add_1reg(0xE8, dreg), val);
			else
				EMIT2(0x29, add_2reg(0xC0, dreg, sreg));
		}
		break;
	/* dst = dst | val */
	case BPF_OR:
		if (is_imm8(val))
			EMIT3(0x83, add_1reg(0xC8, dreg), val);
		else
			EMIT2(0x09, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst & val */
	case BPF_AND:
		if (is_imm8(val))
			EMIT3(0x83, add_1reg(0xE0, dreg), val);
		else
			EMIT2(0x21, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = dst ^ val */
	case BPF_XOR:
		if (is_imm8(val))
			EMIT3(0x83, add_1reg(0xF0, dreg), val);
		else
			EMIT2(0x31, add_2reg(0xC0, dreg, sreg));
		break;
	/* dst = -dst */
	case BPF_NEG:
		EMIT2(0xF7, add_1reg(0xD8, dreg));
		break;
	}

	if (dstk)
		/* mov dword ptr [ebp+off],dreg */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg),
		      STACK_VAR(dst));
	*pprog = prog;
}

/* ALU operation (64 bit) */
static inline void emit_ia32_alu_i64(const bool is64, const u8 op,
				     const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog,
				     const struct bpf_prog_aux *aux)
{
	u8 *prog = *pprog;
	u32 hi = 0;

	if (is64 && (val & (1<<31)))
		hi = (u32)~0;

	emit_ia32_alu_i(is64, false, op, dst_lo, val, dstk, &prog);
	if (is64)
		emit_ia32_alu_i(is64, true, op, dst_hi, hi, dstk, &prog);
	else if (!aux->verifier_zext)
		emit_ia32_mov_i(dst_hi, 0, dstk, &prog);

	*pprog = prog;
}

/* dst = -dst (64 bit) */
static inline void emit_ia32_neg64(const u8 dst[], bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	/* neg dreg_lo */
	EMIT2(0xF7, add_1reg(0xD8, dreg_lo));
	/* adc dreg_hi,0x0 */
	EMIT3(0x83, add_1reg(0xD0, dreg_hi), 0x00);
	/* neg dreg_hi */
	EMIT2(0xF7, add_1reg(0xD8, dreg_hi));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}
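
/*
 * Illustrative example: negating x = 1 (lo = 1, hi = 0): "neg lo"
 * yields lo = 0xffffffff and sets CF, "adc hi,0" makes hi = 1, and
 * "neg hi" turns it into 0xffffffff -- together 0xffffffffffffffff,
 * i.e. -1.
 */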

/* dst = dst << src */
static inline void emit_ia32_lsh_r64(const u8 dst[], const u8 src[],
				     bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shld dreg_hi,dreg_lo,cl */
	EMIT3(0x0F, 0xA5, add_2reg(0xC0, dreg_hi, dreg_lo));
	/* shl dreg_lo,cl */
	EMIT2(0xD3, add_1reg(0xE0, dreg_lo));

	/* if ecx >= 32, mov dreg_lo into dreg_hi and clear dreg_lo */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (4 bytes) when < 32 */
	EMIT2(IA32_JB, 4);

	/* mov dreg_hi,dreg_lo */
	EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
	/* xor dreg_lo,dreg_lo */
	EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}
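
/*
 * Illustrative example: for src = 40, the shld/shl above only use
 * cl & 31 = 8, so the fixup taken when ecx >= 32 moves the shifted low
 * word into the high word and clears the low word, matching a true
 * 64-bit left shift by 40.
 */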

/* dst = dst >> src (signed) */
static inline void emit_ia32_arsh_r64(const u8 dst[], const u8 src[],
				      bool dstk, bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shrd dreg_lo,dreg_hi,cl */
	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* sar dreg_hi,cl */
	EMIT2(0xD3, add_1reg(0xF8, dreg_hi));

	/*
	 * if ecx >= 32, mov dreg_hi to dreg_lo and set/clear dreg_hi
	 * depending on sign
	 */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (5 bytes) when < 32 */
	EMIT2(IA32_JB, 5);

	/* mov dreg_lo,dreg_hi */
	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* sar dreg_hi,31 */
	EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}

/* dst = dst >> src */
static inline void emit_ia32_rsh_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	if (sstk)
		/* mov ecx,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(src_lo));
	else
		/* mov ecx,src_lo */
		EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_ECX));

	/* shrd dreg_lo,dreg_hi,cl */
	EMIT3(0x0F, 0xAD, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* shr dreg_hi,cl */
	EMIT2(0xD3, add_1reg(0xE8, dreg_hi));

	/* if ecx >= 32, mov dreg_hi to dreg_lo and clear dreg_hi */

	/* cmp ecx,32 */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), 32);
	/* skip the next two instructions (4 bytes) when < 32 */
	EMIT2(IA32_JB, 4);

	/* mov dreg_lo,dreg_hi */
	EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	/* xor dreg_hi,dreg_hi */
	EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	/* out: */
	*pprog = prog;
}

/* dst = dst << val */
static inline void emit_ia32_lsh_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	/* Do LSH operation */
	if (val < 32) {
		/* shld dreg_hi,dreg_lo,imm8 */
		EMIT4(0x0F, 0xA4, add_2reg(0xC0, dreg_hi, dreg_lo), val);
		/* shl dreg_lo,imm8 */
		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* shl dreg_lo,imm8 */
		EMIT3(0xC1, add_1reg(0xE0, dreg_lo), value);
		/* mov dreg_hi,dreg_lo */
		EMIT2(0x89, add_2reg(0xC0, dreg_hi, dreg_lo));
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
	} else {
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

/* dst = dst >> val */
static inline void emit_ia32_rsh_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}

	/* Do RSH operation */
	if (val < 32) {
		/* shrd dreg_lo,dreg_hi,imm8 */
		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
		/* shr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* shr dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xE8, dreg_hi), value);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	} else {
		/* xor dreg_lo,dreg_lo */
		EMIT2(0x33, add_2reg(0xC0, dreg_lo, dreg_lo));
		/* xor dreg_hi,dreg_hi */
		EMIT2(0x33, add_2reg(0xC0, dreg_hi, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

/* dst = dst >> val (signed) */
static inline void emit_ia32_arsh_i64(const u8 dst[], const u32 val,
				      bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
	u8 dreg_hi = dstk ? IA32_EDX : dst_hi;

	if (dstk) {
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
		      STACK_VAR(dst_hi));
	}
	/* Do the arithmetic RSH operation */
	if (val < 32) {
		/* shrd dreg_lo,dreg_hi,imm8 */
		EMIT4(0x0F, 0xAC, add_2reg(0xC0, dreg_lo, dreg_hi), val);
		/* sar dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), val);
	} else if (val >= 32 && val < 64) {
		u32 value = val - 32;

		/* sar dreg_hi,imm8 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), value);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));

		/* sar dreg_hi,31 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
	} else {
		/* sar dreg_hi,31 */
		EMIT3(0xC1, add_1reg(0xF8, dreg_hi), 31);
		/* mov dreg_lo,dreg_hi */
		EMIT2(0x89, add_2reg(0xC0, dreg_lo, dreg_hi));
	}

	if (dstk) {
		/* mov dword ptr [ebp+off],dreg_lo */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_lo),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],dreg_hi */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, dreg_hi),
		      STACK_VAR(dst_hi));
	}
	*pprog = prog;
}

/* dst = dst * src (64 bit) */
static inline void emit_ia32_mul_r64(const u8 dst[], const u8 src[], bool dstk,
				     bool sstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_hi));
	else
		/* mov eax,dst_hi */
		EMIT2(0x8B, add_2reg(0xC0, dst_hi, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* mov ecx,eax */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_hi));
	else
		/* mul src_hi */
		EMIT2(0xF7, add_1reg(0xE0, src_hi));

	/* add ecx,eax */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	if (dstk)
		/* mov eax,dword ptr [ebp+off] */
		EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
	else
		/* mov eax,dst_lo */
		EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

	if (sstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(src_lo));
	else
		/* mul src_lo */
		EMIT2(0xF7, add_1reg(0xE0, src_lo));

	/* add ecx,edx */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}
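
/*
 * Illustrative sketch of the 64x64 multiply above, modulo 2^64:
 *   dst * src = dst_lo*src_lo + ((dst_hi*src_lo + dst_lo*src_hi) << 32)
 * The first two one-operand muls accumulate the two cross terms in ecx,
 * the last mul leaves dst_lo*src_lo in edx:eax, and edx is folded into
 * ecx to form the final high word.
 */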

/* dst = dst * val (64 bit) */
static inline void emit_ia32_mul_i64(const u8 dst[], const u32 val,
				     bool dstk, u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;
	u32 hi;

	hi = val & (1<<31) ? (u32)~0 : 0;
	/* mov eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_hi));
	else
		/* mul dst_hi */
		EMIT2(0xF7, add_1reg(0xE0, dst_hi));

	/* mov ecx,eax */
	EMIT2(0x89, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* mov eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), hi);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));
	/* add ecx,eax */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EAX));

	/* mov eax,imm32 */
	EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EAX), val);
	if (dstk)
		/* mul dword ptr [ebp+off] */
		EMIT3(0xF7, add_1reg(0x60, IA32_EBP), STACK_VAR(dst_lo));
	else
		/* mul dst_lo */
		EMIT2(0xF7, add_1reg(0xE0, dst_lo));

	/* add ecx,edx */
	EMIT2(0x01, add_2reg(0xC0, IA32_ECX, IA32_EDX));

	if (dstk) {
		/* mov dword ptr [ebp+off],eax */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
		      STACK_VAR(dst_lo));
		/* mov dword ptr [ebp+off],ecx */
		EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX),
		      STACK_VAR(dst_hi));
	} else {
		/* mov dst_lo,eax */
		EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EAX));
		/* mov dst_hi,ecx */
		EMIT2(0x89, add_2reg(0xC0, dst_hi, IA32_ECX));
	}

	*pprog = prog;
}

static int bpf_size_to_x86_bytes(int bpf_size)
{
	if (bpf_size == BPF_W)
		return 4;
	else if (bpf_size == BPF_H)
		return 2;
	else if (bpf_size == BPF_B)
		return 1;
	else if (bpf_size == BPF_DW)
		return 4; /* imm32 */
	else
		return 0;
}

struct jit_context {
	int cleanup_addr; /* Epilogue code offset */
};

/* Maximum number of bytes emitted while JITing one eBPF insn */
#define BPF_MAX_INSN_SIZE	128
#define BPF_INSN_SAFETY		64

#define PROLOGUE_SIZE 35

/*
 * Emit prologue code for BPF program and check its size.
 * bpf_tail_call helper will skip it while jumping into another program.
 */
static void emit_prologue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	int cnt = 0;
	const u8 *r1 = bpf2ia32[BPF_REG_1];
	const u8 fplo = bpf2ia32[BPF_REG_FP][0];
	const u8 fphi = bpf2ia32[BPF_REG_FP][1];
	const u8 *tcc = bpf2ia32[TCALL_CNT];

	/* push ebp */
	EMIT1(0x55);
	/* mov ebp,esp */
	EMIT2(0x89, 0xE5);
	/* push edi */
	EMIT1(0x57);
	/* push esi */
	EMIT1(0x56);
	/* push ebx */
	EMIT1(0x53);

	/* sub esp,STACK_SIZE */
	EMIT2_off32(0x81, 0xEC, STACK_SIZE);
	/* sub ebp,SCRATCH_SIZE+12 */
	EMIT3(0x83, add_1reg(0xE8, IA32_EBP), SCRATCH_SIZE + 12);
	/* xor ebx,ebx */
	EMIT2(0x31, add_2reg(0xC0, IA32_EBX, IA32_EBX));

	/* Set up BPF prog stack base register */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBP), STACK_VAR(fplo));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(fphi));

	/* Move BPF_CTX (EAX) to BPF_REG_R1 */
	/* mov dword ptr [ebp+off],eax */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(r1[1]));

	/* Initialize the tail call count */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[0]));
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
	*pprog = prog;
}
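
/*
 * Illustrative note: after "sub ebp,SCRATCH_SIZE+12" above, ebp sits
 * below the 12 bytes of pushed callee-saved registers (edi/esi/ebx)
 * and the 96-byte scratch area, so every STACK_VAR() offset is a small
 * positive displacement from ebp.
 */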

/* Emit epilogue code for BPF program */
static void emit_epilogue(u8 **pprog, u32 stack_depth)
{
	u8 *prog = *pprog;
	const u8 *r0 = bpf2ia32[BPF_REG_0];
	int cnt = 0;

	/* mov eax,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r0[0]));
	/* mov edx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r0[1]));

	/* add ebp,SCRATCH_SIZE+12 */
	EMIT3(0x83, add_1reg(0xC0, IA32_EBP), SCRATCH_SIZE + 12);

	/* mov ebx,dword ptr [ebp-12] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), -12);
	/* mov esi,dword ptr [ebp-8] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ESI), -8);
	/* mov edi,dword ptr [ebp-4] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDI), -4);

	EMIT1(0xC9); /* leave */
	EMIT1(0xC3); /* ret */
	*pprog = prog;
}

static int emit_jmp_edx(u8 **pprog, u8 *ip)
{
	u8 *prog = *pprog;
	int cnt = 0;

#ifdef CONFIG_RETPOLINE
	EMIT1_off32(0xE9, (u8 *)__x86_indirect_thunk_edx - (ip + 5));
#else
	EMIT2(0xFF, 0xE2);
#endif
	*pprog = prog;

	return cnt;
}

/*
 * Generate the following code:
 * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
 *   if (index >= array->map.max_entries)
 *     goto out;
 *   if (++tail_call_cnt > MAX_TAIL_CALL_CNT)
 *     goto out;
 *   prog = array->ptrs[index];
 *   if (prog == NULL)
 *     goto out;
 *   goto *(prog->bpf_func + prologue_size);
 * out:
 */
static void emit_bpf_tail_call(u8 **pprog, u8 *ip)
{
	u8 *prog = *pprog;
	int cnt = 0;
	const u8 *r1 = bpf2ia32[BPF_REG_1];
	const u8 *r2 = bpf2ia32[BPF_REG_2];
	const u8 *r3 = bpf2ia32[BPF_REG_3];
	const u8 *tcc = bpf2ia32[TCALL_CNT];
	u32 lo, hi;
	static int jmp_label1 = -1;

	/*
	 * if (index >= array->map.max_entries)
	 *     goto out;
	 */
	/* mov eax,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r2[0]));
	/* mov edx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX), STACK_VAR(r3[0]));

	/* cmp dword ptr [eax+off],edx */
	EMIT3(0x39, add_2reg(0x40, IA32_EAX, IA32_EDX),
	      offsetof(struct bpf_array, map.max_entries));
	/* jbe out */
	EMIT2(IA32_JBE, jmp_label(jmp_label1, 2));

	/*
	 * if (tail_call_cnt >= MAX_TAIL_CALL_CNT)
	 *     goto out;
	 */
	lo = (u32)MAX_TAIL_CALL_CNT;
	hi = (u32)((u64)MAX_TAIL_CALL_CNT >> 32);
	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
	/* mov ebx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	/* cmp ebx,hi */
	EMIT3(0x83, add_1reg(0xF8, IA32_EBX), hi);
	/* jne (skip the low-word compare) */
	EMIT2(IA32_JNE, 3);
	/* cmp ecx,lo */
	EMIT3(0x83, add_1reg(0xF8, IA32_ECX), lo);

	/* jae out */
	EMIT2(IA32_JAE, jmp_label(jmp_label1, 2));

	/* add ecx,0x1 */
	EMIT3(0x83, add_1reg(0xC0, IA32_ECX), 0x01);
	/* adc ebx,0x0 */
	EMIT3(0x83, add_1reg(0xD0, IA32_EBX), 0x00);

	/* mov dword ptr [ebp+off],ecx */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(tcc[0]));
	/* mov dword ptr [ebp+off],ebx */
	EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EBX), STACK_VAR(tcc[1]));

	/* prog = array->ptrs[index]; */
	/* mov edx, [eax + edx * 4 + offsetof(...)] */
	EMIT3_off32(0x8B, 0x94, 0x90, offsetof(struct bpf_array, ptrs));

	/*
	 * if (prog == NULL)
	 *     goto out;
	 */
	/* test edx,edx */
	EMIT2(0x85, add_2reg(0xC0, IA32_EDX, IA32_EDX));
	/* je out */
	EMIT2(IA32_JE, jmp_label(jmp_label1, 2));

	/* goto *(prog->bpf_func + prologue_size); */
	/* mov edx, dword ptr [edx + 32] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EDX, IA32_EDX),
	      offsetof(struct bpf_prog, bpf_func));
	/* add edx,prologue_size */
	EMIT3(0x83, add_1reg(0xC0, IA32_EDX), PROLOGUE_SIZE);

	/* mov eax,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX), STACK_VAR(r1[0]));

	/*
	 * Now we're ready to jump into next BPF program:
	 * eax == ctx (1st arg)
	 * edx == prog->bpf_func + prologue_size
	 */
	cnt += emit_jmp_edx(&prog, ip + cnt);

	if (jmp_label1 == -1)
		jmp_label1 = cnt;

	/* out: */
	*pprog = prog;
}

/* Push a 64-bit eBPF register held in stack scratch space onto the stack. */
static inline void emit_push_r64(const u8 src[], u8 **pprog)
{
	u8 *prog = *pprog;
	int cnt = 0;

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_hi));
	/* push ecx */
	EMIT1(0x51);

	/* mov ecx,dword ptr [ebp+off] */
	EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX), STACK_VAR(src_lo));
	/* push ecx */
	EMIT1(0x51);

	*pprog = prog;
}

static u8 get_cond_jmp_opcode(const u8 op, bool is_cmp_lo)
{
	u8 jmp_cond;

	/* Convert BPF opcode to x86 */
	switch (op) {
	case BPF_JEQ:
		jmp_cond = IA32_JE;
		break;
	case BPF_JSET:
	case BPF_JNE:
		jmp_cond = IA32_JNE;
		break;
	case BPF_JGT:
		/* GT is unsigned '>', JA in x86 */
		jmp_cond = IA32_JA;
		break;
	case BPF_JLT:
		/* LT is unsigned '<', JB in x86 */
		jmp_cond = IA32_JB;
		break;
	case BPF_JGE:
		/* GE is unsigned '>=', JAE in x86 */
		jmp_cond = IA32_JAE;
		break;
	case BPF_JLE:
		/* LE is unsigned '<=', JBE in x86 */
		jmp_cond = IA32_JBE;
		break;
	case BPF_JSGT:
		if (!is_cmp_lo)
			/* Signed '>', GT in x86 */
			jmp_cond = IA32_JG;
		else
			/* GT is unsigned '>', JA in x86 */
			jmp_cond = IA32_JA;
		break;
	case BPF_JSLT:
		if (!is_cmp_lo)
			/* Signed '<', LT in x86 */
			jmp_cond = IA32_JL;
		else
			/* LT is unsigned '<', JB in x86 */
			jmp_cond = IA32_JB;
		break;
	case BPF_JSGE:
		if (!is_cmp_lo)
			/* Signed '>=', GE in x86 */
			jmp_cond = IA32_JGE;
		else
			/* GE is unsigned '>=', JAE in x86 */
			jmp_cond = IA32_JAE;
		break;
	case BPF_JSLE:
		if (!is_cmp_lo)
			/* Signed '<=', LE in x86 */
			jmp_cond = IA32_JLE;
		else
			/* LE is unsigned '<=', JBE in x86 */
			jmp_cond = IA32_JBE;
		break;
	default: /* to silence GCC warning */
		jmp_cond = COND_JMP_OPCODE_INVALID;
		break;
	}

	return jmp_cond;
}
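
/*
 * Illustrative note: a signed 64-bit comparison compares the high words
 * with the signed condition (e.g. JG) but, once the high words are
 * equal, must compare the low words unsigned (JA); that is why
 * get_cond_jmp_opcode() takes the is_cmp_lo flag.
 */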

static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
		  int oldproglen, struct jit_context *ctx)
{
	struct bpf_insn *insn = bpf_prog->insnsi;
	int insn_cnt = bpf_prog->len;
	bool seen_exit = false;
	u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
	int i, cnt = 0;
	int proglen = 0;
	u8 *prog = temp;

	emit_prologue(&prog, bpf_prog->aux->stack_depth);

	for (i = 0; i < insn_cnt; i++, insn++) {
		const s32 imm32 = insn->imm;
		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
		const bool dstk = insn->dst_reg != BPF_REG_AX;
		const bool sstk = insn->src_reg != BPF_REG_AX;
		const u8 code = insn->code;
		const u8 *dst = bpf2ia32[insn->dst_reg];
		const u8 *src = bpf2ia32[insn->src_reg];
		const u8 *r0 = bpf2ia32[BPF_REG_0];
		s64 jmp_offset;
		u8 jmp_cond;
		int ilen;
		u8 *func;

		switch (code) {
		/* ALU operations */
		/* dst = src */
		case BPF_ALU | BPF_MOV | BPF_K:
		case BPF_ALU | BPF_MOV | BPF_X:
		case BPF_ALU64 | BPF_MOV | BPF_K:
		case BPF_ALU64 | BPF_MOV | BPF_X:
			switch (BPF_SRC(code)) {
			case BPF_X:
				if (imm32 == 1) {
					/* Special mov32 for zext. */
					emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
					break;
				}
				emit_ia32_mov_r64(is64, dst, src, dstk, sstk,
						  &prog, bpf_prog->aux);
				break;
			case BPF_K:
				/* Sign-extend immediate value to dst reg */
				emit_ia32_mov_i64(is64, dst, imm32,
						  dstk, &prog);
				break;
			}
			break;
		/* dst = dst + src/imm */
		/* dst = dst - src/imm */
		/* dst = dst | src/imm */
		/* dst = dst & src/imm */
		/* dst = dst ^ src/imm */
		/* dst = dst * src/imm */
		/* dst = dst << src */
		/* dst = dst >> src */
		case BPF_ALU | BPF_ADD | BPF_K:
		case BPF_ALU | BPF_ADD | BPF_X:
		case BPF_ALU | BPF_SUB | BPF_K:
		case BPF_ALU | BPF_SUB | BPF_X:
		case BPF_ALU | BPF_OR | BPF_K:
		case BPF_ALU | BPF_OR | BPF_X:
		case BPF_ALU | BPF_AND | BPF_K:
		case BPF_ALU | BPF_AND | BPF_X:
		case BPF_ALU | BPF_XOR | BPF_K:
		case BPF_ALU | BPF_XOR | BPF_X:
		case BPF_ALU64 | BPF_ADD | BPF_K:
		case BPF_ALU64 | BPF_ADD | BPF_X:
		case BPF_ALU64 | BPF_SUB | BPF_K:
		case BPF_ALU64 | BPF_SUB | BPF_X:
		case BPF_ALU64 | BPF_OR | BPF_K:
		case BPF_ALU64 | BPF_OR | BPF_X:
		case BPF_ALU64 | BPF_AND | BPF_K:
		case BPF_ALU64 | BPF_AND | BPF_X:
		case BPF_ALU64 | BPF_XOR | BPF_K:
		case BPF_ALU64 | BPF_XOR | BPF_X:
			switch (BPF_SRC(code)) {
			case BPF_X:
				emit_ia32_alu_r64(is64, BPF_OP(code), dst,
						  src, dstk, sstk, &prog,
						  bpf_prog->aux);
				break;
			case BPF_K:
				emit_ia32_alu_i64(is64, BPF_OP(code), dst,
						  imm32, dstk, &prog,
						  bpf_prog->aux);
				break;
			}
			break;
		case BPF_ALU | BPF_MUL | BPF_K:
		case BPF_ALU | BPF_MUL | BPF_X:
			switch (BPF_SRC(code)) {
			case BPF_X:
				emit_ia32_mul_r(dst_lo, src_lo, dstk,
						sstk, &prog);
				break;
			case BPF_K:
				/* mov ecx,imm32 */
				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
					    imm32);
				emit_ia32_mul_r(dst_lo, IA32_ECX, dstk,
						false, &prog);
				break;
			}
			if (!bpf_prog->aux->verifier_zext)
				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
			break;
		case BPF_ALU | BPF_LSH | BPF_X:
		case BPF_ALU | BPF_RSH | BPF_X:
		case BPF_ALU | BPF_ARSH | BPF_K:
		case BPF_ALU | BPF_ARSH | BPF_X:
			switch (BPF_SRC(code)) {
			case BPF_X:
				emit_ia32_shift_r(BPF_OP(code), dst_lo, src_lo,
						  dstk, sstk, &prog);
				break;
			case BPF_K:
				/* mov ecx,imm32 */
				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
					    imm32);
				emit_ia32_shift_r(BPF_OP(code), dst_lo,
						  IA32_ECX, dstk, false,
						  &prog);
				break;
			}
			if (!bpf_prog->aux->verifier_zext)
				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
			break;
		/* dst = dst / src(imm) */
		/* dst = dst % src(imm) */
		case BPF_ALU | BPF_DIV | BPF_K:
		case BPF_ALU | BPF_DIV | BPF_X:
		case BPF_ALU | BPF_MOD | BPF_K:
		case BPF_ALU | BPF_MOD | BPF_X:
			switch (BPF_SRC(code)) {
			case BPF_X:
				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
						    src_lo, dstk, sstk, &prog);
				break;
			case BPF_K:
				/* mov ecx,imm32 */
				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX),
					    imm32);
				emit_ia32_div_mod_r(BPF_OP(code), dst_lo,
						    IA32_ECX, dstk, false,
						    &prog);
				break;
			}
			if (!bpf_prog->aux->verifier_zext)
				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
			break;
		case BPF_ALU64 | BPF_DIV | BPF_K:
		case BPF_ALU64 | BPF_DIV | BPF_X:
		case BPF_ALU64 | BPF_MOD | BPF_K:
		case BPF_ALU64 | BPF_MOD | BPF_X:
			goto notyet;
		/* dst = dst >> imm */
		/* dst = dst << imm */
		case BPF_ALU | BPF_RSH | BPF_K:
		case BPF_ALU | BPF_LSH | BPF_K:
			if (unlikely(imm32 > 31))
				return -EINVAL;
			/* mov ecx,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
			emit_ia32_shift_r(BPF_OP(code), dst_lo, IA32_ECX, dstk,
					  false, &prog);
			if (!bpf_prog->aux->verifier_zext)
				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
			break;
		/* dst = dst << imm */
		case BPF_ALU64 | BPF_LSH | BPF_K:
			if (unlikely(imm32 > 63))
				return -EINVAL;
			emit_ia32_lsh_i64(dst, imm32, dstk, &prog);
			break;
		/* dst = dst >> imm */
		case BPF_ALU64 | BPF_RSH | BPF_K:
			if (unlikely(imm32 > 63))
				return -EINVAL;
			emit_ia32_rsh_i64(dst, imm32, dstk, &prog);
			break;
		/* dst = dst << src */
		case BPF_ALU64 | BPF_LSH | BPF_X:
			emit_ia32_lsh_r64(dst, src, dstk, sstk, &prog);
			break;
		/* dst = dst >> src */
		case BPF_ALU64 | BPF_RSH | BPF_X:
			emit_ia32_rsh_r64(dst, src, dstk, sstk, &prog);
			break;
		/* dst = dst >> src (signed) */
		case BPF_ALU64 | BPF_ARSH | BPF_X:
			emit_ia32_arsh_r64(dst, src, dstk, sstk, &prog);
			break;
		/* dst = dst >> imm (signed) */
		case BPF_ALU64 | BPF_ARSH | BPF_K:
			if (unlikely(imm32 > 63))
				return -EINVAL;
			emit_ia32_arsh_i64(dst, imm32, dstk, &prog);
			break;
		/* dst = -dst */
		case BPF_ALU | BPF_NEG:
			emit_ia32_alu_i(is64, false, BPF_OP(code),
					dst_lo, 0, dstk, &prog);
			if (!bpf_prog->aux->verifier_zext)
				emit_ia32_mov_i(dst_hi, 0, dstk, &prog);
			break;
		/* dst = -dst (64 bit) */
		case BPF_ALU64 | BPF_NEG:
			emit_ia32_neg64(dst, dstk, &prog);
			break;
		/* dst = dst * src/imm */
		case BPF_ALU64 | BPF_MUL | BPF_X:
		case BPF_ALU64 | BPF_MUL | BPF_K:
			switch (BPF_SRC(code)) {
			case BPF_X:
				emit_ia32_mul_r64(dst, src, dstk, sstk, &prog);
				break;
			case BPF_K:
				emit_ia32_mul_i64(dst, imm32, dstk, &prog);
				break;
			}
			break;
		/* dst = htole(dst) */
		case BPF_ALU | BPF_END | BPF_FROM_LE:
			emit_ia32_to_le_r64(dst, imm32, dstk, &prog,
					    bpf_prog->aux);
			break;
		/* dst = htobe(dst) */
		case BPF_ALU | BPF_END | BPF_FROM_BE:
			emit_ia32_to_be_r64(dst, imm32, dstk, &prog,
					    bpf_prog->aux);
			break;
		/* dst = imm64 */
		case BPF_LD | BPF_IMM | BPF_DW: {
			s32 hi, lo = imm32;

			hi = insn[1].imm;
			emit_ia32_mov_i(dst_lo, lo, dstk, &prog);
			emit_ia32_mov_i(dst_hi, hi, dstk, &prog);
			insn++;
			i++;
			break;
		}
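		/*
		 * Illustrative example: "r1 = 0x1122334455667788 ll" is
		 * encoded as two insns; the first carries the low word
		 * 0x55667788 in imm, the second the high word 0x11223344,
		 * and the two halves are stored with plain 32-bit moves.
		 */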
		/* speculation barrier */
		case BPF_ST | BPF_NOSPEC:
			if (boot_cpu_has(X86_FEATURE_XMM2))
				/* Emit 'lfence' */
				EMIT3(0x0F, 0xAE, 0xE8);
			break;
		/* ST: *(u8*)(dst_reg + off) = imm */
		case BPF_ST | BPF_MEM | BPF_H:
		case BPF_ST | BPF_MEM | BPF_B:
		case BPF_ST | BPF_MEM | BPF_W:
		case BPF_ST | BPF_MEM | BPF_DW:
			if (dstk)
				/* mov eax,dword ptr [ebp+off] */
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(dst_lo));
			else
				/* mov eax,dst_lo */
				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

			switch (BPF_SIZE(code)) {
			case BPF_B:
				EMIT(0xC6, 1); break;
			case BPF_H:
				EMIT2(0x66, 0xC7); break;
			case BPF_W:
			case BPF_DW:
				EMIT(0xC7, 1); break;
			}

			if (is_imm8(insn->off))
				EMIT2(add_1reg(0x40, IA32_EAX), insn->off);
			else
				EMIT1_off32(add_1reg(0x80, IA32_EAX),
					    insn->off);
			EMIT(imm32, bpf_size_to_x86_bytes(BPF_SIZE(code)));

			if (BPF_SIZE(code) == BPF_DW) {
				u32 hi;

				hi = imm32 & (1<<31) ? (u32)~0 : 0;
				EMIT2_off32(0xC7, add_1reg(0x80, IA32_EAX),
					    insn->off + 4);
				EMIT(hi, 4);
			}
			break;

		/* STX: *(u8*)(dst_reg + off) = src_reg */
		case BPF_STX | BPF_MEM | BPF_B:
		case BPF_STX | BPF_MEM | BPF_H:
		case BPF_STX | BPF_MEM | BPF_W:
		case BPF_STX | BPF_MEM | BPF_DW:
			if (dstk)
				/* mov eax,dword ptr [ebp+off] */
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(dst_lo));
			else
				/* mov eax,dst_lo */
				EMIT2(0x8B, add_2reg(0xC0, dst_lo, IA32_EAX));

			if (sstk)
				/* mov edx,dword ptr [ebp+off] */
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
				      STACK_VAR(src_lo));
			else
				/* mov edx,src_lo */
				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EDX));

			switch (BPF_SIZE(code)) {
			case BPF_B:
				EMIT(0x88, 1); break;
			case BPF_H:
				EMIT2(0x66, 0x89); break;
			case BPF_W:
			case BPF_DW:
				EMIT(0x89, 1); break;
			}

			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
				      insn->off);
			else
				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
					    insn->off);

			if (BPF_SIZE(code) == BPF_DW) {
				if (sstk)
					/* mov edx,dword ptr [ebp+off] */
					EMIT3(0x8B, add_2reg(0x40, IA32_EBP,
							     IA32_EDX),
					      STACK_VAR(src_hi));
				else
					/* mov edx,src_hi */
					EMIT2(0x8B, add_2reg(0xC0, src_hi,
							     IA32_EDX));
				EMIT1(0x89);
				if (is_imm8(insn->off + 4)) {
					EMIT2(add_2reg(0x40, IA32_EAX,
						       IA32_EDX),
					      insn->off + 4);
				} else {
					EMIT1(add_2reg(0x80, IA32_EAX,
						       IA32_EDX));
					EMIT(insn->off + 4, 4);
				}
			}
			break;

		/* LDX: dst_reg = *(u8*)(src_reg + off) */
		case BPF_LDX | BPF_MEM | BPF_B:
		case BPF_LDX | BPF_MEM | BPF_H:
		case BPF_LDX | BPF_MEM | BPF_W:
		case BPF_LDX | BPF_MEM | BPF_DW:
			if (sstk)
				/* mov eax,dword ptr [ebp+off] */
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(src_lo));
			else
				/* mov eax,src_lo */
				EMIT2(0x8B, add_2reg(0xC0, src_lo, IA32_EAX));

			switch (BPF_SIZE(code)) {
			case BPF_B:
				EMIT2(0x0F, 0xB6); break;
			case BPF_H:
				EMIT2(0x0F, 0xB7); break;
			case BPF_W:
			case BPF_DW:
				EMIT(0x8B, 1); break;
			}

			if (is_imm8(insn->off))
				EMIT2(add_2reg(0x40, IA32_EAX, IA32_EDX),
				      insn->off);
			else
				EMIT1_off32(add_2reg(0x80, IA32_EAX, IA32_EDX),
					    insn->off);

			if (dstk)
				/* mov dword ptr [ebp+off],edx */
				EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
				      STACK_VAR(dst_lo));
			else
				/* mov dst_lo,edx */
				EMIT2(0x89, add_2reg(0xC0, dst_lo, IA32_EDX));
			switch (BPF_SIZE(code)) {
			case BPF_B:
			case BPF_H:
			case BPF_W:
				if (bpf_prog->aux->verifier_zext)
					break;
				if (dstk) {
					/* mov dword ptr [ebp+off],0 */
					EMIT3(0xC7, add_1reg(0x40, IA32_EBP),
					      STACK_VAR(dst_hi));
					EMIT(0x0, 4);
				} else {
					/* xor dst_hi,dst_hi */
					EMIT2(0x33,
					      add_2reg(0xC0, dst_hi, dst_hi));
				}
				break;
			case BPF_DW:
				EMIT2_off32(0x8B,
					    add_2reg(0x80, IA32_EAX, IA32_EDX),
					    insn->off + 4);
				if (dstk)
					EMIT3(0x89,
					      add_2reg(0x40, IA32_EBP,
						       IA32_EDX),
					      STACK_VAR(dst_hi));
				else
					EMIT2(0x89,
					      add_2reg(0xC0, dst_hi, IA32_EDX));
				break;
			default:
				break;
			}
			break;
		/* call */
		case BPF_JMP | BPF_CALL:
		{
			const u8 *r1 = bpf2ia32[BPF_REG_1];
			const u8 *r2 = bpf2ia32[BPF_REG_2];
			const u8 *r3 = bpf2ia32[BPF_REG_3];
			const u8 *r4 = bpf2ia32[BPF_REG_4];
			const u8 *r5 = bpf2ia32[BPF_REG_5];

			if (insn->src_reg == BPF_PSEUDO_CALL)
				goto notyet;

			func = (u8 *) __bpf_call_base + imm32;
			jmp_offset = func - (image + addrs[i]);

			if (!imm32 || !is_simm32(jmp_offset)) {
				pr_err("unsupported BPF func %d addr %p image %p\n",
				       imm32, func, image);
				return -EINVAL;
			}

			/* mov eax,dword ptr [ebp+off] */
			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(r1[0]));
			/* mov edx,dword ptr [ebp+off] */
			EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(r1[1]));

			emit_push_r64(r5, &prog);
			emit_push_r64(r4, &prog);
			emit_push_r64(r3, &prog);
			emit_push_r64(r2, &prog);

			EMIT1_off32(0xE8, jmp_offset + 9);

			/* mov dword ptr [ebp+off],eax */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EAX),
			      STACK_VAR(r0[0]));
			/* mov dword ptr [ebp+off],edx */
			EMIT3(0x89, add_2reg(0x40, IA32_EBP, IA32_EDX),
			      STACK_VAR(r0[1]));

			/* add esp,32 */
			EMIT3(0x83, add_1reg(0xC0, IA32_ESP), 32);
			break;
		}
		case BPF_JMP | BPF_TAIL_CALL:
			emit_bpf_tail_call(&prog, image + addrs[i - 1]);
			break;

1951		/* cond jump */
1952		case BPF_JMP | BPF_JEQ | BPF_X:
1953		case BPF_JMP | BPF_JNE | BPF_X:
1954		case BPF_JMP | BPF_JGT | BPF_X:
1955		case BPF_JMP | BPF_JLT | BPF_X:
1956		case BPF_JMP | BPF_JGE | BPF_X:
1957		case BPF_JMP | BPF_JLE | BPF_X:
1958		case BPF_JMP32 | BPF_JEQ | BPF_X:
1959		case BPF_JMP32 | BPF_JNE | BPF_X:
1960		case BPF_JMP32 | BPF_JGT | BPF_X:
1961		case BPF_JMP32 | BPF_JLT | BPF_X:
1962		case BPF_JMP32 | BPF_JGE | BPF_X:
1963		case BPF_JMP32 | BPF_JLE | BPF_X:
1964		case BPF_JMP32 | BPF_JSGT | BPF_X:
1965		case BPF_JMP32 | BPF_JSLE | BPF_X:
1966		case BPF_JMP32 | BPF_JSLT | BPF_X:
1967		case BPF_JMP32 | BPF_JSGE | BPF_X: {
1968			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
1969			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
1970			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
1971			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
1972			u8 sreg_hi = sstk ? IA32_EBX : src_hi;
1973
1974			if (dstk) {
1975				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
1976				      STACK_VAR(dst_lo));
1977				if (is_jmp64)
1978					EMIT3(0x8B,
1979					      add_2reg(0x40, IA32_EBP,
1980						       IA32_EDX),
1981					      STACK_VAR(dst_hi));
1982			}
1983
1984			if (sstk) {
1985				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
1986				      STACK_VAR(src_lo));
1987				if (is_jmp64)
1988					EMIT3(0x8B,
1989					      add_2reg(0x40, IA32_EBP,
1990						       IA32_EBX),
1991					      STACK_VAR(src_hi));
1992			}
1993
1994			if (is_jmp64) {
1995				/* cmp dreg_hi,sreg_hi */
1996				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
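				/*
				 * If the high halves differ, their flags
				 * decide the branch: jump over the 2-byte
				 * low cmp so it cannot clobber them.
				 */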
				EMIT2(IA32_JNE, 2);
			}
			/* cmp dreg_lo,sreg_lo */
			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
			goto emit_cond_jmp;
		}
		case BPF_JMP | BPF_JSGT | BPF_X:
		case BPF_JMP | BPF_JSLE | BPF_X:
		case BPF_JMP | BPF_JSLT | BPF_X:
		case BPF_JMP | BPF_JSGE | BPF_X: {
			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
			u8 sreg_hi = sstk ? IA32_EBX : src_hi;

			if (dstk) {
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(dst_lo));
				EMIT3(0x8B,
				      add_2reg(0x40, IA32_EBP,
					       IA32_EDX),
				      STACK_VAR(dst_hi));
			}

			if (sstk) {
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
				      STACK_VAR(src_lo));
				EMIT3(0x8B,
				      add_2reg(0x40, IA32_EBP,
					       IA32_EBX),
				      STACK_VAR(src_hi));
			}

			/* cmp dreg_hi,sreg_hi */
			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
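			/*
			 * High halves differ: skip the 10 bytes of low-word
			 * code below (2-byte cmp, 6-byte jcc, 2-byte jmp)
			 * and land on the signed high-word jcc emitted at
			 * the end of emit_cond_jmp_signed.
			 */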
			EMIT2(IA32_JNE, 10);
			/* cmp dreg_lo,sreg_lo */
			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));
			goto emit_cond_jmp_signed;
		}
		case BPF_JMP | BPF_JSET | BPF_X:
		case BPF_JMP32 | BPF_JSET | BPF_X: {
			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
			u8 dreg_lo = IA32_EAX;
			u8 dreg_hi = IA32_EDX;
			u8 sreg_lo = sstk ? IA32_ECX : src_lo;
			u8 sreg_hi = sstk ? IA32_EBX : src_hi;

			if (dstk) {
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(dst_lo));
				if (is_jmp64)
					EMIT3(0x8B,
					      add_2reg(0x40, IA32_EBP,
						       IA32_EDX),
					      STACK_VAR(dst_hi));
			} else {
				/* mov dreg_lo,dst_lo */
				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
				if (is_jmp64)
					/* mov dreg_hi,dst_hi */
					EMIT2(0x89,
					      add_2reg(0xC0, dreg_hi, dst_hi));
			}

			if (sstk) {
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_ECX),
				      STACK_VAR(src_lo));
				if (is_jmp64)
					EMIT3(0x8B,
					      add_2reg(0x40, IA32_EBP,
						       IA32_EBX),
					      STACK_VAR(src_hi));
			}
			/* and dreg_lo,sreg_lo */
			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
			if (is_jmp64) {
				/* and dreg_hi,sreg_hi */
				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
				/* or dreg_lo,dreg_hi */
				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
			}
			goto emit_cond_jmp;
		}
		case BPF_JMP | BPF_JSET | BPF_K:
		case BPF_JMP32 | BPF_JSET | BPF_K: {
			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
			u8 dreg_lo = IA32_EAX;
			u8 dreg_hi = IA32_EDX;
			u8 sreg_lo = IA32_ECX;
			u8 sreg_hi = IA32_EBX;
			u32 hi;

			if (dstk) {
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(dst_lo));
				if (is_jmp64)
					EMIT3(0x8B,
					      add_2reg(0x40, IA32_EBP,
						       IA32_EDX),
					      STACK_VAR(dst_hi));
			} else {
				/* mov dreg_lo,dst_lo */
				EMIT2(0x89, add_2reg(0xC0, dreg_lo, dst_lo));
				if (is_jmp64)
					/* mov dreg_hi,dst_hi */
					EMIT2(0x89,
					      add_2reg(0xC0, dreg_hi, dst_hi));
			}

			/* mov ecx,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, sreg_lo), imm32);

			/* and dreg_lo,sreg_lo */
			EMIT2(0x23, add_2reg(0xC0, sreg_lo, dreg_lo));
			if (is_jmp64) {
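				/* Sign-extend the immediate for the high-word test */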
				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
				/* mov ebx,hi */
				EMIT2_off32(0xC7, add_1reg(0xC0, sreg_hi), hi);
				/* and dreg_hi,sreg_hi */
				EMIT2(0x23, add_2reg(0xC0, sreg_hi, dreg_hi));
				/* or dreg_lo,dreg_hi */
				EMIT2(0x09, add_2reg(0xC0, dreg_lo, dreg_hi));
			}
			goto emit_cond_jmp;
		}
		case BPF_JMP | BPF_JEQ | BPF_K:
		case BPF_JMP | BPF_JNE | BPF_K:
		case BPF_JMP | BPF_JGT | BPF_K:
		case BPF_JMP | BPF_JLT | BPF_K:
		case BPF_JMP | BPF_JGE | BPF_K:
		case BPF_JMP | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JEQ | BPF_K:
		case BPF_JMP32 | BPF_JNE | BPF_K:
		case BPF_JMP32 | BPF_JGT | BPF_K:
		case BPF_JMP32 | BPF_JLT | BPF_K:
		case BPF_JMP32 | BPF_JGE | BPF_K:
		case BPF_JMP32 | BPF_JLE | BPF_K:
		case BPF_JMP32 | BPF_JSGT | BPF_K:
		case BPF_JMP32 | BPF_JSLE | BPF_K:
		case BPF_JMP32 | BPF_JSLT | BPF_K:
		case BPF_JMP32 | BPF_JSGE | BPF_K: {
			bool is_jmp64 = BPF_CLASS(insn->code) == BPF_JMP;
			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
			u8 sreg_lo = IA32_ECX;
			u8 sreg_hi = IA32_EBX;
			u32 hi;

			if (dstk) {
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(dst_lo));
				if (is_jmp64)
					EMIT3(0x8B,
					      add_2reg(0x40, IA32_EBP,
						       IA32_EDX),
					      STACK_VAR(dst_hi));
			}

			/* mov ecx,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
			if (is_jmp64) {
				hi = imm32 & (1 << 31) ? (u32)~0 : 0;
				/* mov ebx,hi */
				EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
				/* cmp dreg_hi,sreg_hi */
				EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
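				/* High halves differ: skip the low cmp */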
				EMIT2(IA32_JNE, 2);
			}
			/* cmp dreg_lo,sreg_lo */
			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));

emit_cond_jmp:		jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
			if (jmp_cond == COND_JMP_OPCODE_INVALID)
				return -EFAULT;
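			/*
			 * addrs[i] holds the end offset of insn i, i.e. the
			 * start of insn i + 1, so addrs[i + insn->off] is
			 * the branch target and addrs[i] is the address the
			 * jcc displacement is relative to.
			 */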
			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (is_imm8(jmp_offset)) {
				EMIT2(jmp_cond, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
			} else {
				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;
		}
		case BPF_JMP | BPF_JSGT | BPF_K:
		case BPF_JMP | BPF_JSLE | BPF_K:
		case BPF_JMP | BPF_JSLT | BPF_K:
		case BPF_JMP | BPF_JSGE | BPF_K: {
			u8 dreg_lo = dstk ? IA32_EAX : dst_lo;
			u8 dreg_hi = dstk ? IA32_EDX : dst_hi;
			u8 sreg_lo = IA32_ECX;
			u8 sreg_hi = IA32_EBX;
			u32 hi;

			if (dstk) {
				EMIT3(0x8B, add_2reg(0x40, IA32_EBP, IA32_EAX),
				      STACK_VAR(dst_lo));
				EMIT3(0x8B,
				      add_2reg(0x40, IA32_EBP,
					       IA32_EDX),
				      STACK_VAR(dst_hi));
			}

			/* mov ecx,imm32 */
			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_ECX), imm32);
			hi = imm32 & (1 << 31) ? (u32)~0 : 0;
			/* mov ebx,hi */
			EMIT2_off32(0xC7, add_1reg(0xC0, IA32_EBX), hi);
			/* cmp dreg_hi,sreg_hi */
			EMIT2(0x39, add_2reg(0xC0, dreg_hi, sreg_hi));
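			/* High halves differ: skip ahead to the signed high-word jcc */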
			EMIT2(IA32_JNE, 10);
			/* cmp dreg_lo,sreg_lo */
			EMIT2(0x39, add_2reg(0xC0, dreg_lo, sreg_lo));

			/*
			 * For simplicity of branch offset computation,
			 * let's use fixed jump coding here.
			 */
emit_cond_jmp_signed:	/* Check the condition for low 32-bit comparison */
			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), true);
			if (jmp_cond == COND_JMP_OPCODE_INVALID)
				return -EFAULT;
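			/*
			 * The low-word jcc ends 8 bytes before the end of
			 * this insn (a 2-byte jmp and the 6-byte high-word
			 * jcc follow it), so bias its displacement by +8.
			 */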
			jmp_offset = addrs[i + insn->off] - addrs[i] + 8;
			if (is_simm32(jmp_offset)) {
				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
			} else {
				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
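			/* Low-word jcc not taken: jump over the 6-byte high-word jcc */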
			EMIT2(0xEB, 6);

			/* Check the condition for high 32-bit comparison */
			jmp_cond = get_cond_jmp_opcode(BPF_OP(code), false);
			if (jmp_cond == COND_JMP_OPCODE_INVALID)
				return -EFAULT;
			jmp_offset = addrs[i + insn->off] - addrs[i];
			if (is_simm32(jmp_offset)) {
				EMIT2_off32(0x0F, jmp_cond + 0x10, jmp_offset);
			} else {
				pr_err("cond_jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;
		}
		case BPF_JMP | BPF_JA:
			if (insn->off == -1)
				/* -1 jmp instructions will always jump
				 * backwards two bytes. Explicitly handling
				 * this case avoids wasting too many passes
				 * when there are long sequences of replaced
				 * dead code.
				 */
				jmp_offset = -2;
			else
				jmp_offset = addrs[i + insn->off] - addrs[i];

			if (!jmp_offset)
				/* Optimize out nop jumps */
				break;
emit_jmp:
			if (is_imm8(jmp_offset)) {
				EMIT2(0xEB, jmp_offset);
			} else if (is_simm32(jmp_offset)) {
				EMIT1_off32(0xE9, jmp_offset);
			} else {
				pr_err("jmp gen bug %llx\n", jmp_offset);
				return -EFAULT;
			}
			break;
		/* STX XADD: lock *(u32 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_W:
		/* STX XADD: lock *(u64 *)(dst + off) += src */
		case BPF_STX | BPF_XADD | BPF_DW:
			goto notyet;
		case BPF_JMP | BPF_EXIT:
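			/*
			 * Emit the epilogue only once; later BPF_EXIT insns
			 * jump to the cleanup code recorded in
			 * ctx->cleanup_addr.
			 */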
			if (seen_exit) {
				jmp_offset = ctx->cleanup_addr - addrs[i];
				goto emit_jmp;
			}
			seen_exit = true;
			/* Update cleanup_addr */
			ctx->cleanup_addr = proglen;
			emit_epilogue(&prog, bpf_prog->aux->stack_depth);
			break;
notyet:
			pr_info_once("*** NOT YET: opcode %02x ***\n", code);
			return -EFAULT;
		default:
			/*
			 * This error will be seen if a new instruction was
			 * added to the interpreter but not to the JIT, or
			 * if there is junk in bpf_prog.
			 */
			pr_err("bpf_jit: unknown opcode %02x\n", code);
			return -EINVAL;
		}

		ilen = prog - temp;
		if (ilen > BPF_MAX_INSN_SIZE) {
			pr_err("bpf_jit: fatal insn size error\n");
			return -EFAULT;
		}

		if (image) {
			/*
			 * When populating the image, assert that:
			 *
			 *  i) We do not write beyond the allocated space, and
			 * ii) addrs[i] did not change from the prior run, in order
			 *     to validate assumptions made for computing branch
			 *     displacements.
			 */
			if (unlikely(proglen + ilen > oldproglen ||
				     proglen + ilen != addrs[i])) {
				pr_err("bpf_jit: fatal error\n");
				return -EFAULT;
			}
			memcpy(image + proglen, temp, ilen);
		}
		proglen += ilen;
		addrs[i] = proglen;
		prog = temp;
	}
	return proglen;
}

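/*
 * Have the verifier zero-extend 32-bit results where needed, instead of
 * the JIT clearing the high word itself (see the verifier_zext checks
 * above).
 */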
bool bpf_jit_needs_zext(void)
{
	return true;
}

struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
{
	struct bpf_binary_header *header = NULL;
	struct bpf_prog *tmp, *orig_prog = prog;
	int proglen, oldproglen = 0;
	struct jit_context ctx = {};
	bool tmp_blinded = false;
	u8 *image = NULL;
	int *addrs;
	int pass;
	int i;

	if (!prog->jit_requested)
		return orig_prog;

	tmp = bpf_jit_blind_constants(prog);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_prog;
	if (tmp != prog) {
		tmp_blinded = true;
		prog = tmp;
	}

	addrs = kmalloc_array(prog->len, sizeof(*addrs), GFP_KERNEL);
	if (!addrs) {
		prog = orig_prog;
		goto out;
	}

	/*
	 * Before the first pass, make a rough estimate of addrs[]:
	 * each BPF instruction is translated to at most 64 bytes.
	 */
	for (proglen = 0, i = 0; i < prog->len; i++) {
		proglen += 64;
		addrs[i] = proglen;
	}
	ctx.cleanup_addr = proglen;

	/*
	 * The JITed image shrinks with every pass and the loop iterates
	 * until the image stops shrinking. Very large BPF programs may
	 * converge only on the last pass. In such a case, do one more
	 * pass to emit the final image.
	 */
	for (pass = 0; pass < 20 || image; pass++) {
		proglen = do_jit(prog, addrs, image, oldproglen, &ctx);
		if (proglen <= 0) {
out_image:
			image = NULL;
			if (header)
				bpf_jit_binary_free(header);
			prog = orig_prog;
			goto out_addrs;
		}
		if (image) {
			if (proglen != oldproglen) {
				pr_err("bpf_jit: proglen=%d != oldproglen=%d\n",
				       proglen, oldproglen);
				goto out_image;
			}
			break;
		}
		if (proglen == oldproglen) {
			header = bpf_jit_binary_alloc(proglen, &image,
						      1, jit_fill_hole);
			if (!header) {
				prog = orig_prog;
				goto out_addrs;
			}
		}
		oldproglen = proglen;
		cond_resched();
	}

	if (bpf_jit_enable > 1)
		bpf_jit_dump(prog->len, proglen, pass + 1, image);

	if (image) {
		bpf_jit_binary_lock_ro(header);
		prog->bpf_func = (void *)image;
		prog->jited = 1;
		prog->jited_len = proglen;
	} else {
		prog = orig_prog;
	}

out_addrs:
	kfree(addrs);
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(prog, prog == orig_prog ?
					   tmp : orig_prog);
	return prog;
}