// SPDX-License-Identifier: GPL-2.0
/*
 * BPF JIT compiler for s390.
 *
 * Minimum build requirements:
 *
 *  - HAVE_MARCH_Z196_FEATURES: laal, laalg
 *  - HAVE_MARCH_Z10_FEATURES: msfi, cgrj, clgrj
 *  - HAVE_MARCH_Z9_109_FEATURES: alfi, llilf, clfi, oilf, nilf
 *  - 64BIT
 *
 * Copyright IBM Corp. 2012,2015
 *
 * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com>
 *	      Michael Holzheu <holzheu@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "bpf_jit"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/netdevice.h>
#include <linux/filter.h>
#include <linux/init.h>
#include <linux/bpf.h>
#include <linux/mm.h>
#include <linux/kernel.h>
#include <asm/cacheflush.h>
#include <asm/extable.h>
#include <asm/dis.h>
#include <asm/facility.h>
#include <asm/nospec-branch.h>
#include <asm/set_memory.h>
#include <asm/text-patching.h>
#include "bpf_jit.h"

struct bpf_jit {
	u32 seen;		/* Flags to remember seen eBPF instructions */
	u32 seen_reg[16];	/* Array to remember which registers are used */
	u32 *addrs;		/* Array with relative instruction addresses */
	u8 *prg_buf;		/* Start of program */
	int size;		/* Size of program and literal pool */
	int size_prg;		/* Size of program */
	int prg;		/* Current position in program */
	int lit32_start;	/* Start of 32-bit literal pool */
	int lit32;		/* Current position in 32-bit literal pool */
	int lit64_start;	/* Start of 64-bit literal pool */
	int lit64;		/* Current position in 64-bit literal pool */
	int base_ip;		/* Base address for literal pool */
	int exit_ip;		/* Address of exit */
	int r1_thunk_ip;	/* Address of expoline thunk for 'br %r1' */
	int r14_thunk_ip;	/* Address of expoline thunk for 'br %r14' */
	int tail_call_start;	/* Tail call start offset */
	int excnt;		/* Number of exception table entries */
	int prologue_plt_ret;	/* Return address for prologue hotpatch PLT */
	int prologue_plt;	/* Start of prologue hotpatch PLT */
};

#define SEEN_MEM	BIT(0)		/* use mem[] for temporary storage */
#define SEEN_LITERAL	BIT(1)		/* code uses literals */
#define SEEN_FUNC	BIT(2)		/* calls C functions */
#define SEEN_STACK	(SEEN_FUNC | SEEN_MEM)

/*
 * s390 registers
 */
#define REG_W0		(MAX_BPF_JIT_REG + 0)	/* Work register 1 (even) */
#define REG_W1		(MAX_BPF_JIT_REG + 1)	/* Work register 2 (odd) */
#define REG_L		(MAX_BPF_JIT_REG + 2)	/* Literal pool register */
#define REG_15		(MAX_BPF_JIT_REG + 3)	/* Register 15 */
#define REG_0		REG_W0			/* Register 0 */
#define REG_1		REG_W1			/* Register 1 */
#define REG_2		BPF_REG_1		/* Register 2 */
#define REG_3		BPF_REG_2		/* Register 3 */
#define REG_4		BPF_REG_3		/* Register 4 */
#define REG_7		BPF_REG_6		/* Register 7 */
#define REG_8		BPF_REG_7		/* Register 8 */
#define REG_14		BPF_REG_0		/* Register 14 */

/*
 * Mapping of BPF registers to s390 registers
 */
static const int reg2hex[] = {
	/* Return code */
	[BPF_REG_0]	= 14,
	/* Function parameters */
	[BPF_REG_1]	= 2,
	[BPF_REG_2]	= 3,
	[BPF_REG_3]	= 4,
	[BPF_REG_4]	= 5,
	[BPF_REG_5]	= 6,
	/* Call saved registers */
	[BPF_REG_6]	= 7,
	[BPF_REG_7]	= 8,
	[BPF_REG_8]	= 9,
	[BPF_REG_9]	= 10,
	/* BPF stack pointer */
	[BPF_REG_FP]	= 13,
	/* Register for blinding */
	[BPF_REG_AX]	= 12,
	/* Work registers for s390x backend */
	[REG_W0]	= 0,
	[REG_W1]	= 1,
	[REG_L]		= 11,
	[REG_15]	= 15,
};

static inline u32 reg(u32 dst_reg, u32 src_reg)
{
	return reg2hex[dst_reg] << 4 | reg2hex[src_reg];
}
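/*
 * E.g. reg(BPF_REG_1, BPF_REG_2) == 0x23: %r2 in the high nibble, %r3 in
 * the low nibble of the operand byte.
 */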

static inline u32 reg_high(u32 reg)
{
	return reg2hex[reg] << 4;
}

static inline void reg_set_seen(struct bpf_jit *jit, u32 b1)
{
	u32 r1 = reg2hex[b1];

	if (r1 >= 6 && r1 <= 15 && !jit->seen_reg[r1])
		jit->seen_reg[r1] = 1;
}

#define REG_SET_SEEN(b1)					\
({								\
	reg_set_seen(jit, b1);					\
})

#define REG_SEEN(b1) jit->seen_reg[reg2hex[(b1)]]

/*
 * EMIT macros for code generation
 */

#define _EMIT2(op)						\
({								\
	if (jit->prg_buf)					\
		*(u16 *) (jit->prg_buf + jit->prg) = (op);	\
	jit->prg += 2;						\
})

#define EMIT2(op, b1, b2)					\
({								\
	_EMIT2((op) | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define _EMIT4(op)						\
({								\
	if (jit->prg_buf)					\
		*(u32 *) (jit->prg_buf + jit->prg) = (op);	\
	jit->prg += 4;						\
})

#define EMIT4(op, b1, b2)					\
({								\
	_EMIT4((op) | reg(b1, b2));				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT4_RRF(op, b1, b2, b3)				\
({								\
	_EMIT4((op) | reg_high(b3) << 8 | reg(b1, b2));		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

#define _EMIT4_DISP(op, disp)					\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT4((op) | __disp);					\
})

#define EMIT4_DISP(op, b1, b2, disp)				\
({								\
	_EMIT4_DISP((op) | reg_high(b1) << 16 |			\
		    reg_high(b2) << 8, (disp));			\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT4_IMM(op, b1, imm)					\
({								\
	unsigned int __imm = (imm) & 0xffff;			\
	_EMIT4((op) | reg_high(b1) << 16 | __imm);		\
	REG_SET_SEEN(b1);					\
})

#define EMIT4_PCREL(op, pcrel)					\
({								\
	long __pcrel = ((pcrel) >> 1) & 0xffff;			\
	_EMIT4((op) | __pcrel);					\
})

#define EMIT4_PCREL_RIC(op, mask, target)			\
({								\
	int __rel = ((target) - jit->prg) / 2;			\
	_EMIT4((op) | (mask) << 20 | (__rel & 0xffff));		\
})

#define _EMIT6(op1, op2)					\
({								\
	if (jit->prg_buf) {					\
		*(u32 *) (jit->prg_buf + jit->prg) = (op1);	\
		*(u16 *) (jit->prg_buf + jit->prg + 4) = (op2);	\
	}							\
	jit->prg += 6;						\
})

#define _EMIT6_DISP(op1, op2, disp)				\
({								\
	unsigned int __disp = (disp) & 0xfff;			\
	_EMIT6((op1) | __disp, op2);				\
})

#define _EMIT6_DISP_LH(op1, op2, disp)				\
({								\
	u32 _disp = (u32) (disp);				\
	unsigned int __disp_h = _disp & 0xff000;		\
	unsigned int __disp_l = _disp & 0x00fff;		\
	_EMIT6((op1) | __disp_l, (op2) | __disp_h >> 4);	\
})
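/*
 * The long-displacement formats split a signed 20-bit displacement into a
 * low part DL (12 bits, in the classic displacement position) and a high
 * part DH (8 bits, right before the second opcode byte). E.g. disp = 0x12345
 * is emitted as DL = 0x345 and DH = 0x12.
 */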

#define EMIT6_DISP_LH(op1, op2, b1, b2, b3, disp)		\
({								\
	_EMIT6_DISP_LH((op1) | reg(b1, b2) << 16 |		\
		       reg_high(b3) << 8, op2, disp);		\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
	REG_SET_SEEN(b3);					\
})

#define EMIT6_PCREL_RIEB(op1, op2, b1, b2, mask, target)	\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff),	\
	       (op2) | (mask) << 12);				\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT6_PCREL_RIEC(op1, op2, b1, imm, mask, target)	\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op1) | (reg_high(b1) | (mask)) << 16 |		\
		(rel & 0xffff), (op2) | ((imm) & 0xff) << 8);	\
	REG_SET_SEEN(b1);					\
	BUILD_BUG_ON(((unsigned long) (imm)) > 0xff);		\
})

#define EMIT6_PCREL(op1, op2, b1, b2, i, off, mask)		\
({								\
	int rel = (addrs[(i) + (off) + 1] - jit->prg) / 2;	\
	_EMIT6((op1) | reg(b1, b2) << 16 | (rel & 0xffff), (op2) | (mask));\
	REG_SET_SEEN(b1);					\
	REG_SET_SEEN(b2);					\
})

#define EMIT6_PCREL_RILB(op, b, target)				\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op) | reg_high(b) << 16 | rel >> 16, rel & 0xffff);\
	REG_SET_SEEN(b);					\
})

#define EMIT6_PCREL_RIL(op, target)				\
({								\
	unsigned int rel = (int)((target) - jit->prg) / 2;	\
	_EMIT6((op) | rel >> 16, rel & 0xffff);			\
})

#define EMIT6_PCREL_RILC(op, mask, target)			\
({								\
	EMIT6_PCREL_RIL((op) | (mask) << 20, (target));		\
})

#define _EMIT6_IMM(op, imm)					\
({								\
	unsigned int __imm = (imm);				\
	_EMIT6((op) | (__imm >> 16), __imm & 0xffff);		\
})

#define EMIT6_IMM(op, b1, imm)					\
({								\
	_EMIT6_IMM((op) | reg_high(b1) << 16, imm);		\
	REG_SET_SEEN(b1);					\
})

#define _EMIT_CONST_U32(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit32;					\
	if (jit->prg_buf)					\
		*(u32 *)(jit->prg_buf + jit->lit32) = (u32)(val);\
	jit->lit32 += 4;					\
	ret;							\
})

#define EMIT_CONST_U32(val)					\
({								\
	jit->seen |= SEEN_LITERAL;				\
	_EMIT_CONST_U32(val) - jit->base_ip;			\
})

#define _EMIT_CONST_U64(val)					\
({								\
	unsigned int ret;					\
	ret = jit->lit64;					\
	if (jit->prg_buf)					\
		*(u64 *)(jit->prg_buf + jit->lit64) = (u64)(val);\
	jit->lit64 += 8;					\
	ret;							\
})

#define EMIT_CONST_U64(val)					\
({								\
	jit->seen |= SEEN_LITERAL;				\
	_EMIT_CONST_U64(val) - jit->base_ip;			\
})

#define EMIT_ZERO(b1)						\
({								\
	if (!fp->aux->verifier_zext) {				\
		/* llgfr %dst,%dst (zero extend to 64 bit) */	\
		EMIT4(0xb9160000, b1, b1);			\
		REG_SET_SEEN(b1);				\
	}							\
})
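/*
 * Note: bpf_jit_needs_zext() returns true for this JIT, so the verifier
 * inserts explicit zero-extension instructions where needed and sets
 * verifier_zext; the 32-bit ALU cases may then skip their own llgfr.
 */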

/*
 * Return whether this is the first pass. The first pass is special, since we
 * don't know any sizes yet, and thus must be conservative.
 */
static bool is_first_pass(struct bpf_jit *jit)
{
	return jit->size == 0;
}

/*
 * Return whether this is the code generation pass. The code generation pass is
 * special, since we should change as little as possible.
 */
static bool is_codegen_pass(struct bpf_jit *jit)
{
	return jit->prg_buf;
}

/*
 * Return whether "rel" can be encoded as a short PC-relative offset
 */
static bool is_valid_rel(int rel)
{
	return rel >= -65536 && rel <= 65534;
}
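/*
 * The limits above come from the signed 16-bit halfword offset of the short
 * relative-branch formats: 2 * [-32768, 32767] = [-65536, 65534] bytes.
 */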

/*
 * Return whether "off" can be reached using a short PC-relative offset
 */
static bool can_use_rel(struct bpf_jit *jit, int off)
{
	return is_valid_rel(off - jit->prg);
}

/*
 * Return whether given displacement can be encoded using
 * Long-Displacement Facility
 */
static bool is_valid_ldisp(int disp)
{
	return disp >= -524288 && disp <= 524287;
}
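/*
 * The limits above correspond to the signed 20-bit displacement of the
 * long-displacement formats: [-2^19, 2^19 - 1] bytes.
 */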

/*
 * Return whether the next 32-bit literal pool entry can be referenced using
 * Long-Displacement Facility
 */
static bool can_use_ldisp_for_lit32(struct bpf_jit *jit)
{
	return is_valid_ldisp(jit->lit32 - jit->base_ip);
}

/*
 * Return whether the next 64-bit literal pool entry can be referenced using
 * Long-Displacement Facility
 */
static bool can_use_ldisp_for_lit64(struct bpf_jit *jit)
{
	return is_valid_ldisp(jit->lit64 - jit->base_ip);
}

/*
 * Fill whole space with illegal instructions
 */
static void jit_fill_hole(void *area, unsigned int size)
{
	memset(area, 0, size);
}
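/*
 * An all-zero pattern is not a valid s390 opcode, so a stray jump into the
 * zero-filled area traps instead of executing random code.
 */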

/*
 * Save registers from "rs" (register start) to "re" (register end) on stack
 */
static void save_regs(struct bpf_jit *jit, u32 rs, u32 re)
{
	u32 off = STK_OFF_R6 + (rs - 6) * 8;

	if (rs == re)
		/* stg %rs,off(%r15) */
		_EMIT6(0xe300f000 | rs << 20 | off, 0x0024);
	else
		/* stmg %rs,%re,off(%r15) */
		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0024, off);
}

/*
 * Restore registers "rs" (register start) through "re" (register end) from stack
 */
static void restore_regs(struct bpf_jit *jit, u32 rs, u32 re, u32 stack_depth)
{
	u32 off = STK_OFF_R6 + (rs - 6) * 8;

	if (jit->seen & SEEN_STACK)
		off += STK_OFF + stack_depth;

	if (rs == re)
		/* lg %rs,off(%r15) */
		_EMIT6(0xe300f000 | rs << 20 | off, 0x0004);
	else
		/* lmg %rs,%re,off(%r15) */
		_EMIT6_DISP(0xeb00f000 | rs << 20 | re << 16, 0x0004, off);
}

/*
 * Return first seen register (from start)
 */
static int get_start(struct bpf_jit *jit, int start)
{
	int i;

	for (i = start; i <= 15; i++) {
		if (jit->seen_reg[i])
			return i;
	}
	return 0;
}

/*
 * Return the last register of the chunk starting at "start"; single-register
 * gaps are bridged, so a chunk only ends at a gap of two or more registers.
 */
static int get_end(struct bpf_jit *jit, int start)
{
	int i;

	for (i = start; i < 15; i++) {
		if (!jit->seen_reg[i] && !jit->seen_reg[i + 1])
			return i - 1;
	}
	return jit->seen_reg[15] ? 15 : 14;
}
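/*
 * E.g. with %r6, %r7 and %r9 seen, get_start(6) returns 6 and get_end(7)
 * returns 9, so one stmg/lmg %r6,%r9 covers the whole chunk: bridging the
 * single-register gap at %r8 is cheaper than emitting two instructions.
 */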

#define REGS_SAVE	1
#define REGS_RESTORE	0
/*
 * Save and restore clobbered registers (6-15) on stack.
 * We save/restore registers in chunks with gap >= 2 registers.
 */
static void save_restore_regs(struct bpf_jit *jit, int op, u32 stack_depth)
{
	const int last = 15, save_restore_size = 6;
	int re = 6, rs;

	if (is_first_pass(jit)) {
		/*
		 * We don't know yet which registers are used. Reserve space
		 * conservatively.
		 */
		jit->prg += (last - re + 1) * save_restore_size;
		return;
	}

	do {
		rs = get_start(jit, re);
		if (!rs)
			break;
		re = get_end(jit, rs + 1);
		if (op == REGS_SAVE)
			save_regs(jit, rs, re);
		else
			restore_regs(jit, rs, re, stack_depth);
		re++;
	} while (re <= last);
}

static void bpf_skip(struct bpf_jit *jit, int size)
{
	if (size >= 6 && !is_valid_rel(size)) {
		/* brcl 0xf,size */
		EMIT6_PCREL_RIL(0xc0f40000, size);
		size -= 6;
	} else if (size >= 4 && is_valid_rel(size)) {
		/* brc 0xf,size */
		EMIT4_PCREL(0xa7f40000, size);
		size -= 4;
	}
	while (size >= 2) {
		/* bcr 0,%0 */
		_EMIT2(0x0700);
		size -= 2;
	}
}
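/*
 * E.g. bpf_skip(jit, 10) with a representable offset emits one 4-byte brc
 * plus three 2-byte bcr nops; sizes are always even because s390
 * instructions are multiples of 2 bytes.
 */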

/*
 * PLT for hotpatchable calls. The calling convention is the same as for the
 * ftrace hotpatch trampolines: %r0 is return address, %r1 is clobbered.
 */
extern const char bpf_plt[];
extern const char bpf_plt_ret[];
extern const char bpf_plt_target[];
extern const char bpf_plt_end[];
#define BPF_PLT_SIZE 32
asm(
	".pushsection .rodata\n"
	"	.balign 8\n"
	"bpf_plt:\n"
	"	lgrl %r0,bpf_plt_ret\n"
	"	lgrl %r1,bpf_plt_target\n"
	"	br %r1\n"
	"	.balign 8\n"
	"bpf_plt_ret: .quad 0\n"
	"bpf_plt_target: .quad 0\n"
	"bpf_plt_end:\n"
	"	.popsection\n"
);
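/*
 * Size check: lgrl (6) + lgrl (6) + br (2) = 14 bytes of code, padded to 16
 * by the .balign, plus two 8-byte literals = 32 bytes = BPF_PLT_SIZE.
 */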

static void bpf_jit_plt(void *plt, void *ret, void *target)
{
	memcpy(plt, bpf_plt, BPF_PLT_SIZE);
	*(void **)((char *)plt + (bpf_plt_ret - bpf_plt)) = ret;
	*(void **)((char *)plt + (bpf_plt_target - bpf_plt)) = target ?: ret;
}

/*
 * Emit function prologue
 *
 * Save registers and create stack frame if necessary.
 * See stack frame layout description in "bpf_jit.h"!
 */
static void bpf_jit_prologue(struct bpf_jit *jit, struct bpf_prog *fp,
			     u32 stack_depth)
{
	/* No-op for hotpatching */
	/* brcl 0,prologue_plt */
	EMIT6_PCREL_RILC(0xc0040000, 0, jit->prologue_plt);
	jit->prologue_plt_ret = jit->prg;

	if (fp->aux->func_idx == 0) {
		/* Initialize the tail call counter in the main program. */
		/* xc STK_OFF_TCCNT(4,%r15),STK_OFF_TCCNT(%r15) */
		_EMIT6(0xd703f000 | STK_OFF_TCCNT, 0xf000 | STK_OFF_TCCNT);
	} else {
		/*
		 * Skip the tail call counter initialization in subprograms.
		 * Insert nops in order to have tail_call_start at a
		 * predictable offset.
		 */
		bpf_skip(jit, 6);
	}
	/* Tail calls have to skip above initialization */
	jit->tail_call_start = jit->prg;
	/* Save registers */
	save_restore_regs(jit, REGS_SAVE, stack_depth);
	/* Setup literal pool */
	if (is_first_pass(jit) || (jit->seen & SEEN_LITERAL)) {
		if (!is_first_pass(jit) &&
		    is_valid_ldisp(jit->size - (jit->prg + 2))) {
			/* basr %l,0 */
			EMIT2(0x0d00, REG_L, REG_0);
			jit->base_ip = jit->prg;
		} else {
			/* larl %l,lit32_start */
			EMIT6_PCREL_RILB(0xc0000000, REG_L, jit->lit32_start);
			jit->base_ip = jit->lit32_start;
		}
	}
	/* Setup stack and backchain */
	if (is_first_pass(jit) || (jit->seen & SEEN_STACK)) {
		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
			/* lgr %w1,%r15 (backchain) */
			EMIT4(0xb9040000, REG_W1, REG_15);
		/* la %bfp,STK_160_UNUSED(%r15) (BPF frame pointer) */
		EMIT4_DISP(0x41000000, BPF_REG_FP, REG_15, STK_160_UNUSED);
		/* aghi %r15,-STK_OFF */
		EMIT4_IMM(0xa70b0000, REG_15, -(STK_OFF + stack_depth));
		if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
			/* stg %w1,152(%r15) (backchain) */
			EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W1, REG_0,
				      REG_15, 152);
	}
}

/*
 * Emit an expoline for a jump that follows
 */
static void emit_expoline(struct bpf_jit *jit)
{
	/* exrl %r0,.+10 */
	EMIT6_PCREL_RIL(0xc6000000, jit->prg + 10);
	/* j . */
	EMIT4_PCREL(0xa7f40000, 0);
}
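/*
 * Note: the exrl targets jit->prg + 10, i.e. the first byte after the 6-byte
 * exrl and the 4-byte j - which is the br %rN that the callers emit right
 * behind this sequence. The indirect branch is thus executed out of line,
 * while the inline "j ." loop catches speculative execution.
 */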

/*
 * Emit __s390_indirect_jump_r1 thunk if necessary
 */
static void emit_r1_thunk(struct bpf_jit *jit)
{
	if (nospec_uses_trampoline()) {
		jit->r1_thunk_ip = jit->prg;
		emit_expoline(jit);
		/* br %r1 */
		_EMIT2(0x07f1);
	}
}

/*
 * Call r1 either directly or via __s390_indirect_jump_r1 thunk
 */
static void call_r1(struct bpf_jit *jit)
{
	if (nospec_uses_trampoline())
		/* brasl %r14,__s390_indirect_jump_r1 */
		EMIT6_PCREL_RILB(0xc0050000, REG_14, jit->r1_thunk_ip);
	else
		/* basr %r14,%r1 */
		EMIT2(0x0d00, REG_14, REG_1);
}

/*
 * Function epilogue
 */
static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
{
	jit->exit_ip = jit->prg;
	/* Load exit code: lgr %r2,%b0 */
	EMIT4(0xb9040000, REG_2, BPF_REG_0);
	/* Restore registers */
	save_restore_regs(jit, REGS_RESTORE, stack_depth);
	if (nospec_uses_trampoline()) {
		jit->r14_thunk_ip = jit->prg;
		/* Generate __s390_indirect_jump_r14 thunk */
		emit_expoline(jit);
	}
	/* br %r14 */
	_EMIT2(0x07fe);

	if (is_first_pass(jit) || (jit->seen & SEEN_FUNC))
		emit_r1_thunk(jit);

	jit->prg = ALIGN(jit->prg, 8);
	jit->prologue_plt = jit->prg;
	if (jit->prg_buf)
		bpf_jit_plt(jit->prg_buf + jit->prg,
			    jit->prg_buf + jit->prologue_plt_ret, NULL);
	jit->prg += BPF_PLT_SIZE;
}

static int get_probe_mem_regno(const u8 *insn)
{
	/*
	 * insn must point to llgc, llgh, llgf or lg, which have destination
	 * register at the same position.
	 */
	if (insn[0] != 0xe3) /* common llgc, llgh, llgf and lg prefix */
		return -1;
	if (insn[5] != 0x90 && /* llgc */
	    insn[5] != 0x91 && /* llgh */
	    insn[5] != 0x16 && /* llgf */
	    insn[5] != 0x04) /* lg */
		return -1;
	return insn[1] >> 4;
}
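/*
 * RXY layout sketch for the check above: e.g. the bytes e3 30 10 a0 00 90
 * should decode as llgc %r3,160(%r1), so insn[1] >> 4 recovers destination
 * register 3.
 */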
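/*
 * Emulate a probe load that faulted: continue at the fixup target (the nop
 * behind the load) and make the destination register, which was recorded in
 * x->data by bpf_jit_probe_mem(), read as zero.
 */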
bool ex_handler_bpf(const struct exception_table_entry *x, struct pt_regs *regs)
{
	regs->psw.addr = extable_fixup(x);
	regs->gprs[x->data] = 0;
	return true;
}

static int bpf_jit_probe_mem(struct bpf_jit *jit, struct bpf_prog *fp,
			     int probe_prg, int nop_prg)
{
	struct exception_table_entry *ex;
	int reg, prg;
	s64 delta;
	u8 *insn;
	int i;

	if (!fp->aux->extable)
		/* Do nothing during early JIT passes. */
		return 0;
	insn = jit->prg_buf + probe_prg;
	reg = get_probe_mem_regno(insn);
	if (WARN_ON_ONCE(reg < 0))
		/* JIT bug - unexpected probe instruction. */
		return -1;
	if (WARN_ON_ONCE(probe_prg + insn_length(*insn) != nop_prg))
		/* JIT bug - gap between probe and nop instructions. */
		return -1;
	for (i = 0; i < 2; i++) {
		if (WARN_ON_ONCE(jit->excnt >= fp->aux->num_exentries))
			/* Verifier bug - not enough entries. */
			return -1;
		ex = &fp->aux->extable[jit->excnt];
		/* Add extable entries for probe and nop instructions. */
		prg = i == 0 ? probe_prg : nop_prg;
		delta = jit->prg_buf + prg - (u8 *)&ex->insn;
		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
			/* JIT bug - code and extable must be close. */
			return -1;
		ex->insn = delta;
		/*
		 * Always land on the nop. Note that extable infrastructure
		 * ignores fixup field, it is handled by ex_handler_bpf().
		 */
		delta = jit->prg_buf + nop_prg - (u8 *)&ex->fixup;
		if (WARN_ON_ONCE(delta < INT_MIN || delta > INT_MAX))
			/* JIT bug - landing pad and extable must be close. */
			return -1;
		ex->fixup = delta;
		ex->type = EX_TYPE_BPF;
		ex->data = reg;
		jit->excnt++;
	}
	return 0;
}

/*
 * Sign-extend the register if necessary
 */
static int sign_extend(struct bpf_jit *jit, int r, u8 size, u8 flags)
{
	if (!(flags & BTF_FMODEL_SIGNED_ARG))
		return 0;

	switch (size) {
	case 1:
		/* lgbr %r,%r */
		EMIT4(0xb9060000, r, r);
		return 0;
	case 2:
		/* lghr %r,%r */
		EMIT4(0xb9070000, r, r);
		return 0;
	case 4:
		/* lgfr %r,%r */
		EMIT4(0xb9140000, r, r);
		return 0;
	case 8:
		return 0;
	default:
		return -1;
	}
}

/*
 * Compile one eBPF instruction into s390x code
 *
 * NOTE: Use noinline because for gcov (-fprofile-arcs) gcc allocates a lot of
 * stack space for the large switch statement.
 */
static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
				 int i, bool extra_pass, u32 stack_depth)
{
	struct bpf_insn *insn = &fp->insnsi[i];
	u32 dst_reg = insn->dst_reg;
	u32 src_reg = insn->src_reg;
	int last, insn_count = 1;
	u32 *addrs = jit->addrs;
	s32 imm = insn->imm;
	s16 off = insn->off;
	int probe_prg = -1;
	unsigned int mask;
	int nop_prg;
	int err;

	if (BPF_CLASS(insn->code) == BPF_LDX &&
	    BPF_MODE(insn->code) == BPF_PROBE_MEM)
		probe_prg = jit->prg;

	switch (insn->code) {
	/*
	 * BPF_MOV
	 */
	case BPF_ALU | BPF_MOV | BPF_X: /* dst = (u32) src */
		/* llgfr %dst,%src */
		EMIT4(0xb9160000, dst_reg, src_reg);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_ALU64 | BPF_MOV | BPF_X: /* dst = src */
		/* lgr %dst,%src */
		EMIT4(0xb9040000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_MOV | BPF_K: /* dst = (u32) imm */
		/* llilf %dst,imm */
		EMIT6_IMM(0xc00f0000, dst_reg, imm);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_ALU64 | BPF_MOV | BPF_K: /* dst = imm */
		/* lgfi %dst,imm */
		EMIT6_IMM(0xc0010000, dst_reg, imm);
		break;
	/*
	 * BPF_LD 64
	 */
	case BPF_LD | BPF_IMM | BPF_DW: /* dst = (u64) imm */
	{
		/* 16 byte instruction that uses two 'struct bpf_insn' */
		u64 imm64;

		imm64 = (u64)(u32) insn[0].imm | ((u64)(u32) insn[1].imm) << 32;
		/* lgrl %dst,imm */
		EMIT6_PCREL_RILB(0xc4080000, dst_reg, _EMIT_CONST_U64(imm64));
		insn_count = 2;
		break;
	}
	/*
	 * BPF_ADD
	 */
	case BPF_ALU | BPF_ADD | BPF_X: /* dst = (u32) dst + (u32) src */
		/* ar %dst,%src */
		EMIT2(0x1a00, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ADD | BPF_X: /* dst = dst + src */
		/* agr %dst,%src */
		EMIT4(0xb9080000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_ADD | BPF_K: /* dst = (u32) dst + (u32) imm */
		if (imm != 0) {
			/* alfi %dst,imm */
			EMIT6_IMM(0xc20b0000, dst_reg, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ADD | BPF_K: /* dst = dst + imm */
		if (!imm)
			break;
		/* agfi %dst,imm */
		EMIT6_IMM(0xc2080000, dst_reg, imm);
		break;
	/*
	 * BPF_SUB
	 */
	case BPF_ALU | BPF_SUB | BPF_X: /* dst = (u32) dst - (u32) src */
		/* sr %dst,%src */
		EMIT2(0x1b00, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_SUB | BPF_X: /* dst = dst - src */
		/* sgr %dst,%src */
		EMIT4(0xb9090000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_SUB | BPF_K: /* dst = (u32) dst - (u32) imm */
		if (imm != 0) {
			/* alfi %dst,-imm */
			EMIT6_IMM(0xc20b0000, dst_reg, -imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_SUB | BPF_K: /* dst = dst - imm */
		if (!imm)
			break;
		if (imm == -0x80000000) {
			/* algfi %dst,0x80000000 */
			EMIT6_IMM(0xc20a0000, dst_reg, 0x80000000);
		} else {
			/* agfi %dst,-imm */
			EMIT6_IMM(0xc2080000, dst_reg, -imm);
		}
		break;
	/*
	 * BPF_MUL
	 */
	case BPF_ALU | BPF_MUL | BPF_X: /* dst = (u32) dst * (u32) src */
		/* msr %dst,%src */
		EMIT4(0xb2520000, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_MUL | BPF_X: /* dst = dst * src */
		/* msgr %dst,%src */
		EMIT4(0xb90c0000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_MUL | BPF_K: /* dst = (u32) dst * (u32) imm */
		if (imm != 1) {
			/* msfi %dst,imm */
			EMIT6_IMM(0xc2010000, dst_reg, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_MUL | BPF_K: /* dst = dst * imm */
		if (imm == 1)
			break;
		/* msgfi %dst,imm */
		EMIT6_IMM(0xc2000000, dst_reg, imm);
		break;
	/*
	 * BPF_DIV / BPF_MOD
	 */
	case BPF_ALU | BPF_DIV | BPF_X: /* dst = (u32) dst / (u32) src */
	case BPF_ALU | BPF_MOD | BPF_X: /* dst = (u32) dst % (u32) src */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

		/* lhi %w0,0 */
		EMIT4_IMM(0xa7080000, REG_W0, 0);
		/* lr %w1,%dst */
		EMIT2(0x1800, REG_W1, dst_reg);
		/* dlr %w0,%src */
		EMIT4(0xb9970000, REG_W0, src_reg);
		/* llgfr %dst,%rc */
		EMIT4(0xb9160000, dst_reg, rc_reg);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	}
	case BPF_ALU64 | BPF_DIV | BPF_X: /* dst = dst / src */
	case BPF_ALU64 | BPF_MOD | BPF_X: /* dst = dst % src */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

		/* lghi %w0,0 */
		EMIT4_IMM(0xa7090000, REG_W0, 0);
		/* lgr %w1,%dst */
		EMIT4(0xb9040000, REG_W1, dst_reg);
		/* dlgr %w0,%src */
		EMIT4(0xb9870000, REG_W0, src_reg);
		/* lgr %dst,%rc */
		EMIT4(0xb9040000, dst_reg, rc_reg);
		break;
	}
	case BPF_ALU | BPF_DIV | BPF_K: /* dst = (u32) dst / (u32) imm */
	case BPF_ALU | BPF_MOD | BPF_K: /* dst = (u32) dst % (u32) imm */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

		if (imm == 1) {
			if (BPF_OP(insn->code) == BPF_MOD)
				/* lghi %dst,0 */
				EMIT4_IMM(0xa7090000, dst_reg, 0);
			else
				EMIT_ZERO(dst_reg);
			break;
		}
		/* lhi %w0,0 */
		EMIT4_IMM(0xa7080000, REG_W0, 0);
		/* lr %w1,%dst */
		EMIT2(0x1800, REG_W1, dst_reg);
		if (!is_first_pass(jit) && can_use_ldisp_for_lit32(jit)) {
			/* dl %w0,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0097, REG_W0, REG_0, REG_L,
				      EMIT_CONST_U32(imm));
		} else {
			/* lgfrl %dst,imm */
			EMIT6_PCREL_RILB(0xc40c0000, dst_reg,
					 _EMIT_CONST_U32(imm));
			jit->seen |= SEEN_LITERAL;
			/* dlr %w0,%dst */
			EMIT4(0xb9970000, REG_W0, dst_reg);
		}
		/* llgfr %dst,%rc */
		EMIT4(0xb9160000, dst_reg, rc_reg);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	}
	case BPF_ALU64 | BPF_DIV | BPF_K: /* dst = dst / imm */
	case BPF_ALU64 | BPF_MOD | BPF_K: /* dst = dst % imm */
	{
		int rc_reg = BPF_OP(insn->code) == BPF_DIV ? REG_W1 : REG_W0;

		if (imm == 1) {
			if (BPF_OP(insn->code) == BPF_MOD)
				/* lghi %dst,0 */
				EMIT4_IMM(0xa7090000, dst_reg, 0);
			break;
		}
		/* lghi %w0,0 */
		EMIT4_IMM(0xa7090000, REG_W0, 0);
		/* lgr %w1,%dst */
		EMIT4(0xb9040000, REG_W1, dst_reg);
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* dlg %w0,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0087, REG_W0, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %dst,imm */
			EMIT6_PCREL_RILB(0xc4080000, dst_reg,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* dlgr %w0,%dst */
			EMIT4(0xb9870000, REG_W0, dst_reg);
		}
		/* lgr %dst,%rc */
		EMIT4(0xb9040000, dst_reg, rc_reg);
		break;
	}
	/*
	 * BPF_AND
	 */
	case BPF_ALU | BPF_AND | BPF_X: /* dst = (u32) dst & (u32) src */
		/* nr %dst,%src */
		EMIT2(0x1400, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_AND | BPF_X: /* dst = dst & src */
		/* ngr %dst,%src */
		EMIT4(0xb9800000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_AND | BPF_K: /* dst = (u32) dst & (u32) imm */
		/* nilf %dst,imm */
		EMIT6_IMM(0xc00b0000, dst_reg, imm);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_AND | BPF_K: /* dst = dst & imm */
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* ng %dst,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0080,
				      dst_reg, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %w0,imm */
			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* ngr %dst,%w0 */
			EMIT4(0xb9800000, dst_reg, REG_W0);
		}
		break;
	/*
	 * BPF_OR
	 */
	case BPF_ALU | BPF_OR | BPF_X: /* dst = (u32) dst | (u32) src */
		/* or %dst,%src */
		EMIT2(0x1600, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_OR | BPF_X: /* dst = dst | src */
		/* ogr %dst,%src */
		EMIT4(0xb9810000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_OR | BPF_K: /* dst = (u32) dst | (u32) imm */
		/* oilf %dst,imm */
		EMIT6_IMM(0xc00d0000, dst_reg, imm);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_OR | BPF_K: /* dst = dst | imm */
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* og %dst,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0081,
				      dst_reg, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %w0,imm */
			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* ogr %dst,%w0 */
			EMIT4(0xb9810000, dst_reg, REG_W0);
		}
		break;
	/*
	 * BPF_XOR
	 */
	case BPF_ALU | BPF_XOR | BPF_X: /* dst = (u32) dst ^ (u32) src */
		/* xr %dst,%src */
		EMIT2(0x1700, dst_reg, src_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_XOR | BPF_X: /* dst = dst ^ src */
		/* xgr %dst,%src */
		EMIT4(0xb9820000, dst_reg, src_reg);
		break;
	case BPF_ALU | BPF_XOR | BPF_K: /* dst = (u32) dst ^ (u32) imm */
		if (imm != 0) {
			/* xilf %dst,imm */
			EMIT6_IMM(0xc0070000, dst_reg, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_XOR | BPF_K: /* dst = dst ^ imm */
		if (!is_first_pass(jit) && can_use_ldisp_for_lit64(jit)) {
			/* xg %dst,<d(imm)>(%l) */
			EMIT6_DISP_LH(0xe3000000, 0x0082,
				      dst_reg, REG_0, REG_L,
				      EMIT_CONST_U64(imm));
		} else {
			/* lgrl %w0,imm */
			EMIT6_PCREL_RILB(0xc4080000, REG_W0,
					 _EMIT_CONST_U64(imm));
			jit->seen |= SEEN_LITERAL;
			/* xgr %dst,%w0 */
			EMIT4(0xb9820000, dst_reg, REG_W0);
		}
		break;
	/*
	 * BPF_LSH
	 */
	case BPF_ALU | BPF_LSH | BPF_X: /* dst = (u32) dst << (u32) src */
		/* sll %dst,0(%src) */
		EMIT4_DISP(0x89000000, dst_reg, src_reg, 0);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_LSH | BPF_X: /* dst = dst << src */
		/* sllg %dst,%dst,0(%src) */
		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, src_reg, 0);
		break;
	case BPF_ALU | BPF_LSH | BPF_K: /* dst = (u32) dst << (u32) imm */
		if (imm != 0) {
			/* sll %dst,imm(%r0) */
			EMIT4_DISP(0x89000000, dst_reg, REG_0, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_LSH | BPF_K: /* dst = dst << imm */
		if (imm == 0)
			break;
		/* sllg %dst,%dst,imm(%r0) */
		EMIT6_DISP_LH(0xeb000000, 0x000d, dst_reg, dst_reg, REG_0, imm);
		break;
	/*
	 * BPF_RSH
	 */
	case BPF_ALU | BPF_RSH | BPF_X: /* dst = (u32) dst >> (u32) src */
		/* srl %dst,0(%src) */
		EMIT4_DISP(0x88000000, dst_reg, src_reg, 0);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_RSH | BPF_X: /* dst = dst >> src */
		/* srlg %dst,%dst,0(%src) */
		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, src_reg, 0);
		break;
	case BPF_ALU | BPF_RSH | BPF_K: /* dst = (u32) dst >> (u32) imm */
		if (imm != 0) {
			/* srl %dst,imm(%r0) */
			EMIT4_DISP(0x88000000, dst_reg, REG_0, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_RSH | BPF_K: /* dst = dst >> imm */
		if (imm == 0)
			break;
		/* srlg %dst,%dst,imm(%r0) */
		EMIT6_DISP_LH(0xeb000000, 0x000c, dst_reg, dst_reg, REG_0, imm);
		break;
	/*
	 * BPF_ARSH
	 */
	case BPF_ALU | BPF_ARSH | BPF_X: /* ((s32) dst) >>= src */
		/* sra %dst,0(%src) */
		EMIT4_DISP(0x8a000000, dst_reg, src_reg, 0);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ARSH | BPF_X: /* ((s64) dst) >>= src */
		/* srag %dst,%dst,0(%src) */
		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, src_reg, 0);
		break;
	case BPF_ALU | BPF_ARSH | BPF_K: /* ((s32) dst) >>= imm */
		if (imm != 0) {
			/* sra %dst,imm(%r0) */
			EMIT4_DISP(0x8a000000, dst_reg, REG_0, imm);
		}
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_ARSH | BPF_K: /* ((s64) dst) >>= imm */
		if (imm == 0)
			break;
		/* srag %dst,%dst,imm(%r0) */
		EMIT6_DISP_LH(0xeb000000, 0x000a, dst_reg, dst_reg, REG_0, imm);
		break;
	/*
	 * BPF_NEG
	 */
	case BPF_ALU | BPF_NEG: /* dst = (u32) -dst */
		/* lcr %dst,%dst */
		EMIT2(0x1300, dst_reg, dst_reg);
		EMIT_ZERO(dst_reg);
		break;
	case BPF_ALU64 | BPF_NEG: /* dst = -dst */
		/* lcgr %dst,%dst */
		EMIT4(0xb9030000, dst_reg, dst_reg);
		break;
	/*
	 * BPF_FROM_BE/LE
	 */
	case BPF_ALU | BPF_END | BPF_FROM_BE:
		/* s390 is big endian, therefore only clear high order bytes */
		switch (imm) {
		case 16: /* dst = (u16) cpu_to_be16(dst) */
			/* llghr %dst,%dst */
			EMIT4(0xb9850000, dst_reg, dst_reg);
			if (insn_is_zext(&insn[1]))
				insn_count = 2;
			break;
		case 32: /* dst = (u32) cpu_to_be32(dst) */
			if (!fp->aux->verifier_zext)
				/* llgfr %dst,%dst */
				EMIT4(0xb9160000, dst_reg, dst_reg);
			break;
		case 64: /* dst = (u64) cpu_to_be64(dst) */
			break;
		}
		break;
	case BPF_ALU | BPF_END | BPF_FROM_LE:
		switch (imm) {
		case 16: /* dst = (u16) cpu_to_le16(dst) */
			/* lrvr %dst,%dst */
			EMIT4(0xb91f0000, dst_reg, dst_reg);
			/* srl %dst,16(%r0) */
			EMIT4_DISP(0x88000000, dst_reg, REG_0, 16);
			/* llghr %dst,%dst */
			EMIT4(0xb9850000, dst_reg, dst_reg);
			if (insn_is_zext(&insn[1]))
				insn_count = 2;
			break;
		case 32: /* dst = (u32) cpu_to_le32(dst) */
			/* lrvr %dst,%dst */
			EMIT4(0xb91f0000, dst_reg, dst_reg);
			if (!fp->aux->verifier_zext)
				/* llgfr %dst,%dst */
				EMIT4(0xb9160000, dst_reg, dst_reg);
			break;
		case 64: /* dst = (u64) cpu_to_le64(dst) */
			/* lrvgr %dst,%dst */
			EMIT4(0xb90f0000, dst_reg, dst_reg);
			break;
		}
		break;
	/*
	 * BPF_NOSPEC (speculation barrier)
	 */
	case BPF_ST | BPF_NOSPEC:
		break;
	/*
	 * BPF_ST(X)
	 */
	case BPF_STX | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = src_reg */
		/* stcy %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0072, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_STX | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = src */
		/* sthy %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0070, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_STX | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = src */
		/* sty %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0050, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_STX | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = src */
		/* stg %src,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, src_reg, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_B: /* *(u8 *)(dst + off) = imm */
		/* lhi %w0,imm */
		EMIT4_IMM(0xa7080000, REG_W0, (u8) imm);
		/* stcy %w0,off(dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0072, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_H: /* *(u16 *)(dst + off) = imm */
		/* lhi %w0,imm */
		EMIT4_IMM(0xa7080000, REG_W0, (u16) imm);
		/* sthy %w0,off(dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0070, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_W: /* *(u32 *)(dst + off) = imm */
		/* llilf %w0,imm */
		EMIT6_IMM(0xc00f0000, REG_W0, (u32) imm);
		/* sty %w0,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0050, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	case BPF_ST | BPF_MEM | BPF_DW: /* *(u64 *)(dst + off) = imm */
		/* lgfi %w0,imm */
		EMIT6_IMM(0xc0010000, REG_W0, imm);
		/* stg %w0,off(%dst) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, dst_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		break;
	/*
	 * BPF_ATOMIC
	 */
	case BPF_STX | BPF_ATOMIC | BPF_DW:
	case BPF_STX | BPF_ATOMIC | BPF_W:
	{
		bool is32 = BPF_SIZE(insn->code) == BPF_W;

		switch (insn->imm) {
/* {op32|op64} {%w0|%src},%src,off(%dst) */
#define EMIT_ATOMIC(op32, op64) do {					\
	EMIT6_DISP_LH(0xeb000000, is32 ? (op32) : (op64),		\
		      (insn->imm & BPF_FETCH) ? src_reg : REG_W0,	\
		      src_reg, dst_reg, off);				\
	if (is32 && (insn->imm & BPF_FETCH))				\
		EMIT_ZERO(src_reg);					\
} while (0)
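		/*
		 * E.g. BPF_ADD below maps to laal/laalg: the old value is
		 * fetched into the first operand (src_reg with BPF_FETCH,
		 * the scratch %w0 otherwise) while the addend is applied
		 * to memory atomically.
		 */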
		case BPF_ADD:
		case BPF_ADD | BPF_FETCH:
			/* {laal|laalg} */
			EMIT_ATOMIC(0x00fa, 0x00ea);
			break;
		case BPF_AND:
		case BPF_AND | BPF_FETCH:
			/* {lan|lang} */
			EMIT_ATOMIC(0x00f4, 0x00e4);
			break;
		case BPF_OR:
		case BPF_OR | BPF_FETCH:
			/* {lao|laog} */
			EMIT_ATOMIC(0x00f6, 0x00e6);
			break;
		case BPF_XOR:
		case BPF_XOR | BPF_FETCH:
			/* {lax|laxg} */
			EMIT_ATOMIC(0x00f7, 0x00e7);
			break;
#undef EMIT_ATOMIC
		case BPF_XCHG:
			/* {ly|lg} %w0,off(%dst) */
			EMIT6_DISP_LH(0xe3000000,
				      is32 ? 0x0058 : 0x0004, REG_W0, REG_0,
				      dst_reg, off);
			/* 0: {csy|csg} %w0,%src,off(%dst) */
			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
				      REG_W0, src_reg, dst_reg, off);
			/* brc 4,0b */
			EMIT4_PCREL_RIC(0xa7040000, 4, jit->prg - 6);
			/* {llgfr|lgr} %src,%w0 */
			EMIT4(is32 ? 0xb9160000 : 0xb9040000, src_reg, REG_W0);
			if (is32 && insn_is_zext(&insn[1]))
				insn_count = 2;
			break;
		case BPF_CMPXCHG:
			/* 0: {csy|csg} %b0,%src,off(%dst) */
			EMIT6_DISP_LH(0xeb000000, is32 ? 0x0014 : 0x0030,
				      BPF_REG_0, src_reg, dst_reg, off);
			break;
		default:
			pr_err("Unknown atomic operation %02x\n", insn->imm);
			return -1;
		}

		jit->seen |= SEEN_MEM;
		break;
	}
	/*
	 * BPF_LDX
	 */
	case BPF_LDX | BPF_MEM | BPF_B: /* dst = *(u8 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_B:
		/* llgc %dst,0(off,%src) */
		EMIT6_DISP_LH(0xe3000000, 0x0090, dst_reg, src_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_LDX | BPF_MEM | BPF_H: /* dst = *(u16 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_H:
		/* llgh %dst,0(off,%src) */
		EMIT6_DISP_LH(0xe3000000, 0x0091, dst_reg, src_reg, REG_0, off);
		jit->seen |= SEEN_MEM;
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_LDX | BPF_MEM | BPF_W: /* dst = *(u32 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_W:
		/* llgf %dst,off(%src) */
		jit->seen |= SEEN_MEM;
		EMIT6_DISP_LH(0xe3000000, 0x0016, dst_reg, src_reg, REG_0, off);
		if (insn_is_zext(&insn[1]))
			insn_count = 2;
		break;
	case BPF_LDX | BPF_MEM | BPF_DW: /* dst = *(u64 *)(ul) (src + off) */
	case BPF_LDX | BPF_PROBE_MEM | BPF_DW:
		/* lg %dst,0(off,%src) */
		jit->seen |= SEEN_MEM;
		EMIT6_DISP_LH(0xe3000000, 0x0004, dst_reg, src_reg, REG_0, off);
		break;
	/*
	 * BPF_JMP / CALL
	 */
	case BPF_JMP | BPF_CALL:
	{
		const struct btf_func_model *m;
		bool func_addr_fixed;
		int j, ret;
		u64 func;

		ret = bpf_jit_get_func_addr(fp, insn, extra_pass,
					    &func, &func_addr_fixed);
		if (ret < 0)
			return -1;

		REG_SET_SEEN(BPF_REG_5);
		jit->seen |= SEEN_FUNC;
		/*
		 * Copy the tail call counter to where the callee expects it.
		 *
		 * Note 1: The callee can increment the tail call counter, but
		 * we do not load it back, since the x86 JIT does not do this
		 * either.
		 *
		 * Note 2: We assume that the verifier does not let us call the
		 * main program, which clears the tail call counter on entry.
		 */
		/* mvc STK_OFF_TCCNT(4,%r15),N(%r15) */
		_EMIT6(0xd203f000 | STK_OFF_TCCNT,
		       0xf000 | (STK_OFF_TCCNT + STK_OFF + stack_depth));

		/* Sign-extend the kfunc arguments. */
		if (insn->src_reg == BPF_PSEUDO_KFUNC_CALL) {
			m = bpf_jit_find_kfunc_model(fp, insn);
			if (!m)
				return -1;

			for (j = 0; j < m->nr_args; j++) {
				if (sign_extend(jit, BPF_REG_1 + j,
						m->arg_size[j],
						m->arg_flags[j]))
					return -1;
			}
		}

		/* lgrl %w1,func */
		EMIT6_PCREL_RILB(0xc4080000, REG_W1, _EMIT_CONST_U64(func));
		/* %r1() */
		call_r1(jit);
		/* lgr %b0,%r2: load return value into %b0 */
		EMIT4(0xb9040000, BPF_REG_0, REG_2);
		break;
	}
	case BPF_JMP | BPF_TAIL_CALL: {
		int patch_1_clrj, patch_2_clij, patch_3_brc;

		/*
		 * Implicit input:
		 *  B1: pointer to ctx
		 *  B2: pointer to bpf_array
		 *  B3: index in bpf_array
		 *
		 * if (index >= array->map.max_entries)
		 *         goto out;
		 */

		/* llgf %w1,map.max_entries(%b2) */
		EMIT6_DISP_LH(0xe3000000, 0x0016, REG_W1, REG_0, BPF_REG_2,
			      offsetof(struct bpf_array, map.max_entries));
		/* if ((u32)%b3 >= (u32)%w1) goto out; */
		/* clrj %b3,%w1,0xa,out */
		patch_1_clrj = jit->prg;
		EMIT6_PCREL_RIEB(0xec000000, 0x0077, BPF_REG_3, REG_W1, 0xa,
				 jit->prg);

		/*
		 * if (tail_call_cnt++ >= MAX_TAIL_CALL_CNT)
		 *         goto out;
		 */

		if (jit->seen & SEEN_STACK)
			off = STK_OFF_TCCNT + STK_OFF + stack_depth;
		else
			off = STK_OFF_TCCNT;
		/* lhi %w0,1 */
		EMIT4_IMM(0xa7080000, REG_W0, 1);
		/* laal %w1,%w0,off(%r15) */
		EMIT6_DISP_LH(0xeb000000, 0x00fa, REG_W1, REG_W0, REG_15, off);
		/* clij %w1,MAX_TAIL_CALL_CNT-1,0x2,out */
		patch_2_clij = jit->prg;
		EMIT6_PCREL_RIEC(0xec000000, 0x007f, REG_W1, MAX_TAIL_CALL_CNT - 1,
				 2, jit->prg);

		/*
		 * prog = array->ptrs[index];
		 * if (prog == NULL)
		 *         goto out;
		 */

		/* llgfr %r1,%b3: %r1 = (u32) index */
		EMIT4(0xb9160000, REG_1, BPF_REG_3);
		/* sllg %r1,%r1,3: %r1 *= 8 */
		EMIT6_DISP_LH(0xeb000000, 0x000d, REG_1, REG_1, REG_0, 3);
		/* ltg %r1,prog(%b2,%r1) */
		EMIT6_DISP_LH(0xe3000000, 0x0002, REG_1, BPF_REG_2,
			      REG_1, offsetof(struct bpf_array, ptrs));
		/* brc 0x8,out */
		patch_3_brc = jit->prg;
		EMIT4_PCREL_RIC(0xa7040000, 8, jit->prg);

		/*
		 * Restore registers before calling function
		 */
		save_restore_regs(jit, REGS_RESTORE, stack_depth);

		/*
		 * goto *(prog->bpf_func + tail_call_start);
		 */

		/* lg %r1,bpf_func(%r1) */
		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_1, REG_0,
			      offsetof(struct bpf_prog, bpf_func));
		if (nospec_uses_trampoline()) {
			jit->seen |= SEEN_FUNC;
			/* aghi %r1,tail_call_start */
			EMIT4_IMM(0xa70b0000, REG_1, jit->tail_call_start);
			/* brcl 0xf,__s390_indirect_jump_r1 */
			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->r1_thunk_ip);
		} else {
			/* bc 0xf,tail_call_start(%r1) */
			_EMIT4(0x47f01000 + jit->tail_call_start);
		}
		/* out: */
		if (jit->prg_buf) {
			*(u16 *)(jit->prg_buf + patch_1_clrj + 2) =
				(jit->prg - patch_1_clrj) >> 1;
			*(u16 *)(jit->prg_buf + patch_2_clij + 2) =
				(jit->prg - patch_2_clij) >> 1;
			*(u16 *)(jit->prg_buf + patch_3_brc + 2) =
				(jit->prg - patch_3_brc) >> 1;
		}
		break;
	}
	case BPF_JMP | BPF_EXIT: /* return b0 */
		last = (i == fp->len - 1) ? 1 : 0;
		if (last)
			break;
		if (!is_first_pass(jit) && can_use_rel(jit, jit->exit_ip))
			/* brc 0xf, <exit> */
			EMIT4_PCREL_RIC(0xa7040000, 0xf, jit->exit_ip);
		else
			/* brcl 0xf, <exit> */
			EMIT6_PCREL_RILC(0xc0040000, 0xf, jit->exit_ip);
		break;
	/*
	 * Branch relative (number of skipped instructions) to offset on
	 * condition.
	 *
	 * Condition code to mask mapping:
	 *
	 * CC | Description	   | Mask
	 * ------------------------------
	 * 0  | Operands equal	   |	8
	 * 1  | First operand low  |	4
	 * 2  | First operand high |	2
	 * 3  | Unused		   |	1
	 *
	 * For s390x relative branches: ip = ip + off_bytes
	 * For BPF relative branches:	insn = insn + off_insns + 1
	 *
	 * For example, for s390x with offset 0 we jump to the branch
	 * instruction itself (loop) and for BPF with offset 0 we
	 * branch to the instruction following the branch.
	 */
	case BPF_JMP | BPF_JA: /* if (true) */
		mask = 0xf000; /* j */
		goto branch_oc;
	case BPF_JMP | BPF_JSGT | BPF_K: /* ((s64) dst > (s64) imm) */
	case BPF_JMP32 | BPF_JSGT | BPF_K: /* ((s32) dst > (s32) imm) */
		mask = 0x2000; /* jh */
		goto branch_ks;
	case BPF_JMP | BPF_JSLT | BPF_K: /* ((s64) dst < (s64) imm) */
	case BPF_JMP32 | BPF_JSLT | BPF_K: /* ((s32) dst < (s32) imm) */
		mask = 0x4000; /* jl */
		goto branch_ks;
	case BPF_JMP | BPF_JSGE | BPF_K: /* ((s64) dst >= (s64) imm) */
	case BPF_JMP32 | BPF_JSGE | BPF_K: /* ((s32) dst >= (s32) imm) */
		mask = 0xa000; /* jhe */
		goto branch_ks;
	case BPF_JMP | BPF_JSLE | BPF_K: /* ((s64) dst <= (s64) imm) */
	case BPF_JMP32 | BPF_JSLE | BPF_K: /* ((s32) dst <= (s32) imm) */
		mask = 0xc000; /* jle */
		goto branch_ks;
	case BPF_JMP | BPF_JGT | BPF_K: /* (dst_reg > imm) */
	case BPF_JMP32 | BPF_JGT | BPF_K: /* ((u32) dst_reg > (u32) imm) */
		mask = 0x2000; /* jh */
		goto branch_ku;
	case BPF_JMP | BPF_JLT | BPF_K: /* (dst_reg < imm) */
	case BPF_JMP32 | BPF_JLT | BPF_K: /* ((u32) dst_reg < (u32) imm) */
		mask = 0x4000; /* jl */
		goto branch_ku;
	case BPF_JMP | BPF_JGE | BPF_K: /* (dst_reg >= imm) */
	case BPF_JMP32 | BPF_JGE | BPF_K: /* ((u32) dst_reg >= (u32) imm) */
		mask = 0xa000; /* jhe */
		goto branch_ku;
	case BPF_JMP | BPF_JLE | BPF_K: /* (dst_reg <= imm) */
	case BPF_JMP32 | BPF_JLE | BPF_K: /* ((u32) dst_reg <= (u32) imm) */
		mask = 0xc000; /* jle */
		goto branch_ku;
	case BPF_JMP | BPF_JNE | BPF_K: /* (dst_reg != imm) */
	case BPF_JMP32 | BPF_JNE | BPF_K: /* ((u32) dst_reg != (u32) imm) */
		mask = 0x7000; /* jne */
		goto branch_ku;
	case BPF_JMP | BPF_JEQ | BPF_K: /* (dst_reg == imm) */
	case BPF_JMP32 | BPF_JEQ | BPF_K: /* ((u32) dst_reg == (u32) imm) */
		mask = 0x8000; /* je */
		goto branch_ku;
	case BPF_JMP | BPF_JSET | BPF_K: /* (dst_reg & imm) */
	case BPF_JMP32 | BPF_JSET | BPF_K: /* ((u32) dst_reg & (u32) imm) */
		mask = 0x7000; /* jnz */
		if (BPF_CLASS(insn->code) == BPF_JMP32) {
			/* llilf %w1,imm (load zero extend imm) */
			EMIT6_IMM(0xc00f0000, REG_W1, imm);
			/* nr %w1,%dst */
			EMIT2(0x1400, REG_W1, dst_reg);
		} else {
			/* lgfi %w1,imm (load sign extend imm) */
			EMIT6_IMM(0xc0010000, REG_W1, imm);
			/* ngr %w1,%dst */
			EMIT4(0xb9800000, REG_W1, dst_reg);
		}
		goto branch_oc;

	case BPF_JMP | BPF_JSGT | BPF_X: /* ((s64) dst > (s64) src) */
	case BPF_JMP32 | BPF_JSGT | BPF_X: /* ((s32) dst > (s32) src) */
		mask = 0x2000; /* jh */
		goto branch_xs;
	case BPF_JMP | BPF_JSLT | BPF_X: /* ((s64) dst < (s64) src) */
	case BPF_JMP32 | BPF_JSLT | BPF_X: /* ((s32) dst < (s32) src) */
		mask = 0x4000; /* jl */
		goto branch_xs;
	case BPF_JMP | BPF_JSGE | BPF_X: /* ((s64) dst >= (s64) src) */
	case BPF_JMP32 | BPF_JSGE | BPF_X: /* ((s32) dst >= (s32) src) */
		mask = 0xa000; /* jhe */
		goto branch_xs;
	case BPF_JMP | BPF_JSLE | BPF_X: /* ((s64) dst <= (s64) src) */
	case BPF_JMP32 | BPF_JSLE | BPF_X: /* ((s32) dst <= (s32) src) */
		mask = 0xc000; /* jle */
		goto branch_xs;
	case BPF_JMP | BPF_JGT | BPF_X: /* (dst > src) */
	case BPF_JMP32 | BPF_JGT | BPF_X: /* ((u32) dst > (u32) src) */
		mask = 0x2000; /* jh */
		goto branch_xu;
	case BPF_JMP | BPF_JLT | BPF_X: /* (dst < src) */
	case BPF_JMP32 | BPF_JLT | BPF_X: /* ((u32) dst < (u32) src) */
		mask = 0x4000; /* jl */
		goto branch_xu;
	case BPF_JMP | BPF_JGE | BPF_X: /* (dst >= src) */
	case BPF_JMP32 | BPF_JGE | BPF_X: /* ((u32) dst >= (u32) src) */
		mask = 0xa000; /* jhe */
		goto branch_xu;
	case BPF_JMP | BPF_JLE | BPF_X: /* (dst <= src) */
	case BPF_JMP32 | BPF_JLE | BPF_X: /* ((u32) dst <= (u32) src) */
		mask = 0xc000; /* jle */
		goto branch_xu;
	case BPF_JMP | BPF_JNE | BPF_X: /* (dst != src) */
	case BPF_JMP32 | BPF_JNE | BPF_X: /* ((u32) dst != (u32) src) */
		mask = 0x7000; /* jne */
		goto branch_xu;
	case BPF_JMP | BPF_JEQ | BPF_X: /* (dst == src) */
	case BPF_JMP32 | BPF_JEQ | BPF_X: /* ((u32) dst == (u32) src) */
		mask = 0x8000; /* je */
		goto branch_xu;
	case BPF_JMP | BPF_JSET | BPF_X: /* (dst & src) */
	case BPF_JMP32 | BPF_JSET | BPF_X: /* ((u32) dst & (u32) src) */
	{
		bool is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;

		mask = 0x7000; /* jnz */
		/* nrk or ngrk %w1,%dst,%src */
		EMIT4_RRF((is_jmp32 ? 0xb9f40000 : 0xb9e40000),
			  REG_W1, dst_reg, src_reg);
		goto branch_oc;
branch_ks:
		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
		/* cfi or cgfi %dst,imm */
		EMIT6_IMM(is_jmp32 ? 0xc20d0000 : 0xc20c0000,
			  dst_reg, imm);
		if (!is_first_pass(jit) &&
		    can_use_rel(jit, addrs[i + off + 1])) {
			/* brc mask,off */
			EMIT4_PCREL_RIC(0xa7040000,
					mask >> 12, addrs[i + off + 1]);
		} else {
			/* brcl mask,off */
			EMIT6_PCREL_RILC(0xc0040000,
					 mask >> 12, addrs[i + off + 1]);
		}
		break;
branch_ku:
		/* lgfi %w1,imm (load sign extend imm) */
		src_reg = REG_1;
		EMIT6_IMM(0xc0010000, src_reg, imm);
		goto branch_xu;
branch_xs:
		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
		if (!is_first_pass(jit) &&
		    can_use_rel(jit, addrs[i + off + 1])) {
			/* crj or cgrj %dst,%src,mask,off */
			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0076 : 0x0064),
				    dst_reg, src_reg, i, off, mask);
		} else {
			/* cr or cgr %dst,%src */
			if (is_jmp32)
				EMIT2(0x1900, dst_reg, src_reg);
			else
				EMIT4(0xb9200000, dst_reg, src_reg);
			/* brcl mask,off */
			EMIT6_PCREL_RILC(0xc0040000,
					 mask >> 12, addrs[i + off + 1]);
		}
		break;
branch_xu:
		is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
		if (!is_first_pass(jit) &&
		    can_use_rel(jit, addrs[i + off + 1])) {
			/* clrj or clgrj %dst,%src,mask,off */
			EMIT6_PCREL(0xec000000, (is_jmp32 ? 0x0077 : 0x0065),
				    dst_reg, src_reg, i, off, mask);
		} else {
			/* clr or clgr %dst,%src */
			if (is_jmp32)
				EMIT2(0x1500, dst_reg, src_reg);
			else
				EMIT4(0xb9210000, dst_reg, src_reg);
			/* brcl mask,off */
			EMIT6_PCREL_RILC(0xc0040000,
					 mask >> 12, addrs[i + off + 1]);
		}
		break;
branch_oc:
		if (!is_first_pass(jit) &&
		    can_use_rel(jit, addrs[i + off + 1])) {
			/* brc mask,off */
			EMIT4_PCREL_RIC(0xa7040000,
					mask >> 12, addrs[i + off + 1]);
		} else {
			/* brcl mask,off */
			EMIT6_PCREL_RILC(0xc0040000,
					 mask >> 12, addrs[i + off + 1]);
		}
		break;
	}
	default: /* too complex, give up */
		pr_err("Unknown opcode %02x\n", insn->code);
		return -1;
	}

	if (probe_prg != -1) {
		/*
		 * Handlers of certain exceptions leave psw.addr pointing to
		 * the instruction directly after the failing one. Therefore,
		 * create two exception table entries and also add a nop in
		 * case two probing instructions come directly after each
		 * other.
		 */
		nop_prg = jit->prg;
		/* bcr 0,%0 */
		_EMIT2(0x0700);
		err = bpf_jit_probe_mem(jit, fp, probe_prg, nop_prg);
		if (err < 0)
			return err;
	}

	return insn_count;
}

/*
 * Return whether new i-th instruction address does not violate any invariant
 */
static bool bpf_is_new_addr_sane(struct bpf_jit *jit, int i)
{
	/* On the first pass anything goes */
	if (is_first_pass(jit))
		return true;

	/* The codegen pass must not change anything */
	if (is_codegen_pass(jit))
		return jit->addrs[i] == jit->prg;

	/* Passes in between must not increase code size */
	return jit->addrs[i] >= jit->prg;
}

/*
 * Update the address of i-th instruction
 */
static int bpf_set_addr(struct bpf_jit *jit, int i)
{
	int delta;

	if (is_codegen_pass(jit)) {
		delta = jit->prg - jit->addrs[i];
		if (delta < 0)
			bpf_skip(jit, -delta);
	}
	if (WARN_ON_ONCE(!bpf_is_new_addr_sane(jit, i)))
		return -1;
	jit->addrs[i] = jit->prg;
	return 0;
}
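/*
 * Note: together with bpf_is_new_addr_sane() this is what makes the pass
 * scheme converge - addresses may only move down while sizes are being
 * refined, and during codegen bpf_skip() pads out any instruction that came
 * out shorter than planned, so the final image matches addrs[].
 */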

/*
 * Compile eBPF program into s390x code
 */
static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
			bool extra_pass, u32 stack_depth)
{
	int i, insn_count, lit32_size, lit64_size;

	jit->lit32 = jit->lit32_start;
	jit->lit64 = jit->lit64_start;
	jit->prg = 0;
	jit->excnt = 0;

	bpf_jit_prologue(jit, fp, stack_depth);
	if (bpf_set_addr(jit, 0) < 0)
		return -1;
	for (i = 0; i < fp->len; i += insn_count) {
		insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
		if (insn_count < 0)
			return -1;
		/* Next instruction address */
		if (bpf_set_addr(jit, i + insn_count) < 0)
			return -1;
	}
	bpf_jit_epilogue(jit, stack_depth);

	lit32_size = jit->lit32 - jit->lit32_start;
	lit64_size = jit->lit64 - jit->lit64_start;
	jit->lit32_start = jit->prg;
	if (lit32_size)
		jit->lit32_start = ALIGN(jit->lit32_start, 4);
	jit->lit64_start = jit->lit32_start + lit32_size;
	if (lit64_size)
		jit->lit64_start = ALIGN(jit->lit64_start, 8);
	jit->size = jit->lit64_start + lit64_size;
	jit->size_prg = jit->prg;

	if (WARN_ON_ONCE(fp->aux->extable &&
			 jit->excnt != fp->aux->num_exentries))
		/* Verifier bug - too many entries. */
		return -1;

	return 0;
}
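/*
 * Resulting image layout: program code, then the 4-byte-aligned 32-bit
 * literal pool, then the 8-byte-aligned 64-bit literal pool; jit->size_prg
 * covers the code part only.
 */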
1855
bool bpf_jit_needs_zext(void)
{
	return true;
}

struct s390_jit_data {
	struct bpf_binary_header *header;
	struct bpf_jit ctx;
	int pass;
};

static struct bpf_binary_header *bpf_jit_alloc(struct bpf_jit *jit,
					       struct bpf_prog *fp)
{
	struct bpf_binary_header *header;
	u32 extable_size;
	u32 code_size;

	/* We need two exception table entries per probing insn. */
	fp->aux->num_exentries *= 2;

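	/*
	 * Lay out the JITed code first, followed by the exception table,
	 * aligned to the size of its entries.
	 */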
	code_size = roundup(jit->size,
			    __alignof__(struct exception_table_entry));
	extable_size = fp->aux->num_exentries *
		sizeof(struct exception_table_entry);
	header = bpf_jit_binary_alloc(code_size + extable_size, &jit->prg_buf,
				      8, jit_fill_hole);
	if (!header)
		return NULL;
	fp->aux->extable = (struct exception_table_entry *)
		(jit->prg_buf + code_size);
	return header;
}

/*
 * Compile eBPF program "fp"
 */
struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
{
	u32 stack_depth = round_up(fp->aux->stack_depth, 8);
	struct bpf_prog *tmp, *orig_fp = fp;
	struct bpf_binary_header *header;
	struct s390_jit_data *jit_data;
	bool tmp_blinded = false;
	bool extra_pass = false;
	struct bpf_jit jit;
	int pass;

	if (WARN_ON_ONCE(bpf_plt_end - bpf_plt != BPF_PLT_SIZE))
		return orig_fp;

	if (!fp->jit_requested)
		return orig_fp;

	tmp = bpf_jit_blind_constants(fp);
	/*
	 * If blinding was requested and we failed during blinding,
	 * we must fall back to the interpreter.
	 */
	if (IS_ERR(tmp))
		return orig_fp;
	if (tmp != fp) {
		tmp_blinded = true;
		fp = tmp;
	}

	jit_data = fp->aux->jit_data;
	if (!jit_data) {
		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
		if (!jit_data) {
			fp = orig_fp;
			goto out;
		}
		fp->aux->jit_data = jit_data;
	}
	if (jit_data->ctx.addrs) {
		jit = jit_data->ctx;
		header = jit_data->header;
		extra_pass = true;
		pass = jit_data->pass + 1;
		goto skip_init_ctx;
	}

	memset(&jit, 0, sizeof(jit));
	jit.addrs = kvcalloc(fp->len + 1, sizeof(*jit.addrs), GFP_KERNEL);
	if (jit.addrs == NULL) {
		fp = orig_fp;
		goto free_addrs;
	}
	/*
	 * Three initial passes:
	 *   - 1/2: Determine clobbered registers
	 *   - 3:   Calculate program size and addrs array
	 */
	for (pass = 1; pass <= 3; pass++) {
		if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
			fp = orig_fp;
			goto free_addrs;
		}
	}
	/*
	 * Final pass: Allocate and generate program
	 */
	header = bpf_jit_alloc(&jit, fp);
	if (!header) {
		fp = orig_fp;
		goto free_addrs;
	}
skip_init_ctx:
	if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
		bpf_jit_binary_free(header);
		fp = orig_fp;
		goto free_addrs;
	}
	if (bpf_jit_enable > 1) {
		bpf_jit_dump(fp->len, jit.size, pass, jit.prg_buf);
		print_fn_code(jit.prg_buf, jit.size_prg);
	}
	if (!fp->is_func || extra_pass) {
		bpf_jit_binary_lock_ro(header);
	} else {
		jit_data->header = header;
		jit_data->ctx = jit;
		jit_data->pass = pass;
	}
	fp->bpf_func = (void *) jit.prg_buf;
	fp->jited = 1;
	fp->jited_len = jit.size;

	if (!fp->is_func || extra_pass) {
		bpf_prog_fill_jited_linfo(fp, jit.addrs + 1);
free_addrs:
		kvfree(jit.addrs);
		kfree(jit_data);
		fp->aux->jit_data = NULL;
	}
out:
	if (tmp_blinded)
		bpf_jit_prog_release_other(fp, fp == orig_fp ?
					   tmp : orig_fp);
	return fp;
}

bool bpf_jit_supports_kfunc_call(void)
{
	return true;
}

bool bpf_jit_supports_far_kfunc_call(void)
{
	return true;
}

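/*
 * The patch site is a "brcl <mask>,<target>": mask 0 (a nop) when the jump
 * or call is disabled, mask 0xf (always taken) when it is enabled. Unless
 * a jump already reaches new_addr directly, <target> is a per-site PLT
 * whose destination word is rewritten below.
 */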
int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
		       void *old_addr, void *new_addr)
{
	struct {
		u16 opc;
		s32 disp;
	} __packed insn;
	char expected_plt[BPF_PLT_SIZE];
	char current_plt[BPF_PLT_SIZE];
	char new_plt[BPF_PLT_SIZE];
	char *plt;
	char *ret;
	int err;

	/* Verify the branch to be patched. */
	err = copy_from_kernel_nofault(&insn, ip, sizeof(insn));
	if (err < 0)
		return err;
	if (insn.opc != (0xc004 | (old_addr ? 0xf0 : 0)))
		return -EINVAL;

	if (t == BPF_MOD_JUMP &&
	    insn.disp == ((char *)new_addr - (char *)ip) >> 1) {
		/*
		 * The branch already points to the destination,
		 * there is no PLT.
		 */
	} else {
		/* Verify the PLT. */
		plt = (char *)ip + (insn.disp << 1);
		err = copy_from_kernel_nofault(current_plt, plt, BPF_PLT_SIZE);
		if (err < 0)
			return err;
		ret = (char *)ip + 6;
		bpf_jit_plt(expected_plt, ret, old_addr);
		if (memcmp(current_plt, expected_plt, BPF_PLT_SIZE))
			return -EINVAL;
		/* Adjust the call address. */
		bpf_jit_plt(new_plt, ret, new_addr);
		s390_kernel_write(plt + (bpf_plt_target - bpf_plt),
				  new_plt + (bpf_plt_target - bpf_plt),
				  sizeof(void *));
	}

	/* Adjust the mask of the branch. */
	insn.opc = 0xc004 | (new_addr ? 0xf0 : 0);
	s390_kernel_write((char *)ip + 1, (char *)&insn.opc + 1, 1);

	/* Make the new code visible to the other CPUs. */
	text_poke_sync_lock();

	return 0;
}

struct bpf_tramp_jit {
	struct bpf_jit common;
	int orig_stack_args_off;/* Offset of arguments placed on stack by the
				 * func_addr's original caller
				 */
	int stack_size;		/* Trampoline stack size */
	int backchain_off;	/* Offset of backchain */
	int stack_args_off;	/* Offset of stack arguments for calling
				 * func_addr, has to be at the top
				 */
	int reg_args_off;	/* Offset of register arguments for calling
				 * func_addr
				 */
	int ip_off;		/* For bpf_get_func_ip(), has to be at
				 * (ctx - 16)
				 */
	int arg_cnt_off;	/* For bpf_get_func_arg_cnt(), has to be at
				 * (ctx - 8)
				 */
	int bpf_args_off;	/* Offset of BPF_PROG context, which consists
				 * of BPF arguments followed by return value
				 */
	int retval_off;		/* Offset of return value (see above) */
	int r7_r8_off;		/* Offset of saved %r7 and %r8, which are used
				 * for __bpf_prog_enter() return value and
				 * func_addr respectively
				 */
	int run_ctx_off;	/* Offset of struct bpf_tramp_run_ctx */
	int tccnt_off;		/* Offset of saved tailcall counter */
	int r14_off;		/* Offset of saved %r14, has to be at the
				 * bottom
				 */
	int do_fexit;		/* do_fexit: label */
};

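/*
 * Load a 64-bit immediate without going through the literal pool; for
 * example, load_imm64(jit, dst_reg, 0x1234567890abcdefULL) emits
 * "llihf %dst,0x12345678" followed by "oilf %dst,0x90abcdef".
 */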
static void load_imm64(struct bpf_jit *jit, int dst_reg, u64 val)
{
	/* llihf %dst_reg,val_hi */
	EMIT6_IMM(0xc00e0000, dst_reg, (val >> 32));
	/* oilf %dst_reg,val_lo */
	EMIT6_IMM(0xc00d0000, dst_reg, val);
}

static int invoke_bpf_prog(struct bpf_tramp_jit *tjit,
			   const struct btf_func_model *m,
			   struct bpf_tramp_link *tlink, bool save_ret)
{
	struct bpf_jit *jit = &tjit->common;
	int cookie_off = tjit->run_ctx_off +
			 offsetof(struct bpf_tramp_run_ctx, bpf_cookie);
	struct bpf_prog *p = tlink->link.prog;
	int patch;

	/*
	 * run_ctx.cookie = tlink->cookie;
	 */

	/* %r0 = tlink->cookie */
	load_imm64(jit, REG_W0, tlink->cookie);
	/* stg %r0,cookie_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_W0, REG_0, REG_15, cookie_off);

	/*
	 * if ((start = __bpf_prog_enter(p, &run_ctx)) == 0)
	 *         goto skip;
	 */

	/* %r1 = __bpf_prog_enter */
	load_imm64(jit, REG_1, (u64)bpf_trampoline_enter(p));
	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* la %r3,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_3, REG_15, tjit->run_ctx_off);
	/* %r1() */
	call_r1(jit);
	/* ltgr %r7,%r2 */
	EMIT4(0xb9020000, REG_7, REG_2);
	/* brcl 8,skip */
	patch = jit->prg;
	EMIT6_PCREL_RILC(0xc0040000, 8, 0);
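	/*
	 * The displacement of the brcl above is unknown at this point; it
	 * is backpatched at the "skip:" label below, in halfwords, into
	 * bytes 2-5 of the instruction.
	 */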

	/*
	 * retval = bpf_func(args, p->insnsi);
	 */

	/* %r1 = p->bpf_func */
	load_imm64(jit, REG_1, (u64)p->bpf_func);
	/* la %r2,bpf_args_off(%r15) */
	EMIT4_DISP(0x41000000, REG_2, REG_15, tjit->bpf_args_off);
	/* %r3 = p->insnsi */
	if (!p->jited)
		load_imm64(jit, REG_3, (u64)p->insnsi);
	/* %r1() */
	call_r1(jit);
	if (save_ret) {
		if (sign_extend(jit, REG_2, m->ret_size, m->ret_flags))
			return -1;
		/* stg %r2,retval_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
			      tjit->retval_off);
	}

	/* skip: */
	if (jit->prg_buf)
		*(u32 *)&jit->prg_buf[patch + 2] = (jit->prg - patch) >> 1;

	/*
	 * __bpf_prog_exit(p, start, &run_ctx);
	 */

	/* %r1 = __bpf_prog_exit */
	load_imm64(jit, REG_1, (u64)bpf_trampoline_exit(p));
	/* %r2 = p */
	load_imm64(jit, REG_2, (u64)p);
	/* lgr %r3,%r7 */
	EMIT4(0xb9040000, REG_3, REG_7);
	/* la %r4,run_ctx_off(%r15) */
	EMIT4_DISP(0x41000000, REG_4, REG_15, tjit->run_ctx_off);
	/* %r1() */
	call_r1(jit);

	return 0;
}

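/*
 * Reserve "size" bytes in the trampoline stack frame and return their
 * offset relative to %r15.
 */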
static int alloc_stack(struct bpf_tramp_jit *tjit, size_t size)
{
	int stack_offset = tjit->stack_size;

	tjit->stack_size += size;
	return stack_offset;
}

/* ABI uses %r2 - %r6 for parameter passing. */
#define MAX_NR_REG_ARGS 5

/* The "L" field of the "mvc" instruction is 8 bits. */
#define MAX_MVC_SIZE 256
#define MAX_NR_STACK_ARGS (MAX_MVC_SIZE / sizeof(u64))

/* -mfentry generates a 6-byte nop on s390x. */
#define S390X_PATCH_SIZE 6

static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
					 struct bpf_tramp_jit *tjit,
					 const struct btf_func_model *m,
					 u32 flags,
					 struct bpf_tramp_links *tlinks,
					 void *func_addr)
{
	struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
	struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
	struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
	int nr_bpf_args, nr_reg_args, nr_stack_args;
	struct bpf_jit *jit = &tjit->common;
	int arg, bpf_arg_off;
	int i, j;

	/* Support as many stack arguments as "mvc" can handle. */
	nr_reg_args = min_t(int, m->nr_args, MAX_NR_REG_ARGS);
	nr_stack_args = m->nr_args - nr_reg_args;
	if (nr_stack_args > MAX_NR_STACK_ARGS)
		return -ENOTSUPP;

	/* Return to %r14, since func_addr and %r0 are not available. */
	if (!func_addr && !(flags & BPF_TRAMP_F_ORIG_STACK))
		flags |= BPF_TRAMP_F_SKIP_FRAME;

	/*
	 * Compute how many arguments we need to pass to BPF programs.
	 * BPF ABI mirrors that of x86_64: arguments that are 16 bytes or
	 * smaller are packed into 1 or 2 registers; larger arguments are
	 * passed via pointers.
	 * In s390x ABI, arguments that are 8 bytes or smaller are packed into
	 * a register; larger arguments are passed via pointers.
	 * We need to deal with this difference: for example, a 16-byte
	 * struct arrives as a pointer in one s390x register, but occupies
	 * two 8-byte slots in the BPF context.
	 */
	nr_bpf_args = 0;
	for (i = 0; i < m->nr_args; i++) {
		if (m->arg_size[i] <= 8)
			nr_bpf_args += 1;
		else if (m->arg_size[i] <= 16)
			nr_bpf_args += 2;
		else
			return -ENOTSUPP;
	}

	/*
	 * Calculate the stack layout.
	 */

	/*
	 * Allocate STACK_FRAME_OVERHEAD bytes for the callees. As the s390x
	 * ABI requires, put our backchain at the end of the allocated memory.
	 */
	tjit->stack_size = STACK_FRAME_OVERHEAD;
	tjit->backchain_off = tjit->stack_size - sizeof(u64);
	tjit->stack_args_off = alloc_stack(tjit, nr_stack_args * sizeof(u64));
	tjit->reg_args_off = alloc_stack(tjit, nr_reg_args * sizeof(u64));
	tjit->ip_off = alloc_stack(tjit, sizeof(u64));
	tjit->arg_cnt_off = alloc_stack(tjit, sizeof(u64));
	tjit->bpf_args_off = alloc_stack(tjit, nr_bpf_args * sizeof(u64));
	tjit->retval_off = alloc_stack(tjit, sizeof(u64));
	tjit->r7_r8_off = alloc_stack(tjit, 2 * sizeof(u64));
	tjit->run_ctx_off = alloc_stack(tjit,
					sizeof(struct bpf_tramp_run_ctx));
	tjit->tccnt_off = alloc_stack(tjit, sizeof(u64));
	tjit->r14_off = alloc_stack(tjit, sizeof(u64) * 2);
	/*
	 * In accordance with the s390x ABI, the caller has allocated
	 * STACK_FRAME_OVERHEAD bytes for us. 8 of them contain the caller's
	 * backchain, and the rest we can use.
	 */
	tjit->stack_size -= STACK_FRAME_OVERHEAD - sizeof(u64);
	tjit->orig_stack_args_off = tjit->stack_size + STACK_FRAME_OVERHEAD;
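	/*
	 * Resulting layout, from the lowest %r15 offset upwards: standard
	 * frame for callees (with our backchain in its last slot),
	 * stack_args, reg_args, ip, arg_cnt, bpf_args, retval, r7_r8,
	 * run_ctx, tccnt and r14; orig_stack_args lives above our frame,
	 * in the caller-allocated area.
	 */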

	/* lgr %r1,%r15 */
	EMIT4(0xb9040000, REG_1, REG_15);
	/* aghi %r15,-stack_size */
	EMIT4_IMM(0xa70b0000, REG_15, -tjit->stack_size);
	/* stg %r1,backchain_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_1, REG_0, REG_15,
		      tjit->backchain_off);
	/* mvc tccnt_off(4,%r15),stack_size+STK_OFF_TCCNT(%r15) */
	_EMIT6(0xd203f000 | tjit->tccnt_off,
	       0xf000 | (tjit->stack_size + STK_OFF_TCCNT));
	/* stmg %r2,%rN,reg_args_off(%r15) */
	if (nr_reg_args)
		EMIT6_DISP_LH(0xeb000000, 0x0024, REG_2,
			      REG_2 + (nr_reg_args - 1), REG_15,
			      tjit->reg_args_off);
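	/*
	 * Build the BPF program context: copy every argument into its
	 * bpf_args_off slot(s), dereferencing pointers to arguments wider
	 * than 8 bytes.
	 */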
	for (i = 0, j = 0; i < m->nr_args; i++) {
		if (i < MAX_NR_REG_ARGS)
			arg = REG_2 + i;
		else
			arg = tjit->orig_stack_args_off +
			      (i - MAX_NR_REG_ARGS) * sizeof(u64);
		bpf_arg_off = tjit->bpf_args_off + j * sizeof(u64);
		if (m->arg_size[i] <= 8) {
			if (i < MAX_NR_REG_ARGS)
				/* stg %arg,bpf_arg_off(%r15) */
				EMIT6_DISP_LH(0xe3000000, 0x0024, arg,
					      REG_0, REG_15, bpf_arg_off);
			else
				/* mvc bpf_arg_off(8,%r15),arg(%r15) */
				_EMIT6(0xd207f000 | bpf_arg_off,
				       0xf000 | arg);
			j += 1;
		} else {
			if (i < MAX_NR_REG_ARGS) {
				/* mvc bpf_arg_off(16,%r15),0(%arg) */
				_EMIT6(0xd20ff000 | bpf_arg_off,
				       reg2hex[arg] << 12);
			} else {
				/* lg %r1,arg(%r15) */
				EMIT6_DISP_LH(0xe3000000, 0x0004, REG_1, REG_0,
					      REG_15, arg);
				/* mvc bpf_arg_off(16,%r15),0(%r1) */
				_EMIT6(0xd20ff000 | bpf_arg_off, 0x1000);
			}
			j += 2;
		}
	}
	/* stmg %r7,%r8,r7_r8_off(%r15) */
	EMIT6_DISP_LH(0xeb000000, 0x0024, REG_7, REG_8, REG_15,
		      tjit->r7_r8_off);
	/* stg %r14,r14_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_14, REG_0, REG_15, tjit->r14_off);

	if (flags & BPF_TRAMP_F_ORIG_STACK) {
		/*
		 * The ftrace trampoline puts the return address (which is the
		 * address of the original function + S390X_PATCH_SIZE) into
		 * %r0; see ftrace_shared_hotpatch_trampoline_br and
		 * ftrace_init_nop() for details.
		 */

		/* lgr %r8,%r0 */
		EMIT4(0xb9040000, REG_8, REG_0);
	} else {
		/* %r8 = func_addr + S390X_PATCH_SIZE */
		load_imm64(jit, REG_8, (u64)func_addr + S390X_PATCH_SIZE);
	}

	/*
	 * ip = func_addr;
	 * arg_cnt = nr_bpf_args;
	 */

	if (flags & BPF_TRAMP_F_IP_ARG) {
		/* %r0 = func_addr */
		load_imm64(jit, REG_0, (u64)func_addr);
		/* stg %r0,ip_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
			      tjit->ip_off);
	}
	/* lghi %r0,nr_bpf_args */
	EMIT4_IMM(0xa7090000, REG_0, nr_bpf_args);
	/* stg %r0,arg_cnt_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0024, REG_0, REG_0, REG_15,
		      tjit->arg_cnt_off);

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * __bpf_tramp_enter(im);
		 */

		/* %r1 = __bpf_tramp_enter */
		load_imm64(jit, REG_1, (u64)__bpf_tramp_enter);
		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* %r1() */
		call_r1(jit);
	}

	for (i = 0; i < fentry->nr_links; i++)
		if (invoke_bpf_prog(tjit, m, fentry->links[i],
				    flags & BPF_TRAMP_F_RET_FENTRY_RET))
			return -EINVAL;

	if (fmod_ret->nr_links) {
		/*
		 * retval = 0;
		 */

		/* xc retval_off(8,%r15),retval_off(%r15) */
		_EMIT6(0xd707f000 | tjit->retval_off,
		       0xf000 | tjit->retval_off);

		for (i = 0; i < fmod_ret->nr_links; i++) {
			if (invoke_bpf_prog(tjit, m, fmod_ret->links[i], true))
				return -EINVAL;

			/*
			 * if (retval)
			 *         goto do_fexit;
			 */

			/* ltg %r0,retval_off(%r15) */
			EMIT6_DISP_LH(0xe3000000, 0x0002, REG_0, REG_0, REG_15,
				      tjit->retval_off);
			/* brcl 7,do_fexit */
			EMIT6_PCREL_RILC(0xc0040000, 7, tjit->do_fexit);
		}
	}

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		/*
		 * retval = func_addr(args);
		 */

		/* lmg %r2,%rN,reg_args_off(%r15) */
		if (nr_reg_args)
			EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
				      REG_2 + (nr_reg_args - 1), REG_15,
				      tjit->reg_args_off);
		/* mvc stack_args_off(N,%r15),orig_stack_args_off(%r15) */
		if (nr_stack_args)
			_EMIT6(0xd200f000 |
				       (nr_stack_args * sizeof(u64) - 1) << 16 |
				       tjit->stack_args_off,
			       0xf000 | tjit->orig_stack_args_off);
		/* mvc STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
		_EMIT6(0xd203f000 | STK_OFF_TCCNT, 0xf000 | tjit->tccnt_off);
		/* lgr %r1,%r8 */
		EMIT4(0xb9040000, REG_1, REG_8);
		/* %r1() */
		call_r1(jit);
		/* stg %r2,retval_off(%r15) */
		EMIT6_DISP_LH(0xe3000000, 0x0024, REG_2, REG_0, REG_15,
			      tjit->retval_off);

		im->ip_after_call = jit->prg_buf + jit->prg;

		/*
		 * The following nop will be patched by bpf_tramp_image_put().
		 */

		/* brcl 0,im->ip_epilogue */
		EMIT6_PCREL_RILC(0xc0040000, 0, (u64)im->ip_epilogue);
	}

	/* do_fexit: */
	tjit->do_fexit = jit->prg;
	for (i = 0; i < fexit->nr_links; i++)
		if (invoke_bpf_prog(tjit, m, fexit->links[i], false))
			return -EINVAL;

	if (flags & BPF_TRAMP_F_CALL_ORIG) {
		im->ip_epilogue = jit->prg_buf + jit->prg;

		/*
		 * __bpf_tramp_exit(im);
		 */

		/* %r1 = __bpf_tramp_exit */
		load_imm64(jit, REG_1, (u64)__bpf_tramp_exit);
		/* %r2 = im */
		load_imm64(jit, REG_2, (u64)im);
		/* %r1() */
		call_r1(jit);
	}

	/* lmg %r2,%rN,reg_args_off(%r15) */
	if ((flags & BPF_TRAMP_F_RESTORE_REGS) && nr_reg_args)
		EMIT6_DISP_LH(0xeb000000, 0x0004, REG_2,
			      REG_2 + (nr_reg_args - 1), REG_15,
			      tjit->reg_args_off);
	/* lgr %r1,%r8 */
	if (!(flags & BPF_TRAMP_F_SKIP_FRAME))
		EMIT4(0xb9040000, REG_1, REG_8);
	/* lmg %r7,%r8,r7_r8_off(%r15) */
	EMIT6_DISP_LH(0xeb000000, 0x0004, REG_7, REG_8, REG_15,
		      tjit->r7_r8_off);
	/* lg %r14,r14_off(%r15) */
	EMIT6_DISP_LH(0xe3000000, 0x0004, REG_14, REG_0, REG_15, tjit->r14_off);
	/* lg %r2,retval_off(%r15) */
	if (flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET))
		EMIT6_DISP_LH(0xe3000000, 0x0004, REG_2, REG_0, REG_15,
			      tjit->retval_off);
	/* mvc stack_size+STK_OFF_TCCNT(4,%r15),tccnt_off(%r15) */
	_EMIT6(0xd203f000 | (tjit->stack_size + STK_OFF_TCCNT),
	       0xf000 | tjit->tccnt_off);
	/* aghi %r15,stack_size */
	EMIT4_IMM(0xa70b0000, REG_15, tjit->stack_size);
	/* Emit an expoline for the following indirect jump. */
	if (nospec_uses_trampoline())
		emit_expoline(jit);
	if (flags & BPF_TRAMP_F_SKIP_FRAME)
		/* br %r14 */
		_EMIT2(0x07fe);
	else
		/* br %r1 */
		_EMIT2(0x07f1);

	emit_r1_thunk(jit);

	return 0;
}

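/*
 * Trampolines are built in two passes: a sizing pass with prg_buf == NULL,
 * which only computes offsets and the code size, followed by the actual
 * emission into "image".
 */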
int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image,
				void *image_end, const struct btf_func_model *m,
				u32 flags, struct bpf_tramp_links *tlinks,
				void *func_addr)
{
	struct bpf_tramp_jit tjit;
	int ret;
	int i;

	for (i = 0; i < 2; i++) {
		if (i == 0) {
			/* Compute offsets, check whether the code fits. */
			memset(&tjit, 0, sizeof(tjit));
		} else {
			/* Generate the code. */
			tjit.common.prg = 0;
			tjit.common.prg_buf = image;
		}
		ret = __arch_prepare_bpf_trampoline(im, &tjit, m, flags,
						    tlinks, func_addr);
		if (ret < 0)
			return ret;
		if (tjit.common.prg > (char *)image_end - (char *)image)
			/*
			 * Use the same error code as for exceeding
			 * BPF_MAX_TRAMP_LINKS.
			 */
			return -E2BIG;
	}

	return tjit.common.prg;
}

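/*
 * Tail calls from subprograms work because the tail-call counter lives on
 * the stack (STK_OFF_TCCNT) and is preserved across calls, see the tccnt
 * handling above.
 */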
bool bpf_jit_supports_subprog_tailcalls(void)
{
	return true;
}
