1 // SPDX-License-Identifier: GPL-2.0-only
2 /* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5 */
6 #include <uapi/linux/btf.h>
7 #include <linux/kernel.h>
8 #include <linux/types.h>
9 #include <linux/slab.h>
10 #include <linux/bpf.h>
11 #include <linux/btf.h>
12 #include <linux/bpf_verifier.h>
13 #include <linux/filter.h>
14 #include <net/netlink.h>
15 #include <linux/file.h>
16 #include <linux/vmalloc.h>
17 #include <linux/stringify.h>
18 #include <linux/bsearch.h>
19 #include <linux/sort.h>
20 #include <linux/perf_event.h>
21 #include <linux/ctype.h>
22 #include <linux/error-injection.h>
23 #include <linux/bpf_lsm.h>
24 #include <linux/btf_ids.h>
25
26 #include "disasm.h"
27
28 static const struct bpf_verifier_ops *const bpf_verifier_ops[] = {
29 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) [_id] = &_name##_verifier_ops,
30 #define BPF_MAP_TYPE(_id, _ops)
31 #define BPF_LINK_TYPE(_id, _name)
32 #include <linux/bpf_types.h>
33 #undef BPF_PROG_TYPE
34 #undef BPF_MAP_TYPE
35 #undef BPF_LINK_TYPE
36 };
37
38 /* bpf_check() is a static code analyzer that walks eBPF program
39 * instruction by instruction and updates register/stack state.
40 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
41 *
42 * The first pass is depth-first-search to check that the program is a DAG.
43 * It rejects the following programs:
44 * - larger than BPF_MAXINSNS insns
45 * - if loop is present (detected via back-edge)
46 * - unreachable insns exist (shouldn't be a forest. program = one function)
47 * - out of bounds or malformed jumps
48 * The second pass is all possible path descent from the 1st insn.
49 * Since it's analyzing all paths through the program, the length of the
50 * analysis is limited to 64k insn, which may be hit even if the total number
51 * of insns is less than 4K but there are too many branches that change stack/regs.
52 * Number of 'branches to be analyzed' is limited to 1k
53 *
54 * On entry to each instruction, each register has a type, and the instruction
55 * changes the types of the registers depending on instruction semantics.
56 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
57 * copied to R1.
58 *
59 * All registers are 64-bit.
60 * R0 - return register
61 * R1-R5 argument passing registers
62 * R6-R9 callee saved registers
63 * R10 - frame pointer read-only
64 *
65 * At the start of BPF program the register R1 contains a pointer to bpf_context
66 * and has type PTR_TO_CTX.
67 *
68 * Verifier tracks arithmetic operations on pointers in case:
69 * BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
70 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
71 * 1st insn copies R10 (which has FRAME_PTR) type into R1
72 * and 2nd arithmetic instruction is pattern matched to recognize
73 * that it wants to construct a pointer to some element within stack.
74 * So after 2nd insn, the register R1 has type PTR_TO_STACK
75 * (and -20 constant is saved for further stack bounds checking).
76 * Meaning that this reg is a pointer to stack plus known immediate constant.
77 *
78 * Most of the time the registers have SCALAR_VALUE type, which
79 * means the register has some value, but it's not a valid pointer.
80 * (like pointer plus pointer becomes SCALAR_VALUE type)
81 *
82 * When verifier sees load or store instructions the type of base register
83 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
84 * four pointer types recognized by check_mem_access() function.
85 *
86 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
87 * and the range of [ptr, ptr + map's value_size) is accessible.
88 *
89 * registers used to pass values to function calls are checked against
90 * function argument constraints.
91 *
92 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
93 * It means that the register type passed to this function must be
94 * PTR_TO_STACK and it will be used inside the function as
95 * 'pointer to map element key'
96 *
97 * For example the argument constraints for bpf_map_lookup_elem():
98 * .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
99 * .arg1_type = ARG_CONST_MAP_PTR,
100 * .arg2_type = ARG_PTR_TO_MAP_KEY,
101 *
102 * ret_type says that this function returns 'pointer to map elem value or null'
103 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
104 * 2nd argument should be a pointer to stack, which will be used inside
105 * the helper function as a pointer to map element key.
106 *
107 * On the kernel side the helper function looks like:
108 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
109 * {
110 * struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
111 * void *key = (void *) (unsigned long) r2;
112 * void *value;
113 *
114 * here kernel can access 'key' and 'map' pointers safely, knowing that
115 * [key, key + map->key_size) bytes are valid and were initialized on
116 * the stack of eBPF program.
117 * }
118 *
119 * Corresponding eBPF program may look like:
120 * BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), // after this insn R2 type is FRAME_PTR
121 * BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
122 * BPF_LD_MAP_FD(BPF_REG_1, map_fd), // after this insn R1 type is CONST_PTR_TO_MAP
123 * BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
124 * here verifier looks at prototype of map_lookup_elem() and sees:
125 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
126 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
127 *
128 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
129 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
130 * and were initialized prior to this call.
131 * If it's ok, then verifier allows this BPF_CALL insn and looks at
132 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
133 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
134 * returns either a pointer to map value or NULL.
135 *
136 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
137 * insn, the register holding that pointer in the true branch changes state to
138 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
139 * branch. See check_cond_jmp_op().
140 *
141 * After the call R0 is set to return type of the function and registers R1-R5
142 * are set to NOT_INIT to indicate that they are no longer readable.
143 *
144 * The following reference types represent a potential reference to a kernel
145 * resource which, after first being allocated, must be checked and freed by
146 * the BPF program:
147 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
148 *
149 * When the verifier sees a helper call return a reference type, it allocates a
150 * pointer id for the reference and stores it in the current function state.
151 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
152 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
153 * passes through a NULL-check conditional. For the branch wherein the state is
154 * changed to CONST_IMM, the verifier releases the reference.
155 *
156 * For each helper function that allocates a reference, such as
157 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
158 * bpf_sk_release(). When a reference type passes into the release function,
159 * the verifier also releases the reference. If any unchecked or unreleased
160 * reference remains at the end of the program, the verifier rejects it.
161 */
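
/* A minimal sketch of the acquire/release pattern described above (argument
 * setup for the lookup helper and the return value are omitted for brevity):
 *
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                  // R0 is PTR_TO_SOCKET_OR_NULL, new ref id
 *   BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),     // NULL check
 *   BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),       // non-NULL branch: PTR_TO_SOCKET
 *   BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *                                  // the reference is released here
 *   BPF_EXIT_INSN(),
 *
 * Returning without the bpf_sk_release() call on the non-NULL path would
 * leave an unreleased reference and the program would be rejected.
 */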
162
163 /* verifier_state + insn_idx are pushed to stack when branch is encountered */
164 struct bpf_verifier_stack_elem {
165 /* verifier state is 'st'
166 * before processing instruction 'insn_idx'
167 * and after processing instruction 'prev_insn_idx'
168 */
169 struct bpf_verifier_state st;
170 int insn_idx;
171 int prev_insn_idx;
172 struct bpf_verifier_stack_elem *next;
173 /* length of verifier log at the time this state was pushed on stack */
174 u32 log_pos;
175 };
176
177 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
178 #define BPF_COMPLEXITY_LIMIT_STATES 64
179
180 #define BPF_MAP_KEY_POISON (1ULL << 63)
181 #define BPF_MAP_KEY_SEEN (1ULL << 62)
182
183 #define BPF_MAP_PTR_UNPRIV 1UL
184 #define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + POISON_POINTER_DELTA))
185 #define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
186
187 #define VERIFIER_TWO 2
188 #define VERIFIER_THREE 3
189 #define VERIFIER_FOUR 4
190 #define VERIFIER_EIGHT 8
191 #define VERIFIER_SIXTEEN 16
192 #define VERIFIER_THIRTYONE 31
193 #define VERIFIER_THIRTYTWO 32
194 #define VERIFIER_SIXTYTHREE 63
195 #define VERIFIER_SIXTYFOUR 64
196 #define VERIFIER_ONEHUNDREDTWENTYEIGHT 128
197 #define VERIFIER_TWOHUNDREDFIFTYSIX 256
198 #define VERIFIER_ONETHOUSAND 1000
199
200 static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
201 {
202 return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
203 }
204
205 static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
206 {
207 return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
208 }
209
210 static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, const struct bpf_map *map, bool unpriv)
211 {
212 BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
213 unpriv |= bpf_map_ptr_unpriv(aux);
214 aux->map_ptr_state = (unsigned long)map | (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
215 }
216
217 static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
218 {
219 return aux->map_key_state & BPF_MAP_KEY_POISON;
220 }
221
222 static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
223 {
224 return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
225 }
226
227 static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
228 {
229 return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
230 }
231
232 static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
233 {
234 bool poisoned = bpf_map_key_poisoned(aux);
235
236 aux->map_key_state = state | BPF_MAP_KEY_SEEN | (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
237 }
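
/* A rough sketch of how the helpers above are used (the actual call sites are
 * in the map/key recording and fixup logic later in this file; treat this as
 * an illustration, not the exact code):
 *
 *   if (bpf_map_ptr_poisoned(aux)) {
 *           // several different maps (or an unknown map) can reach this call
 *           // insn, so per-map rewrites (e.g. inlined lookups) are skipped
 *   } else {
 *           struct bpf_map *map = BPF_MAP_PTR(aux->map_ptr_state);
 *           // map-specific specialization is possible here
 *   }
 *
 * bpf_map_key_store()/bpf_map_key_immediate() play the same role for a
 * constant tail-call index: a known constant is recorded, and
 * BPF_MAP_KEY_POISON marks the case where different keys reach the same insn.
 */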
238
239 struct bpf_call_arg_meta {
240 struct bpf_map *map_ptr;
241 bool raw_mode;
242 bool pkt_access;
243 int regno;
244 int access_size;
245 int mem_size;
246 u64 msize_max_value;
247 int ref_obj_id;
248 int func_id;
249 u32 btf_id;
250 u32 ret_btf_id;
251 };
252
253 struct btf *btf_vmlinux;
254
255 static DEFINE_MUTEX(bpf_verifier_lock);
256
257 static const struct bpf_line_info *find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
258 {
259 const struct bpf_line_info *linfo;
260 const struct bpf_prog *prog;
261 u32 i, nr_linfo;
262
263 prog = env->prog;
264 nr_linfo = prog->aux->nr_linfo;
265
266 if (!nr_linfo || insn_off >= prog->len) {
267 return NULL;
268 }
269
270 linfo = prog->aux->linfo;
271 for (i = 1; i < nr_linfo; i++) {
272 if (insn_off < linfo[i].insn_off) {
273 break;
274 }
275 }
276
277 return &linfo[i - 1];
278 }
279
280 void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args)
281 {
282 unsigned int n;
283
284 n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
285
286 WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, "verifier log line truncated - local buffer too short\n");
287
288 n = min(log->len_total - log->len_used - 1, n);
289 log->kbuf[n] = '\0';
290
291 if (log->level == BPF_LOG_KERNEL) {
292 pr_err("BPF:%s\n", log->kbuf);
293 return;
294 }
295 if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) {
296 log->len_used += n;
297 } else {
298 log->ubuf = NULL;
299 }
300 }
301
302 static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
303 {
304 char zero = 0;
305
306 if (!bpf_verifier_log_needed(log)) {
307 return;
308 }
309
310 log->len_used = new_pos;
311 if (put_user(zero, log->ubuf + new_pos)) {
312 log->ubuf = NULL;
313 }
314 }
315
316 /* log_level controls verbosity level of eBPF verifier.
317 * bpf_verifier_log_write() is used to dump the verification trace to the log,
318 * so the user can figure out what's wrong with the program
319 */
320 __printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...)
321 {
322 va_list args;
323
324 if (!bpf_verifier_log_needed(&env->log)) {
325 return;
326 }
327
328 va_start(args, fmt);
329 bpf_verifier_vlog(&env->log, fmt, args);
330 va_end(args);
331 }
332 EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
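
/* A minimal usage sketch (hypothetical caller): code outside this file that
 * has access to the verifier env, such as a prog-type specific callback, can
 * append to the same user-visible log:
 *
 *   bpf_verifier_log_write(env, "invalid ctx access off=%d size=%d\n",
 *                          off, size);
 */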
333
334 __printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
335 {
336 struct bpf_verifier_env *env = private_data;
337 va_list args;
338
339 if (!bpf_verifier_log_needed(&env->log)) {
340 return;
341 }
342
343 va_start(args, fmt);
344 bpf_verifier_vlog(&env->log, fmt, args);
345 va_end(args);
346 }
347
348 __printf(2, 3) void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...)
349 {
350 va_list args;
351
352 if (!bpf_verifier_log_needed(log)) {
353 return;
354 }
355
356 va_start(args, fmt);
357 bpf_verifier_vlog(log, fmt, args);
358 va_end(args);
359 }
360
361 static const char *ltrim(const char *s)
362 {
363 while (isspace(*s)) {
364 s++;
365 }
366
367 return s;
368 }
369
370 __printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, u32 insn_off, const char *prefix_fmt, ...)
371 {
372 const struct bpf_line_info *linfo;
373
374 if (!bpf_verifier_log_needed(&env->log)) {
375 return;
376 }
377
378 linfo = find_linfo(env, insn_off);
379 if (!linfo || linfo == env->prev_linfo) {
380 return;
381 }
382
383 if (prefix_fmt) {
384 va_list args;
385
386 va_start(args, prefix_fmt);
387 bpf_verifier_vlog(&env->log, prefix_fmt, args);
388 va_end(args);
389 }
390
391 verbose(env, "%s\n", ltrim(btf_name_by_offset(env->prog->aux->btf, linfo->line_off)));
392
393 env->prev_linfo = linfo;
394 }
395
396 static bool type_is_pkt_pointer(enum bpf_reg_type type)
397 {
398 return type == PTR_TO_PACKET || type == PTR_TO_PACKET_META;
399 }
400
401 static bool type_is_sk_pointer(enum bpf_reg_type type)
402 {
403 return type == PTR_TO_SOCKET || type == PTR_TO_SOCK_COMMON || type == PTR_TO_TCP_SOCK || type == PTR_TO_XDP_SOCK;
404 }
405
406 static bool reg_type_not_null(enum bpf_reg_type type)
407 {
408 return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || type == PTR_TO_SOCK_COMMON;
409 }
410
411 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
412 {
413 return reg->type == PTR_TO_MAP_VALUE && map_value_has_spin_lock(reg->map_ptr);
414 }
415
416 static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
417 {
418 return base_type(type) == PTR_TO_SOCKET || base_type(type) == PTR_TO_TCP_SOCK || base_type(type) == PTR_TO_MEM;
419 }
420
421 static bool type_is_rdonly_mem(u32 type)
422 {
423 return type & MEM_RDONLY;
424 }
425
426 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
427 {
428 return type == ARG_PTR_TO_SOCK_COMMON;
429 }
430
431 static bool type_may_be_null(u32 type)
432 {
433 return type & PTR_MAYBE_NULL;
434 }
435
436 /* Determine whether the function releases some resources allocated by another
437 * function call. The first reference type argument will be assumed to be
438 * released by release_reference().
439 */
440 static bool is_release_function(enum bpf_func_id func_id)
441 {
442 return func_id == BPF_FUNC_sk_release || func_id == BPF_FUNC_ringbuf_submit || func_id == BPF_FUNC_ringbuf_discard;
443 }
444
445 static bool may_be_acquire_function(enum bpf_func_id func_id)
446 {
447 return func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp ||
448 func_id == BPF_FUNC_skc_lookup_tcp || func_id == BPF_FUNC_map_lookup_elem ||
449 func_id == BPF_FUNC_ringbuf_reserve;
450 }
451
452 static bool is_acquire_function(enum bpf_func_id func_id, const struct bpf_map *map)
453 {
454 enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
455
456 if (func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp ||
457 func_id == BPF_FUNC_ringbuf_reserve) {
458 return true;
459 }
460
461 if (func_id == BPF_FUNC_map_lookup_elem &&
462 (map_type == BPF_MAP_TYPE_SOCKMAP || map_type == BPF_MAP_TYPE_SOCKHASH)) {
463 return true;
464 }
465
466 return false;
467 }
468
469 static bool is_ptr_cast_function(enum bpf_func_id func_id)
470 {
471 return func_id == BPF_FUNC_tcp_sock || func_id == BPF_FUNC_sk_fullsock || func_id == BPF_FUNC_skc_to_tcp_sock ||
472 func_id == BPF_FUNC_skc_to_tcp6_sock || func_id == BPF_FUNC_skc_to_udp6_sock ||
473 func_id == BPF_FUNC_skc_to_tcp_timewait_sock || func_id == BPF_FUNC_skc_to_tcp_request_sock;
474 }
475
476 /* string representation of 'enum bpf_reg_type'
477 *
478 * Note that reg_type_str() can not appear more than once in a single verbose()
479 * statement.
480 */
481 static const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type)
482 {
483 char postfix[VERIFIER_SIXTEEN] = {0}, prefix[VERIFIER_SIXTEEN] = {0};
484 static const char *const str[] = {
485 [NOT_INIT] = "?",
486 [SCALAR_VALUE] = "inv",
487 [PTR_TO_CTX] = "ctx",
488 [CONST_PTR_TO_MAP] = "map_ptr",
489 [PTR_TO_MAP_VALUE] = "map_value",
490 [PTR_TO_STACK] = "fp",
491 [PTR_TO_PACKET] = "pkt",
492 [PTR_TO_PACKET_META] = "pkt_meta",
493 [PTR_TO_PACKET_END] = "pkt_end",
494 [PTR_TO_FLOW_KEYS] = "flow_keys",
495 [PTR_TO_SOCKET] = "sock",
496 [PTR_TO_SOCK_COMMON] = "sock_common",
497 [PTR_TO_TCP_SOCK] = "tcp_sock",
498 [PTR_TO_TP_BUFFER] = "tp_buffer",
499 [PTR_TO_XDP_SOCK] = "xdp_sock",
500 [PTR_TO_BTF_ID] = "ptr_",
501 [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
502 [PTR_TO_MEM] = "mem",
503 [PTR_TO_BUF] = "buf",
504 };
505
506 if (type & PTR_MAYBE_NULL) {
507 if (base_type(type) == PTR_TO_BTF_ID || base_type(type) == PTR_TO_PERCPU_BTF_ID) {
508 strncpy(postfix, "or_null_", VERIFIER_SIXTEEN);
509 } else {
510 strncpy(postfix, "_or_null", VERIFIER_SIXTEEN);
511 }
512 }
513
514 if (type & MEM_RDONLY) {
515 strncpy(prefix, "rdonly_", VERIFIER_SIXTEEN);
516 }
517 if (type & MEM_ALLOC) {
518 strncpy(prefix, "alloc_", VERIFIER_SIXTEEN);
519 }
520
521 (void)snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix);
522 return env->type_str_buf;
523 }
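
/* Because reg_type_str() formats into the single env->type_str_buf, calling it
 * twice in one statement makes both results point at the same buffer. A sketch
 * of the pattern to avoid and the safe alternative:
 *
 *   // wrong: both %s arguments end up printing the same (last-evaluated) type
 *   verbose(env, "%s vs %s\n", reg_type_str(env, t1), reg_type_str(env, t2));
 *
 *   // safe: at most one reg_type_str() per verbose() call
 *   verbose(env, "%s vs ", reg_type_str(env, t1));
 *   verbose(env, "%s\n", reg_type_str(env, t2));
 */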
524
525 static char slot_type_char[] = {
526 [STACK_INVALID] = '?',
527 [STACK_SPILL] = 'r',
528 [STACK_MISC] = 'm',
529 [STACK_ZERO] = '0',
530 };
531
532 static void print_liveness(struct bpf_verifier_env *env, enum bpf_reg_liveness live)
533 {
534 if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) {
535 verbose(env, "_");
536 }
537 if (live & REG_LIVE_READ) {
538 verbose(env, "r");
539 }
540 if (live & REG_LIVE_WRITTEN) {
541 verbose(env, "w");
542 }
543 if (live & REG_LIVE_DONE) {
544 verbose(env, "D");
545 }
546 }
547
548 static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
549 {
550 struct bpf_verifier_state *cur = env->cur_state;
551
552 return cur->frame[reg->frameno];
553 }
554
555 const char *kernel_type_name(u32 id)
556 {
557 return btf_name_by_offset(btf_vmlinux, btf_type_by_id(btf_vmlinux, id)->name_off);
558 }
559
560 static void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state)
561 {
562 const struct bpf_reg_state *reg;
563 enum bpf_reg_type t;
564 int i;
565
566 if (state->frameno) {
567 verbose(env, " frame%d:", state->frameno);
568 }
569 for (i = 0; i < MAX_BPF_REG; i++) {
570 reg = &state->regs[i];
571 t = reg->type;
572 if (t == NOT_INIT) {
573 continue;
574 }
575 verbose(env, " R%d", i);
576 print_liveness(env, reg->live);
577 verbose(env, "=%s", reg_type_str(env, t));
578 if (t == SCALAR_VALUE && reg->precise) {
579 verbose(env, "P");
580 }
581 if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) {
582 /* reg->off should be 0 for SCALAR_VALUE */
583 verbose(env, "%lld", reg->var_off.value + reg->off);
584 } else {
585 if (base_type(t) == PTR_TO_BTF_ID || base_type(t) == PTR_TO_PERCPU_BTF_ID) {
586 verbose(env, "%s", kernel_type_name(reg->btf_id));
587 }
588 verbose(env, "(id=%d", reg->id);
589 if (reg_type_may_be_refcounted_or_null(t)) {
590 verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
591 }
592 if (t != SCALAR_VALUE) {
593 verbose(env, ",off=%d", reg->off);
594 }
595 if (type_is_pkt_pointer(t)) {
596 verbose(env, ",r=%d", reg->range);
597 } else if (base_type(t) == CONST_PTR_TO_MAP || base_type(t) == PTR_TO_MAP_VALUE) {
598 verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, reg->map_ptr->value_size);
599 }
600 if (tnum_is_const(reg->var_off)) {
601 /* Typically an immediate SCALAR_VALUE, but
602 * could be a pointer whose offset is too big
603 * for reg->off
604 */
605 verbose(env, ",imm=%llx", reg->var_off.value);
606 } else {
607 if (reg->smin_value != reg->umin_value && reg->smin_value != S64_MIN) {
608 verbose(env, ",smin_value=%lld", (long long)reg->smin_value);
609 }
610 if (reg->smax_value != reg->umax_value && reg->smax_value != S64_MAX) {
611 verbose(env, ",smax_value=%lld", (long long)reg->smax_value);
612 }
613 if (reg->umin_value != 0) {
614 verbose(env, ",umin_value=%llu", (unsigned long long)reg->umin_value);
615 }
616 if (reg->umax_value != U64_MAX) {
617 verbose(env, ",umax_value=%llu", (unsigned long long)reg->umax_value);
618 }
619 if (!tnum_is_unknown(reg->var_off)) {
620 char tn_buf[48];
621
622 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
623 verbose(env, ",var_off=%s", tn_buf);
624 }
625 if (reg->s32_min_value != reg->smin_value && reg->s32_min_value != S32_MIN) {
626 verbose(env, ",s32_min_value=%d", (int)(reg->s32_min_value));
627 }
628 if (reg->s32_max_value != reg->smax_value && reg->s32_max_value != S32_MAX) {
629 verbose(env, ",s32_max_value=%d", (int)(reg->s32_max_value));
630 }
631 if (reg->u32_min_value != reg->umin_value && reg->u32_min_value != U32_MIN) {
632 verbose(env, ",u32_min_value=%d", (int)(reg->u32_min_value));
633 }
634 if (reg->u32_max_value != reg->umax_value && reg->u32_max_value != U32_MAX) {
635 verbose(env, ",u32_max_value=%d", (int)(reg->u32_max_value));
636 }
637 }
638 verbose(env, ")");
639 }
640 }
641 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
642 char types_buf[BPF_REG_SIZE + 1];
643 bool valid = false;
644 int j;
645
646 for (j = 0; j < BPF_REG_SIZE; j++) {
647 if (state->stack[i].slot_type[j] != STACK_INVALID) {
648 valid = true;
649 }
650 types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
651 }
652 types_buf[BPF_REG_SIZE] = 0;
653 if (!valid) {
654 continue;
655 }
656 verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
657 print_liveness(env, state->stack[i].spilled_ptr.live);
658 if (state->stack[i].slot_type[0] == STACK_SPILL) {
659 reg = &state->stack[i].spilled_ptr;
660 t = reg->type;
661 verbose(env, "=%s", reg_type_str(env, t));
662 if (t == SCALAR_VALUE && reg->precise) {
663 verbose(env, "P");
664 }
665 if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) {
666 verbose(env, "%lld", reg->var_off.value + reg->off);
667 }
668 } else {
669 verbose(env, "=%s", types_buf);
670 }
671 }
672 if (state->acquired_refs && state->refs[0].id) {
673 verbose(env, " refs=%d", state->refs[0].id);
674 for (i = 1; i < state->acquired_refs; i++) {
675 if (state->refs[i].id) {
676 verbose(env, ",%d", state->refs[i].id);
677 }
678 }
679 }
680 verbose(env, "\n");
681 }
682
683 #define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE) \
684 static int copy_##NAME##_state(struct bpf_func_state *dst, const struct bpf_func_state *src) \
685 { \
686 if (!src->FIELD) \
687 return 0; \
688 if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) { \
689 /* internal bug, make state invalid to reject the program */ \
690 memset(dst, 0, sizeof(*dst)); \
691 return -EFAULT; \
692 } \
693 memcpy(dst->FIELD, src->FIELD, sizeof(*src->FIELD) * (src->COUNT / (SIZE))); \
694 return 0; \
695 }
696 /* copy_reference_state() */
697 COPY_STATE_FN(reference, acquired_refs, refs, 1)
698 /* copy_stack_state() */
699 COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
700 #undef COPY_STATE_FN
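
/* For reference, COPY_STATE_FN(reference, acquired_refs, refs, 1) expands to
 * roughly the following (a sketch of the generated copy_reference_state()):
 *
 *   static int copy_reference_state(struct bpf_func_state *dst,
 *                                   const struct bpf_func_state *src)
 *   {
 *           if (!src->refs)
 *                   return 0;
 *           if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *                   memset(dst, 0, sizeof(*dst));
 *                   return -EFAULT;
 *           }
 *           memcpy(dst->refs, src->refs,
 *                  sizeof(*src->refs) * src->acquired_refs);
 *           return 0;
 *   }
 */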
701
702 #define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE) \
703 static int realloc_##NAME##_state(struct bpf_func_state *state, int size, bool copy_old) \
704 { \
705 u32 old_size = state->COUNT; \
706 struct bpf_##NAME##_state *new_##FIELD; \
707 int slot = size / (SIZE); \
708 \
709 if (size <= old_size || !size) { \
710 if (copy_old) \
711 return 0; \
712 state->COUNT = slot * (SIZE); \
713 if (!size && old_size) { \
714 kfree(state->FIELD); \
715 state->FIELD = NULL; \
716 } \
717 return 0; \
718 } \
719 new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), GFP_KERNEL); \
720 if (!new_##FIELD) \
721 return -ENOMEM; \
722 if (copy_old) { \
723 if (state->FIELD) \
724 memcpy(new_##FIELD, state->FIELD, sizeof(*new_##FIELD) * (old_size / (SIZE))); \
725 memset(new_##FIELD + old_size / (SIZE), 0, sizeof(*new_##FIELD) * (size - old_size) / (SIZE)); \
726 } \
727 state->COUNT = slot * (SIZE); \
728 kfree(state->FIELD); \
729 state->FIELD = new_##FIELD; \
730 return 0; \
731 }
732 /* realloc_reference_state() */
733 REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
734 /* realloc_stack_state() */
735 REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
736 #undef REALLOC_STATE_FN
737
738 /* do_check() starts with zero-sized stack in struct bpf_verifier_state to
739 * make it consume a minimal amount of memory. check_stack_write() accesses from
740 * the program call into realloc_func_state() to grow the stack size.
741 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
742 * which realloc_stack_state() copies over. It points to previous
743 * bpf_verifier_state which is never reallocated.
744 */
745 static int realloc_func_state(struct bpf_func_state *state, int stack_size, int refs_size, bool copy_old)
746 {
747 int err = realloc_reference_state(state, refs_size, copy_old);
748 if (err) {
749 return err;
750 }
751 return realloc_stack_state(state, stack_size, copy_old);
752 }
753
754 /* Acquire a pointer id from the env and update the state->refs to include
755 * this new pointer reference.
756 * On success, returns a valid pointer id to associate with the register
757 * On failure, returns a negative errno.
758 */
759 static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
760 {
761 struct bpf_func_state *state = cur_func(env);
762 int new_ofs = state->acquired_refs;
763 int id, err;
764
765 err = realloc_reference_state(state, state->acquired_refs + 1, true);
766 if (err) {
767 return err;
768 }
769 id = ++env->id_gen;
770 state->refs[new_ofs].id = id;
771 state->refs[new_ofs].insn_idx = insn_idx;
772
773 return id;
774 }
775
776 /* release function corresponding to acquire_reference_state(). Idempotent. */
777 static int release_reference_state(struct bpf_func_state *state, int ptr_id)
778 {
779 int i, last_idx;
780
781 last_idx = state->acquired_refs - 1;
782 for (i = 0; i < state->acquired_refs; i++) {
783 if (state->refs[i].id == ptr_id) {
784 if (last_idx && i != last_idx) {
785 memcpy(&state->refs[i], &state->refs[last_idx], sizeof(*state->refs));
786 }
787 memset(&state->refs[last_idx], 0, sizeof(*state->refs));
788 state->acquired_refs--;
789 return 0;
790 }
791 }
792 return -EINVAL;
793 }
794
795 static int transfer_reference_state(struct bpf_func_state *dst, struct bpf_func_state *src)
796 {
797 int err = realloc_reference_state(dst, src->acquired_refs, false);
798 if (err) {
799 return err;
800 }
801 err = copy_reference_state(dst, src);
802 if (err) {
803 return err;
804 }
805 return 0;
806 }
807
808 static void free_func_state(struct bpf_func_state *state)
809 {
810 if (!state) {
811 return;
812 }
813 kfree(state->refs);
814 kfree(state->stack);
815 kfree(state);
816 }
817
818 static void clear_jmp_history(struct bpf_verifier_state *state)
819 {
820 kfree(state->jmp_history);
821 state->jmp_history = NULL;
822 state->jmp_history_cnt = 0;
823 }
824
825 static void free_verifier_state(struct bpf_verifier_state *state, bool free_self)
826 {
827 int i;
828
829 for (i = 0; i <= state->curframe; i++) {
830 free_func_state(state->frame[i]);
831 state->frame[i] = NULL;
832 }
833 clear_jmp_history(state);
834 if (free_self) {
835 kfree(state);
836 }
837 }
838
839 /* copy verifier state from src to dst growing dst stack space
840 * when necessary to accommodate larger src stack
841 */
842 static int copy_func_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
843 {
844 int err;
845
846 err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, false);
847 if (err) {
848 return err;
849 }
850 memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
851 err = copy_reference_state(dst, src);
852 if (err) {
853 return err;
854 }
855 return copy_stack_state(dst, src);
856 }
857
858 static int copy_verifier_state(struct bpf_verifier_state *dst_state, const struct bpf_verifier_state *src)
859 {
860 struct bpf_func_state *dst;
861 u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
862 int i, err;
863
864 if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
865 kfree(dst_state->jmp_history);
866 dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
867 if (!dst_state->jmp_history) {
868 return -ENOMEM;
869 }
870 }
871 memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
872 dst_state->jmp_history_cnt = src->jmp_history_cnt;
873
874 /* if dst has more stack frames than src, free them */
875 for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
876 free_func_state(dst_state->frame[i]);
877 dst_state->frame[i] = NULL;
878 }
879 dst_state->speculative = src->speculative;
880 dst_state->curframe = src->curframe;
881 dst_state->active_spin_lock = src->active_spin_lock;
882 dst_state->branches = src->branches;
883 dst_state->parent = src->parent;
884 dst_state->first_insn_idx = src->first_insn_idx;
885 dst_state->last_insn_idx = src->last_insn_idx;
886 for (i = 0; i <= src->curframe; i++) {
887 dst = dst_state->frame[i];
888 if (!dst) {
889 dst = kzalloc(sizeof(*dst), GFP_KERNEL);
890 if (!dst) {
891 return -ENOMEM;
892 }
893 dst_state->frame[i] = dst;
894 }
895 err = copy_func_state(dst, src->frame[i]);
896 if (err) {
897 return err;
898 }
899 }
900 return 0;
901 }
902
903 static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
904 {
905 while (st) {
906 u32 br = --st->branches;
907
908 /* WARN_ON(br > 1) technically makes sense here,
909 * but see comment in push_stack(), hence:
910 */
911 WARN_ONCE((int)br < 0, "BUG update_branch_counts:branches_to_explore=%d\n", br);
912 if (br) {
913 break;
914 }
915 st = st->parent;
916 }
917 }
918
919 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, int *insn_idx, bool pop_log)
920 {
921 struct bpf_verifier_state *cur = env->cur_state;
922 struct bpf_verifier_stack_elem *elem, *head = env->head;
923 int err;
924
925 if (env->head == NULL) {
926 return -ENOENT;
927 }
928
929 if (cur) {
930 err = copy_verifier_state(cur, &head->st);
931 if (err) {
932 return err;
933 }
934 }
935 if (pop_log) {
936 bpf_vlog_reset(&env->log, head->log_pos);
937 }
938 if (insn_idx) {
939 *insn_idx = head->insn_idx;
940 }
941 if (prev_insn_idx) {
942 *prev_insn_idx = head->prev_insn_idx;
943 }
944 elem = head->next;
945 free_verifier_state(&head->st, false);
946 kfree(head);
947 env->head = elem;
948 env->stack_size--;
949 return 0;
950 }
951
952 static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx,
953 bool speculative)
954 {
955 struct bpf_verifier_state *cur = env->cur_state;
956 struct bpf_verifier_stack_elem *elem;
957 int err;
958
959 elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
960 if (!elem) {
961 goto err;
962 }
963
964 elem->insn_idx = insn_idx;
965 elem->prev_insn_idx = prev_insn_idx;
966 elem->next = env->head;
967 elem->log_pos = env->log.len_used;
968 env->head = elem;
969 env->stack_size++;
970 err = copy_verifier_state(&elem->st, cur);
971 if (err) {
972 goto err;
973 }
974 elem->st.speculative |= speculative;
975 if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
976 verbose(env, "The sequence of %d jumps is too complex.\n", env->stack_size);
977 goto err;
978 }
979 if (elem->st.parent) {
980 ++elem->st.parent->branches;
981 /* WARN_ON(branches > 2) technically makes sense here,
982 * but
983 * 1. speculative states will bump 'branches' for non-branch
984 * instructions
985 * 2. is_state_visited() heuristics may decide not to create
986 * a new state for a sequence of branches and all such current
987 * and cloned states will be pointing to a single parent state
988 * which might have large 'branches' count.
989 */
990 }
991 return &elem->st;
992 err:
993 free_verifier_state(env->cur_state, true);
994 env->cur_state = NULL;
995 /* pop all elements and return */
996 while (!pop_stack(env, NULL, NULL, false)) {
997 ;
998 }
999 return NULL;
1000 }
1001
1002 #define CALLER_SAVED_REGS 6
1003 static const int caller_saved[CALLER_SAVED_REGS] = {BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5};
1004
1005 static void verifier_mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg);
1006
1007 /* This helper doesn't clear reg->id */
1008 static void verifier2_mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1009 {
1010 reg->var_off = tnum_const(imm);
1011 reg->smin_value = (s64)imm;
1012 reg->smax_value = (s64)imm;
1013 reg->umin_value = imm;
1014 reg->umax_value = imm;
1015
1016 reg->s32_min_value = (s32)imm;
1017 reg->s32_max_value = (s32)imm;
1018 reg->u32_min_value = (u32)imm;
1019 reg->u32_max_value = (u32)imm;
1020 }
1021
1022 /* Mark the unknown part of a register (variable offset or scalar value) as
1023 * known to have the value @imm.
1024 */
1025 static void verifier_mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1026 {
1027 /* Clear id, off, and union(map_ptr, range) */
1028 memset(((u8 *)reg) + sizeof(reg->type), 0, offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1029 verifier2_mark_reg_known(reg, imm);
1030 }
1031
1032 static void verifier_mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1033 {
1034 reg->var_off = tnum_const_subreg(reg->var_off, imm);
1035 reg->s32_min_value = (s32)imm;
1036 reg->s32_max_value = (s32)imm;
1037 reg->u32_min_value = (u32)imm;
1038 reg->u32_max_value = (u32)imm;
1039 }
1040
1041 /* Mark the 'variable offset' part of a register as zero. This should be
1042 * used only on registers holding a pointer type.
1043 */
1044 static void verifier_mark_reg_known_zero(struct bpf_reg_state *reg)
1045 {
1046 verifier_mark_reg_known(reg, 0);
1047 }
1048
1049 static void verifier_mark_reg_const_zero(struct bpf_reg_state *reg)
1050 {
1051 verifier_mark_reg_known(reg, 0);
1052 reg->type = SCALAR_VALUE;
1053 }
1054
1055 static void mark_reg_known_zero(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1056 {
1057 if (WARN_ON(regno >= MAX_BPF_REG)) {
1058 verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1059 /* Something bad happened, let's kill all regs */
1060 for (regno = 0; regno < MAX_BPF_REG; regno++) {
1061 verifier_mark_reg_not_init(env, regs + regno);
1062 }
1063 return;
1064 }
1065 verifier_mark_reg_known_zero(regs + regno);
1066 }
1067
1068 static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1069 {
1070 return type_is_pkt_pointer(reg->type);
1071 }
1072
1073 static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1074 {
1075 return reg_is_pkt_pointer(reg) || reg->type == PTR_TO_PACKET_END;
1076 }
1077
1078 /* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1079 static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, enum bpf_reg_type which)
1080 {
1081 /* The register can already have a range from prior markings.
1082 * This is fine as long as it hasn't been advanced from its
1083 * origin.
1084 */
1085 return reg->type == which && reg->id == 0 && reg->off == 0 && tnum_equals_const(reg->var_off, 0);
1086 }
1087
1088 /* Reset the min/max bounds of a register */
1089 static void verifier_mark_reg_unbounded(struct bpf_reg_state *reg)
1090 {
1091 reg->smin_value = S64_MIN;
1092 reg->smax_value = S64_MAX;
1093 reg->umin_value = 0;
1094 reg->umax_value = U64_MAX;
1095
1096 reg->s32_min_value = S32_MIN;
1097 reg->s32_max_value = S32_MAX;
1098 reg->u32_min_value = 0;
1099 reg->u32_max_value = U32_MAX;
1100 }
1101
1102 static void verifier_mark_reg64_unbounded(struct bpf_reg_state *reg)
1103 {
1104 reg->smin_value = S64_MIN;
1105 reg->smax_value = S64_MAX;
1106 reg->umin_value = 0;
1107 reg->umax_value = U64_MAX;
1108 }
1109
1110 static void verifier_mark_reg32_unbounded(struct bpf_reg_state *reg)
1111 {
1112 reg->s32_min_value = S32_MIN;
1113 reg->s32_max_value = S32_MAX;
1114 reg->u32_min_value = 0;
1115 reg->u32_max_value = U32_MAX;
1116 }
1117
1118 static void verifier_update_reg32_bounds(struct bpf_reg_state *reg)
1119 {
1120 struct tnum var32_off = tnum_subreg(reg->var_off);
1121
1122 /* min signed is max(sign bit) | min(other bits) */
1123 reg->s32_min_value = max_t(s32, reg->s32_min_value, var32_off.value | (var32_off.mask & S32_MIN));
1124 /* max signed is min(sign bit) | max(other bits) */
1125 reg->s32_max_value = min_t(s32, reg->s32_max_value, var32_off.value | (var32_off.mask & S32_MAX));
1126 reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1127 reg->u32_max_value = min(reg->u32_max_value, (u32)(var32_off.value | var32_off.mask));
1128 }
1129
1130 static void verifier_update_reg64_bounds(struct bpf_reg_state *reg)
1131 {
1132 /* min signed is max(sign bit) | min(other bits) */
1133 reg->smin_value = max_t(s64, reg->smin_value, reg->var_off.value | (reg->var_off.mask & S64_MIN));
1134 /* max signed is min(sign bit) | max(other bits) */
1135 reg->smax_value = min_t(s64, reg->smax_value, reg->var_off.value | (reg->var_off.mask & S64_MAX));
1136 reg->umin_value = max(reg->umin_value, reg->var_off.value);
1137 reg->umax_value = min(reg->umax_value, reg->var_off.value | reg->var_off.mask);
1138 }
1139
1140 static void verifier_update_reg_bounds(struct bpf_reg_state *reg)
1141 {
1142 verifier_update_reg32_bounds(reg);
1143 verifier_update_reg64_bounds(reg);
1144 }
1145
1146 /* Uses signed min/max values to inform unsigned, and vice-versa */
1147 static void verifier_reg32_deduce_bounds(struct bpf_reg_state *reg)
1148 {
1149 /* Learn sign from signed bounds.
1150 * If we cannot cross the sign boundary, then signed and unsigned bounds
1151 * are the same, so combine. This works even in the negative case, e.g.
1152 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1153 */
1154 if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1155 reg->s32_min_value = reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
1156 reg->s32_max_value = reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
1157 return;
1158 }
1159 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1160 * boundary, so we must be careful.
1161 */
1162 if ((s32)reg->u32_max_value >= 0) {
1163 /* Positive. We can't learn anything from the smin, but smax
1164 * is positive, hence safe.
1165 */
1166 reg->s32_min_value = reg->u32_min_value;
1167 reg->s32_max_value = reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
1168 } else if ((s32)reg->u32_min_value < 0) {
1169 /* Negative. We can't learn anything from the smax, but smin
1170 * is negative, hence safe.
1171 */
1172 reg->s32_min_value = reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
1173 reg->s32_max_value = reg->u32_max_value;
1174 }
1175 }
1176
1177 static void verifier_reg64_deduce_bounds(struct bpf_reg_state *reg)
1178 {
1179 /* Learn sign from signed bounds.
1180 * If we cannot cross the sign boundary, then signed and unsigned bounds
1181 * are the same, so combine. This works even in the negative case, e.g.
1182 * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1183 */
1184 if (reg->smin_value >= 0 || reg->smax_value < 0) {
1185 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
1186 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
1187 return;
1188 }
1189 /* Learn sign from unsigned bounds. Signed bounds cross the sign
1190 * boundary, so we must be careful.
1191 */
1192 if ((s64)reg->umax_value >= 0) {
1193 /* Positive. We can't learn anything from the smin, but smax
1194 * is positive, hence safe.
1195 */
1196 reg->smin_value = reg->umin_value;
1197 reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
1198 } else if ((s64)reg->umin_value < 0) {
1199 /* Negative. We can't learn anything from the smax, but smin
1200 * is negative, hence safe.
1201 */
1202 reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
1203 reg->smax_value = reg->umax_value;
1204 }
1205 }
1206
1207 static void verifier_reg_deduce_bounds(struct bpf_reg_state *reg)
1208 {
1209 verifier_reg32_deduce_bounds(reg);
1210 verifier_reg64_deduce_bounds(reg);
1211 }
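
/* A worked example for the deduction above: if a signed check established
 * 2 s<= x s<= 5 while the unsigned bounds are still [0, U64_MAX], then
 * smin_value >= 0, so both bound pairs collapse to [2, 5]. Conversely, if only
 * an unsigned check established x u<= 7, then (s64)umax_value >= 0, so
 * smax_value is tightened to 7 and smin_value is raised to umin_value (0).
 */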
1212
1213 /* Attempts to improve var_off based on unsigned min/max information */
1214 static void verifier_reg_bound_offset(struct bpf_reg_state *reg)
1215 {
1216 struct tnum var64_off = tnum_intersect(reg->var_off, tnum_range(reg->umin_value, reg->umax_value));
1217 struct tnum var32_off =
1218 tnum_intersect(tnum_subreg(reg->var_off), tnum_range(reg->u32_min_value, reg->u32_max_value));
1219
1220 reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1221 }
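
/* A worked example for the intersection above: with unsigned bounds
 * [umin_value, umax_value] = [4, 7], tnum_range(4, 7) is (value=0x4, mask=0x3),
 * i.e. bit 2 is known set and bits 0-1 are unknown. Intersecting that with the
 * existing var_off can only add known bits, so var_off never gets less precise.
 */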
1222
1223 static void reg_bounds_sync(struct bpf_reg_state *reg)
1224 {
1225 /* We might have learned new bounds from the var_off. */
1226 verifier_update_reg_bounds(reg);
1227 /* We might have learned something about the sign bit. */
1228 verifier_reg_deduce_bounds(reg);
1229 /* We might have learned some bits from the bounds. */
1230 verifier_reg_bound_offset(reg);
1231 /* Intersecting with the old var_off might have improved our bounds
1232 * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1233 * then new var_off is (0; 0x7f...fc) which improves our umax.
1234 */
1235 verifier_update_reg_bounds(reg);
1236 }
1237 static bool verifier_reg32_bound_s64(s32 a)
1238 {
1239 return a >= 0 && a <= S32_MAX;
1240 }
1241
1242 static void verifier_reg_assign_32_into_64(struct bpf_reg_state *reg)
1243 {
1244 reg->umin_value = reg->u32_min_value;
1245 reg->umax_value = reg->u32_max_value;
1246
1247 * Attempt to pull 32-bit signed bounds into 64-bit bounds, but they must
1248 * be positive, otherwise set to worst-case bounds and refine later
1249 * from the tnum.
1250 */
1251 if (verifier_reg32_bound_s64(reg->s32_min_value) && verifier_reg32_bound_s64(reg->s32_max_value)) {
1252 reg->smin_value = reg->s32_min_value;
1253 reg->smax_value = reg->s32_max_value;
1254 } else {
1255 reg->smin_value = 0;
1256 reg->smax_value = U32_MAX;
1257 }
1258 }
1259
1260 static void verifier_reg_combine_32_into_64(struct bpf_reg_state *reg)
1261 {
1262 /* special case when 64-bit register has upper 32-bit register
1263 * zeroed. Typically happens after zext or <<32, >>32 sequence
1264 * allowing us to use 32-bit bounds directly.
1265 */
1266 if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1267 verifier_reg_assign_32_into_64(reg);
1268 } else {
1269 /* Otherwise the best we can do is push lower 32bit known and
1270 * unknown bits into register (var_off set from jmp logic)
1271 * then learn as much as possible from the 64-bit tnum
1272 * known and unknown bits. The previous smin/smax bounds are
1273 * invalid here because of jmp32 compare so mark them unknown
1274 * so they do not impact tnum bounds calculation.
1275 */
1276 verifier_mark_reg64_unbounded(reg);
1277 verifier_update_reg_bounds(reg);
1278 }
1279
1280 /* Intersecting with the old var_off might have improved our bounds
1281 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1282 * then new var_off is (0; 0x7f...fc) which improves our umax.
1283 */
1284 reg_bounds_sync(reg);
1285 }
1286
1287 static bool verifier_reg64_bound_s32(s64 a)
1288 {
1289 return a > S32_MIN && a < S32_MAX;
1290 }
1291
1292 static bool verifier_reg64_bound_u32(u64 a)
1293 {
1294 return a > U32_MIN && a < U32_MAX;
1295 }
1296
1297 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1298 {
1299 verifier_mark_reg32_unbounded(reg);
1300
1301 if (verifier_reg64_bound_s32(reg->smin_value) && verifier_reg64_bound_s32(reg->smax_value)) {
1302 reg->s32_min_value = (s32)reg->smin_value;
1303 reg->s32_max_value = (s32)reg->smax_value;
1304 }
1305 if (verifier_reg64_bound_u32(reg->umin_value) && verifier_reg64_bound_u32(reg->umax_value)) {
1306 reg->u32_min_value = (u32)reg->umin_value;
1307 reg->u32_max_value = (u32)reg->umax_value;
1308 }
1309
1310 /* Intersecting with the old var_off might have improved our bounds
1311 * slightly. e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1312 * then new var_off is (0; 0x7f...fc) which improves our umax.
1313 */
1314 reg_bounds_sync(reg);
1315 }
1316
1317 /* Mark a register as having a completely unknown (scalar) value. */
1318 static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1319 {
1320 /*
1321 * Clear type, id, off, and union(map_ptr, range) and
1322 * padding between 'type' and union
1323 */
1324 memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1325 reg->type = SCALAR_VALUE;
1326 reg->var_off = tnum_unknown;
1327 reg->frameno = 0;
1328 reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1329 verifier_mark_reg_unbounded(reg);
1330 }
1331
1332 static void mark_reg_unknown(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1333 {
1334 if (WARN_ON(regno >= MAX_BPF_REG)) {
1335 verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1336 /* Something bad happened, let's kill all regs except FP */
1337 for (regno = 0; regno < BPF_REG_FP; regno++) {
1338 verifier_mark_reg_not_init(env, regs + regno);
1339 }
1340 return;
1341 }
1342 __mark_reg_unknown(env, regs + regno);
1343 }
1344
1345 static void verifier_mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1346 {
1347 __mark_reg_unknown(env, reg);
1348 reg->type = NOT_INIT;
1349 }
1350
1351 static void mark_reg_not_init(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1352 {
1353 if (WARN_ON(regno >= MAX_BPF_REG)) {
1354 verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1355 /* Something bad happened, let's kill all regs except FP */
1356 for (regno = 0; regno < BPF_REG_FP; regno++) {
1357 verifier_mark_reg_not_init(env, regs + regno);
1358 }
1359 return;
1360 }
1361 verifier_mark_reg_not_init(env, regs + regno);
1362 }
1363
1364 static void mark_btf_ld_reg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
1365 enum bpf_reg_type reg_type, u32 btf_id)
1366 {
1367 if (reg_type == SCALAR_VALUE) {
1368 mark_reg_unknown(env, regs, regno);
1369 return;
1370 }
1371 mark_reg_known_zero(env, regs, regno);
1372 regs[regno].type = PTR_TO_BTF_ID;
1373 regs[regno].btf_id = btf_id;
1374 }
1375
1376 #define DEF_NOT_SUBREG (0)
1377 static void init_reg_state(struct bpf_verifier_env *env, struct bpf_func_state *state)
1378 {
1379 struct bpf_reg_state *regs = state->regs;
1380 int i;
1381
1382 for (i = 0; i < MAX_BPF_REG; i++) {
1383 mark_reg_not_init(env, regs, i);
1384 regs[i].live = REG_LIVE_NONE;
1385 regs[i].parent = NULL;
1386 regs[i].subreg_def = DEF_NOT_SUBREG;
1387 }
1388
1389 /* frame pointer */
1390 regs[BPF_REG_FP].type = PTR_TO_STACK;
1391 mark_reg_known_zero(env, regs, BPF_REG_FP);
1392 regs[BPF_REG_FP].frameno = state->frameno;
1393 }
1394
1395 #define BPF_MAIN_FUNC (-1)
1396 static void init_func_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int callsite, int frameno,
1397 int subprogno)
1398 {
1399 state->callsite = callsite;
1400 state->frameno = frameno;
1401 state->subprogno = subprogno;
1402 init_reg_state(env, state);
1403 }
1404
1405 enum reg_arg_type {
1406 SRC_OP, /* register is used as source operand */
1407 DST_OP, /* register is used as destination operand */
1408 DST_OP_NO_MARK /* same as above, check only, don't mark */
1409 };
1410
1411 static int cmp_subprogs(const void *a, const void *b)
1412 {
1413 return ((struct bpf_subprog_info *)a)->start - ((struct bpf_subprog_info *)b)->start;
1414 }
1415
1416 static int find_subprog(struct bpf_verifier_env *env, int off)
1417 {
1418 struct bpf_subprog_info *p;
1419
1420 p = bsearch(&off, env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs);
1421 if (!p) {
1422 return -ENOENT;
1423 }
1424 return p - env->subprog_info;
1425 }
1426
1427 static int add_subprog(struct bpf_verifier_env *env, int off)
1428 {
1429 int insn_cnt = env->prog->len;
1430 int ret;
1431
1432 if (off >= insn_cnt || off < 0) {
1433 verbose(env, "call to invalid destination\n");
1434 return -EINVAL;
1435 }
1436 ret = find_subprog(env, off);
1437 if (ret >= 0) {
1438 return 0;
1439 }
1440 if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1441 verbose(env, "too many subprograms\n");
1442 return -E2BIG;
1443 }
1444 env->subprog_info[env->subprog_cnt++].start = off;
1445 sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1446 return 0;
1447 }
1448
1449 static int check_subprogs(struct bpf_verifier_env *env)
1450 {
1451 int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1452 struct bpf_subprog_info *subprog = env->subprog_info;
1453 struct bpf_insn *insn = env->prog->insnsi;
1454 int insn_cnt = env->prog->len;
1455
1456 /* Add entry function. */
1457 ret = add_subprog(env, 0);
1458 if (ret < 0) {
1459 return ret;
1460 }
1461
1462 /* determine subprog starts. The end is one before the next starts */
1463 for (i = 0; i < insn_cnt; i++) {
1464 if (insn[i].code != (BPF_JMP | BPF_CALL)) {
1465 continue;
1466 }
1467 if (insn[i].src_reg != BPF_PSEUDO_CALL) {
1468 continue;
1469 }
1470 if (!env->bpf_capable) {
1471 verbose(env, "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1472 return -EPERM;
1473 }
1474 ret = add_subprog(env, i + insn[i].imm + 1);
1475 if (ret < 0) {
1476 return ret;
1477 }
1478 }
1479
1480 /* Add a fake 'exit' subprog which could simplify subprog iteration
1481 * logic. 'subprog_cnt' should not be increased.
1482 */
1483 subprog[env->subprog_cnt].start = insn_cnt;
1484
1485 if (env->log.level & BPF_LOG_LEVEL2) {
1486 for (i = 0; i < env->subprog_cnt; i++) {
1487 verbose(env, "func#%d @%d\n", i, subprog[i].start);
1488 }
1489 }
1490
1491 /* now check that all jumps are within the same subprog */
1492 subprog_start = subprog[cur_subprog].start;
1493 subprog_end = subprog[cur_subprog + 1].start;
1494 for (i = 0; i < insn_cnt; i++) {
1495 u8 code = insn[i].code;
1496
1497 if (code == (BPF_JMP | BPF_CALL) && insn[i].imm == BPF_FUNC_tail_call && insn[i].src_reg != BPF_PSEUDO_CALL) {
1498 subprog[cur_subprog].has_tail_call = true;
1499 }
1500 if (BPF_CLASS(code) == BPF_LD && (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) {
1501 subprog[cur_subprog].has_ld_abs = true;
1502 }
1503 if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) {
1504 goto next;
1505 }
1506 if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) {
1507 goto next;
1508 }
1509 off = i + insn[i].off + 1;
1510 if (off < subprog_start || off >= subprog_end) {
1511 verbose(env, "jump out of range from insn %d to %d\n", i, off);
1512 return -EINVAL;
1513 }
1514 next:
1515 if (i == subprog_end - 1) {
1516 /* to avoid fall-through from one subprog into another
1517 * the last insn of the subprog should be either exit
1518 * or unconditional jump back
1519 */
1520 if (code != (BPF_JMP | BPF_EXIT) && code != (BPF_JMP | BPF_JA)) {
1521 verbose(env, "last insn is not an exit or jmp\n");
1522 return -EINVAL;
1523 }
1524 subprog_start = subprog_end;
1525 cur_subprog++;
1526 if (cur_subprog < env->subprog_cnt) {
1527 subprog_end = subprog[cur_subprog + 1].start;
1528 }
1529 }
1530 }
1531 return 0;
1532 }
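/* Illustrative sketch of the fall-through rule enforced above (the insn
 * indexes and registers below are made up for this example): with a callee
 * starting at insn 3, insn 2 is the last insn of the entry subprog and must
 * be BPF_EXIT or an unconditional BPF_JA, so a layout like
 *
 *   0: r1 = 1
 *   1: call pc+1      // BPF_PSEUDO_CALL to the subprog at insn 3
 *   2: r0 = 0         // neither exit nor ja
 *   3: r0 += 1
 *   4: exit
 *
 * is rejected with "last insn is not an exit or jmp".
 */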
1533
1534 /* Parentage chain of this register (or stack slot) should take care of all
1535 * issues like callee-saved registers, stack slot allocation time, etc.
1536 */
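/* A minimal sketch of how that chain is walked (state names hypothetical):
 * if r6 is read in the current state without having been written there,
 * mark_reg_read() follows reg->parent and ORs REG_LIVE_READ32 or
 * REG_LIVE_READ64 into each ancestor copy of r6 until it reaches one that
 * already carries the mark, or until a write mark screens the read, so that
 * later state pruning knows this register must still match in the parents.
 */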
1537 static int mark_reg_read(struct bpf_verifier_env *env, const struct bpf_reg_state *state, struct bpf_reg_state *parent,
1538 u8 flag)
1539 {
1540 bool writes = parent == state->parent; /* Observe write marks */
1541 int cnt = 0;
1542
1543 while (parent) {
1544 /* if read wasn't screened by an earlier write ... */
1545 if (writes && (state->live & REG_LIVE_WRITTEN)) {
1546 break;
1547 }
1548 if (parent->live & REG_LIVE_DONE) {
1549 verbose(env, "verifier BUG type %s var_off %lld off %d\n", reg_type_str(env, parent->type),
1550 parent->var_off.value, parent->off);
1551 return -EFAULT;
1552 }
1553 /* The first condition is more likely to be true than the
1554 * second, so check it first.
1555 */
1556 if ((parent->live & REG_LIVE_READ) == flag || (parent->live & REG_LIVE_READ64)) {
1557 /* The parentage chain never changes and
1558 * this parent was already marked as LIVE_READ.
1559 * There is no need to keep walking the chain again and
1560 * keep re-marking all parents as LIVE_READ.
1561 * This case happens when the same register is read
1562 * multiple times without writes into it in-between.
1563 * Also, if parent has the stronger REG_LIVE_READ64 set,
1564 * then no need to set the weak REG_LIVE_READ32.
1565 */
1566 break;
1567 }
1568 /* ... then we depend on parent's value */
1569 parent->live |= flag;
1570 /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1571 if (flag == REG_LIVE_READ64) {
1572 parent->live &= ~REG_LIVE_READ32;
1573 }
1574 state = parent;
1575 parent = state->parent;
1576 writes = true;
1577 cnt++;
1578 }
1579
1580 if (env->longest_mark_read_walk < cnt) {
1581 env->longest_mark_read_walk = cnt;
1582 }
1583 return 0;
1584 }
1585
1586 /* This function is supposed to be used by the following 32-bit optimization
1587 * code only. It returns TRUE if the source or destination register operates
1588 * on 64-bit, otherwise return FALSE.
1589 */
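/* A few illustrative classifications under the rules below (macros as in
 * include/linux/filter.h; this is a sketch, not an exhaustive list):
 *
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1)         -> 64-bit (class BPF_ALU64)
 *   BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1)         -> 32-bit (class BPF_ALU)
 *   BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, 0)  -> 32-bit def of r1,
 *                                                   64-bit (pointer) use of r2
 *   BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0) -> 64-bit def of r1
 */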
1590 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, u32 regno, struct bpf_reg_state *reg,
1591 enum reg_arg_type t)
1592 {
1593 u8 code, class, op;
1594
1595 code = insn->code;
1596 class = BPF_CLASS(code);
1597 op = BPF_OP(code);
1598 if (class == BPF_JMP) {
1599 /* BPF_EXIT for "main" will reach here. Return TRUE
1600 * conservatively.
1601 */
1602 if (op == BPF_EXIT) {
1603 return true;
1604 }
1605 if (op == BPF_CALL) {
1606 /* BPF to BPF call will reach here because of marking
1607 * caller saved clobber with DST_OP_NO_MARK for which we
1608 * don't care about the register def because they are already
1609 * marked as NOT_INIT.
1610 */
1611 if (insn->src_reg == BPF_PSEUDO_CALL) {
1612 return false;
1613 }
1614 /* Helper call will reach here because of arg type
1615 * check, conservatively return TRUE.
1616 */
1617 if (t == SRC_OP) {
1618 return true;
1619 }
1620
1621 return false;
1622 }
1623 }
1624
1625 if (class == BPF_ALU64 || class == BPF_JMP ||
1626 /* BPF_END always uses BPF_ALU class. */
1627 (class == BPF_ALU && op == BPF_END && insn->imm == VERIFIER_SIXTYFOUR)) {
1628 return true;
1629 }
1630
1631 if (class == BPF_ALU || class == BPF_JMP32) {
1632 return false;
1633 }
1634
1635 if (class == BPF_LDX) {
1636 if (t != SRC_OP) {
1637 return BPF_SIZE(code) == BPF_DW;
1638 }
1639 /* LDX source must be ptr. */
1640 return true;
1641 }
1642
1643 if (class == BPF_STX) {
1644 if (reg->type != SCALAR_VALUE) {
1645 return true;
1646 }
1647 return BPF_SIZE(code) == BPF_DW;
1648 }
1649
1650 if (class == BPF_LD) {
1651 u8 mode = BPF_MODE(code);
1652 /* LD_IMM64 */
1653 if (mode == BPF_IMM) {
1654 return true;
1655 }
1656
1657 /* Both LD_IND and LD_ABS return 32-bit data. */
1658 if (t != SRC_OP) {
1659 return false;
1660 }
1661
1662 /* Implicit ctx ptr. */
1663 if (regno == BPF_REG_6) {
1664 return true;
1665 }
1666
1667 /* Explicit source could be any width. */
1668 return true;
1669 }
1670
1671 if (class == BPF_ST) {
1672 /* The only source register for BPF_ST is a ptr. */
1673 return true;
1674 }
1675
1676 /* Conservatively return true at default. */
1677 return true;
1678 }
1679
1680 /* Return TRUE if INSN doesn't define any value explicitly. */
1681 static bool insn_no_def(struct bpf_insn *insn)
1682 {
1683 u8 class = BPF_CLASS(insn->code);
1684
1685 return (class == BPF_JMP || class == BPF_JMP32 || class == BPF_STX || class == BPF_ST);
1686 }
1687
1688 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
1689 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1690 {
1691 if (insn_no_def(insn)) {
1692 return false;
1693 }
1694
1695 return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1696 }
1697
1698 static void mark_insn_zext(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1699 {
1700 s32 def_idx = reg->subreg_def;
1701
1702 if (def_idx == DEF_NOT_SUBREG) {
1703 return;
1704 }
1705
1706 env->insn_aux_data[def_idx - 1].zext_dst = true;
1707 /* The dst will be zero extended, so won't be sub-register anymore. */
1708 reg->subreg_def = DEF_NOT_SUBREG;
1709 }
1710
1711 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t)
1712 {
1713 struct bpf_verifier_state *vstate = env->cur_state;
1714 struct bpf_func_state *state = vstate->frame[vstate->curframe];
1715 struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1716 struct bpf_reg_state *reg, *regs = state->regs;
1717 bool rw64;
1718
1719 if (regno >= MAX_BPF_REG) {
1720 verbose(env, "R%d is invalid\n", regno);
1721 return -EINVAL;
1722 }
1723
1724 reg = &regs[regno];
1725 rw64 = is_reg64(env, insn, regno, reg, t);
1726 if (t == SRC_OP) {
1727 /* check whether register used as source operand can be read */
1728 if (reg->type == NOT_INIT) {
1729 verbose(env, "R%d !read_ok\n", regno);
1730 return -EACCES;
1731 }
1732 /* We don't need to worry about FP liveness because it's read-only */
1733 if (regno == BPF_REG_FP) {
1734 return 0;
1735 }
1736
1737 if (rw64) {
1738 mark_insn_zext(env, reg);
1739 }
1740
1741 return mark_reg_read(env, reg, reg->parent, rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1742 } else {
1743 /* check whether register used as dest operand can be written to */
1744 if (regno == BPF_REG_FP) {
1745 verbose(env, "frame pointer is read only\n");
1746 return -EACCES;
1747 }
1748 reg->live |= REG_LIVE_WRITTEN;
1749 reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1750 if (t == DST_OP) {
1751 mark_reg_unknown(env, regs, regno);
1752 }
1753 }
1754 return 0;
1755 }
1756
1757 /* for any branch, call, exit record the history of jmps in the given state */
1758 static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur)
1759 {
1760 u32 cnt = cur->jmp_history_cnt;
1761 struct bpf_idx_pair *p;
1762
1763 cnt++;
1764 p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1765 if (!p) {
1766 return -ENOMEM;
1767 }
1768 p[cnt - 1].idx = env->insn_idx;
1769 p[cnt - 1].prev_idx = env->prev_insn_idx;
1770 cur->jmp_history = p;
1771 cur->jmp_history_cnt = cnt;
1772 return 0;
1773 }
1774
1775 /* Backtrack one insn at a time. If idx is not at the top of recorded
1776 * history then previous instruction came from straight line execution.
1777 */
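/* For example (hypothetical indexes): if jmp_history records the pair
 * (idx=7, prev_idx=3) and backtracking is currently at insn 7, the previous
 * insn is 3 and the history entry is consumed; if backtracking is at insn 6,
 * which is not at the top of the history, the previous insn is simply 5.
 */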
1778 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, u32 *history)
1779 {
1780 u32 cnt = *history;
1781
1782 if (cnt && st->jmp_history[cnt - 1].idx == i) {
1783 i = st->jmp_history[cnt - 1].prev_idx;
1784 (*history)--;
1785 } else {
1786 i--;
1787 }
1788 return i;
1789 }
1790
1791 /* For given verifier state backtrack_insn() is called from the last insn to
1792 * the first insn. Its purpose is to compute a bitmask of registers and
1793 * stack slots that need precision in the parent verifier state.
1794 */
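/* A small worked example of the mask updates done below (registers chosen
 * arbitrarily): suppose r6 needs precision, i.e. reg_mask = (1 << 6).
 * Backtracking through "r6 += r7" keeps r6 in the mask and adds r7;
 * backtracking through "r6 = r2" drops r6 and adds r2; backtracking through
 * "r6 = 42" drops r6, and once both masks are empty the walk can stop.
 */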
1795 static int backtrack_insn(struct bpf_verifier_env *env, int idx, u32 *reg_mask, u64 *stack_mask)
1796 {
1797 const struct bpf_insn_cbs cbs = {
1798 .cb_print = verbose,
1799 .private_data = env,
1800 };
1801 struct bpf_insn *insn = env->prog->insnsi + idx;
1802 u8 class = BPF_CLASS(insn->code);
1803 u8 opcode = BPF_OP(insn->code);
1804 u8 mode = BPF_MODE(insn->code);
1805 u32 dreg = 1u << insn->dst_reg;
1806 u32 sreg = 1u << insn->src_reg;
1807 u32 spi;
1808
1809 if (insn->code == 0) {
1810 return 0;
1811 }
1812 if (env->log.level & BPF_LOG_LEVEL) {
1813 verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1814 verbose(env, "%d: ", idx);
1815 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1816 }
1817
1818 if (class == BPF_ALU || class == BPF_ALU64) {
1819 if (!(*reg_mask & dreg)) {
1820 return 0;
1821 }
1822 if (opcode == BPF_MOV) {
1823 if (BPF_SRC(insn->code) == BPF_X) {
1824 /* dreg = sreg
1825 * dreg needs precision after this insn
1826 * sreg needs precision before this insn
1827 */
1828 *reg_mask &= ~dreg;
1829 *reg_mask |= sreg;
1830 } else {
1831 /* dreg = K
1832 * dreg needs precision after this insn.
1833 * Corresponding register is already marked
1834 * as precise=true in this verifier state.
1835 * No further markings in parent are necessary
1836 */
1837 *reg_mask &= ~dreg;
1838 }
1839 } else {
1840 if (BPF_SRC(insn->code) == BPF_X) {
1841 /* dreg += sreg
1842 * both dreg and sreg need precision
1843 * before this insn
1844 */
1845 *reg_mask |= sreg;
1846 }
1847 /* else dreg += K
1848 * dreg still needs precision before this insn
1849 */
1850 }
1851 } else if (class == BPF_LDX) {
1852 if (!(*reg_mask & dreg)) {
1853 return 0;
1854 }
1855 *reg_mask &= ~dreg;
1856
1857 /* scalars can only be spilled into stack w/o losing precision.
1858 * Load from any other memory can be zero extended.
1859 * The desire to keep that precision is already indicated
1860 * by 'precise' mark in corresponding register of this state.
1861 * No further tracking necessary.
1862 */
1863 if (insn->src_reg != BPF_REG_FP) {
1864 return 0;
1865 }
1866 if (BPF_SIZE(insn->code) != BPF_DW) {
1867 return 0;
1868 }
1869
1870 /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1871 * that [fp - off] slot contains scalar that needs to be
1872 * tracked with precision
1873 */
1874 spi = (-insn->off - 1) / BPF_REG_SIZE;
1875 if (spi >= VERIFIER_SIXTYFOUR) {
1876 verbose(env, "BUG spi %d\n", spi);
1877 WARN_ONCE(1, "verifier backtracking bug");
1878 return -EFAULT;
1879 }
1880 *stack_mask |= 1ull << spi;
1881 } else if (class == BPF_STX || class == BPF_ST) {
1882 if (*reg_mask & dreg) {
1883 /* stx & st shouldn't be using _scalar_ dst_reg
1884 * to access memory. It means backtracking
1885 * encountered a case of pointer subtraction.
1886 */
1887 return -ENOTSUPP;
1888 }
1889 /* scalars can only be spilled into stack */
1890 if (insn->dst_reg != BPF_REG_FP) {
1891 return 0;
1892 }
1893 if (BPF_SIZE(insn->code) != BPF_DW) {
1894 return 0;
1895 }
1896 spi = (-insn->off - 1) / BPF_REG_SIZE;
1897 if (spi >= VERIFIER_SIXTYFOUR) {
1898 verbose(env, "BUG spi %d\n", spi);
1899 WARN_ONCE(1, "verifier backtracking bug");
1900 return -EFAULT;
1901 }
1902 if (!(*stack_mask & (1ull << spi))) {
1903 return 0;
1904 }
1905 *stack_mask &= ~(1ull << spi);
1906 if (class == BPF_STX) {
1907 *reg_mask |= sreg;
1908 }
1909 } else if (class == BPF_JMP || class == BPF_JMP32) {
1910 if (opcode == BPF_CALL) {
1911 if (insn->src_reg == BPF_PSEUDO_CALL) {
1912 return -ENOTSUPP;
1913 }
1914 /* regular helper call sets R0 */
1915 *reg_mask &= ~1;
1916 if (*reg_mask & 0x3f) {
1917 /* if backtracking was looking for registers R1-R5
1918 * they should have been found already.
1919 */
1920 verbose(env, "BUG regs %x\n", *reg_mask);
1921 WARN_ONCE(1, "verifier backtracking bug");
1922 return -EFAULT;
1923 }
1924 } else if (opcode == BPF_EXIT) {
1925 return -ENOTSUPP;
1926 }
1927 } else if (class == BPF_LD) {
1928 if (!(*reg_mask & dreg)) {
1929 return 0;
1930 }
1931 *reg_mask &= ~dreg;
1932 /* It's ld_imm64 or ld_abs or ld_ind.
1933 * For ld_imm64 no further tracking of precision
1934 * into parent is necessary
1935 */
1936 if (mode == BPF_IND || mode == BPF_ABS) {
1937 /* to be analyzed */
1938 return -ENOTSUPP;
1939 }
1940 }
1941 return 0;
1942 }
1943
1944 /* the scalar precision tracking algorithm:
1945 * . at the start all registers have precise=false.
1946 * . scalar ranges are tracked as normal through alu and jmp insns.
1947 * . once precise value of the scalar register is used in:
1948 * . ptr + scalar alu
1949 * . if (scalar cond K|scalar)
1950 * . helper_call(.., scalar, ...) where ARG_CONST is expected
1951 * backtrack through the verifier states and mark all registers and
1952 * stack slots with spilled constants that these scalar registers
1953 * should be precise.
1954 * . during state pruning two registers (or spilled stack slots)
1955 * are equivalent if both are not precise.
1956 *
1957 * Note the verifier cannot simply walk register parentage chain,
1958 * since many different registers and stack slots could have been
1959 * used to compute single precise scalar.
1960 *
1961 * The approach of starting with precise=true for all registers and then
1962 * backtracking to mark a register as not precise when the verifier detects
1963 * that the program doesn't care about the specific value (e.g., when a helper
1964 * takes the register as an ARG_ANYTHING parameter) is not safe.
1965 *
1966 * It's ok to walk single parentage chain of the verifier states.
1967 * It's possible that this backtracking will go all the way till 1st insn.
1968 * All other branches will be explored for needing precision later.
1969 *
1970 * The backtracking needs to deal with cases like:
1971 * R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1972 * r9 -= r8
1973 * r5 = r9
1974 * if r5 > 0x79f goto pc+7
1975 * R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1976 * r5 += 1
1977 * ...
1978 * call bpf_perf_event_output#25
1979 * where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1980 *
1981 * and this case:
1982 * r6 = 1
1983 * call foo // uses callee's r6 inside to compute r0
1984 * r0 += r6
1985 * if r0 == 0 goto
1986 *
1987 * to track above reg_mask/stack_mask needs to be independent for each frame.
1988 *
1989 * Also if parent's curframe > frame where backtracking started,
1990 * the verifier needs to mark registers in both frames, otherwise callees
1991 * may incorrectly prune callers. This is similar to
1992 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1993 *
1994 * For now backtracking falls back into conservative marking.
1995 */
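/* For instance (a made-up fragment), in:
 *   r1 = map_value_ptr
 *   r2 = 8
 *   r1 += r2
 * the "ptr + scalar" ALU makes the value of r2 relevant for safety, so
 * mark_chain_precision(env, BPF_REG_2) is expected to walk back and set
 * precise=true on r2 (and on anything that produced it), while scalars the
 * program never uses this way can stay imprecise and prune more freely.
 */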
1996 static void mark_all_scalars_precise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1997 {
1998 struct bpf_func_state *func;
1999 struct bpf_reg_state *reg;
2000 int i, j;
2001
2002 /* big hammer: mark all scalars precise in this path.
2003 * pop_stack may still get !precise scalars.
2004 */
2005 for (; st; st = st->parent) {
2006 for (i = 0; i <= st->curframe; i++) {
2007 func = st->frame[i];
2008 for (j = 0; j < BPF_REG_FP; j++) {
2009 reg = &func->regs[j];
2010 if (reg->type != SCALAR_VALUE) {
2011 continue;
2012 }
2013 reg->precise = true;
2014 }
2015 for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2016 if (func->stack[j].slot_type[0] != STACK_SPILL) {
2017 continue;
2018 }
2019 reg = &func->stack[j].spilled_ptr;
2020 if (reg->type != SCALAR_VALUE) {
2021 continue;
2022 }
2023 reg->precise = true;
2024 }
2025 }
2026 }
2027 }
2028
2029 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, int spi)
2030 {
2031 struct bpf_verifier_state *st = env->cur_state;
2032 int first_idx = st->first_insn_idx;
2033 int last_idx = env->insn_idx;
2034 struct bpf_func_state *func;
2035 struct bpf_reg_state *reg;
2036 u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2037 u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2038 bool skip_first = true;
2039 bool new_marks = false;
2040 int i, err;
2041
2042 if (!env->bpf_capable) {
2043 return 0;
2044 }
2045
2046 func = st->frame[st->curframe];
2047 if (regno >= 0) {
2048 reg = &func->regs[regno];
2049 if (reg->type != SCALAR_VALUE) {
2050 WARN_ONCE(1, "backtracing misuse");
2051 return -EFAULT;
2052 }
2053 if (!reg->precise) {
2054 new_marks = true;
2055 } else {
2056 reg_mask = 0;
2057 }
2058 reg->precise = true;
2059 }
2060
2061 while (spi >= 0) {
2062 if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2063 stack_mask = 0;
2064 break;
2065 }
2066 reg = &func->stack[spi].spilled_ptr;
2067 if (reg->type != SCALAR_VALUE) {
2068 stack_mask = 0;
2069 break;
2070 }
2071 if (!reg->precise) {
2072 new_marks = true;
2073 } else {
2074 stack_mask = 0;
2075 }
2076 reg->precise = true;
2077 break;
2078 }
2079
2080 if (!new_marks) {
2081 return 0;
2082 }
2083 if (!reg_mask && !stack_mask) {
2084 return 0;
2085 }
2086 for (;;) {
2087 DECLARE_BITMAP(mask, VERIFIER_SIXTYFOUR);
2088 u32 history = st->jmp_history_cnt;
2089
2090 if (env->log.level & BPF_LOG_LEVEL) {
2091 verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2092 }
2093 for (i = last_idx;;) {
2094 if (skip_first) {
2095 err = 0;
2096 skip_first = false;
2097 } else {
2098 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2099 }
2100 if (err == -ENOTSUPP) {
2101 mark_all_scalars_precise(env, st);
2102 return 0;
2103 } else if (err) {
2104 return err;
2105 }
2106 if (!reg_mask && !stack_mask) {
2107 /* Found assignment(s) into tracked register in this state.
2108 * Since this state is already marked, just return.
2109 * Nothing to be tracked further in the parent state.
2110 */
2111 return 0;
2112 }
2113 if (i == first_idx) {
2114 break;
2115 }
2116 i = get_prev_insn_idx(st, i, &history);
2117 if (i >= env->prog->len) {
2118 /* This can happen if backtracking reached insn 0
2119 * and there are still reg_mask or stack_mask
2120 * to backtrack.
2121 * It means the backtracking missed the spot where
2122 * particular register was initialized with a constant.
2123 */
2124 verbose(env, "BUG backtracking idx %d\n", i);
2125 WARN_ONCE(1, "verifier backtracking bug");
2126 return -EFAULT;
2127 }
2128 }
2129 st = st->parent;
2130 if (!st) {
2131 break;
2132 }
2133
2134 new_marks = false;
2135 func = st->frame[st->curframe];
2136 bitmap_from_u64(mask, reg_mask);
2137 for_each_set_bit(i, mask, 0x20)
2138 {
2139 reg = &func->regs[i];
2140 if (reg->type != SCALAR_VALUE) {
2141 reg_mask &= ~(1u << i);
2142 continue;
2143 }
2144 if (!reg->precise) {
2145 new_marks = true;
2146 }
2147 reg->precise = true;
2148 }
2149
2150 bitmap_from_u64(mask, stack_mask);
2151 for_each_set_bit(i, mask, VERIFIER_SIXTYFOUR)
2152 {
2153 if (i >= func->allocated_stack / BPF_REG_SIZE) {
2154 /* the sequence of instructions:
2155 * 2: (bf) r3 = r10
2156 * 3: (7b) *(u64 *)(r3 -8) = r0
2157 * 4: (79) r4 = *(u64 *)(r10 -8)
2158 * doesn't contain jmps. It's backtracked
2159 * as a single block.
2160 * During backtracking insn 3 is not recognized as
2161 * stack access, so at the end of backtracking
2162 * stack slot fp-8 is still marked in stack_mask.
2163 * However the parent state may not have accessed
2164 * fp-8 and it's "unallocated" stack space.
2165 * In such case fallback to conservative.
2166 */
2167 mark_all_scalars_precise(env, st);
2168 return 0;
2169 }
2170
2171 if (func->stack[i].slot_type[0] != STACK_SPILL) {
2172 stack_mask &= ~(1ull << i);
2173 continue;
2174 }
2175 reg = &func->stack[i].spilled_ptr;
2176 if (reg->type != SCALAR_VALUE) {
2177 stack_mask &= ~(1ull << i);
2178 continue;
2179 }
2180 if (!reg->precise) {
2181 new_marks = true;
2182 }
2183 reg->precise = true;
2184 }
2185 if (env->log.level & BPF_LOG_LEVEL) {
2186 print_verifier_state(env, func);
2187 verbose(env, "parent %s regs=%x stack=%llx marks\n", new_marks ? "didn't have" : "already had", reg_mask,
2188 stack_mask);
2189 }
2190
2191 if (!reg_mask && !stack_mask) {
2192 break;
2193 }
2194 if (!new_marks) {
2195 break;
2196 }
2197
2198 last_idx = st->last_insn_idx;
2199 first_idx = st->first_insn_idx;
2200 }
2201 return 0;
2202 }
2203
2204 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2205 {
2206 return __mark_chain_precision(env, regno, -1);
2207 }
2208
2209 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2210 {
2211 return __mark_chain_precision(env, -1, spi);
2212 }
2213
2214 static bool is_spillable_regtype(enum bpf_reg_type type)
2215 {
2216 switch (base_type(type)) {
2217 case PTR_TO_MAP_VALUE:
2218 case PTR_TO_STACK:
2219 case PTR_TO_CTX:
2220 case PTR_TO_PACKET:
2221 case PTR_TO_PACKET_META:
2222 case PTR_TO_PACKET_END:
2223 case PTR_TO_FLOW_KEYS:
2224 case CONST_PTR_TO_MAP:
2225 case PTR_TO_SOCKET:
2226 case PTR_TO_SOCK_COMMON:
2227 case PTR_TO_TCP_SOCK:
2228 case PTR_TO_XDP_SOCK:
2229 case PTR_TO_BTF_ID:
2230 case PTR_TO_BUF:
2231 case PTR_TO_PERCPU_BTF_ID:
2232 case PTR_TO_MEM:
2233 return true;
2234 default:
2235 return false;
2236 }
2237 }
2238
2239 /* Does this register contain a constant zero? */
2240 static bool register_is_null(struct bpf_reg_state *reg)
2241 {
2242 return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2243 }
2244
2245 static bool register_is_const(struct bpf_reg_state *reg)
2246 {
2247 return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2248 }
2249
2250 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2251 {
2252 return tnum_is_unknown(reg->var_off) && reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2253 reg->umin_value == 0 && reg->umax_value == U64_MAX && reg->s32_min_value == S32_MIN &&
2254 reg->s32_max_value == S32_MAX && reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2255 }
2256
2257 static bool register_is_bounded(struct bpf_reg_state *reg)
2258 {
2259 return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2260 }
2261
2262 static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg)
2263 {
2264 if (allow_ptr_leaks) {
2265 return false;
2266 }
2267
2268 return reg->type != SCALAR_VALUE;
2269 }
2270
2271 static void save_register_state(struct bpf_func_state *state, int spi, struct bpf_reg_state *reg)
2272 {
2273 int i;
2274
2275 state->stack[spi].spilled_ptr = *reg;
2276 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2277
2278 for (i = 0; i < BPF_REG_SIZE; i++) {
2279 state->stack[spi].slot_type[i] = STACK_SPILL;
2280 }
2281 }
2282
2283 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2284 * stack boundary and alignment are checked in check_mem_access()
2285 */
2286 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2287 /* stack frame we're writing to */
2288 struct bpf_func_state *state, int off, int size, int value_regno, int insn_idx)
2289 {
2290 struct bpf_func_state *cur; /* state of the current function */
2291 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2292 u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2293 struct bpf_reg_state *reg = NULL;
2294
2295 err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), state->acquired_refs, true);
2296 if (err) {
2297 return err;
2298 }
2299 /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2300 * so it's aligned access and [off, off + size) are within stack limits
2301 */
2302 if (!env->allow_ptr_leaks && state->stack[spi].slot_type[0] == STACK_SPILL && size != BPF_REG_SIZE) {
2303 verbose(env, "attempt to corrupt spilled pointer on stack\n");
2304 return -EACCES;
2305 }
2306
2307 cur = env->cur_state->frame[env->cur_state->curframe];
2308 if (value_regno >= 0) {
2309 reg = &cur->regs[value_regno];
2310 }
2311 if (!env->bypass_spec_v4) {
2312 bool sanitize = reg && is_spillable_regtype(reg->type);
2313
2314 for (i = 0; i < size; i++) {
2315 if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2316 sanitize = true;
2317 break;
2318 }
2319 }
2320
2321 if (sanitize) {
2322 env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2323 }
2324 }
2325
2326 if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) {
2327 if (dst_reg != BPF_REG_FP) {
2328 /* The backtracking logic can only recognize explicit
2329 * stack slot address like [fp - 8]. Other spill of
2330 * scalar via different register has to be conservative.
2331 * Backtrack from here and mark all registers as precise
2332 * that contributed into 'reg' being a constant.
2333 */
2334 err = mark_chain_precision(env, value_regno);
2335 if (err) {
2336 return err;
2337 }
2338 }
2339 save_register_state(state, spi, reg);
2340 } else if (reg && is_spillable_regtype(reg->type)) {
2341 /* register containing pointer is being spilled into stack */
2342 if (size != BPF_REG_SIZE) {
2343 verbose_linfo(env, insn_idx, "; ");
2344 verbose(env, "invalid size of register spill\n");
2345 return -EACCES;
2346 }
2347 if (state != cur && reg->type == PTR_TO_STACK) {
2348 verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2349 return -EINVAL;
2350 }
2351 save_register_state(state, spi, reg);
2352 } else {
2353 u8 type = STACK_MISC;
2354
2355 /* regular write of data into stack destroys any spilled ptr */
2356 state->stack[spi].spilled_ptr.type = NOT_INIT;
2357 /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2358 if (state->stack[spi].slot_type[0] == STACK_SPILL) {
2359 for (i = 0; i < BPF_REG_SIZE; i++) {
2360 state->stack[spi].slot_type[i] = STACK_MISC;
2361 }
2362 }
2363
2364 /* only mark the slot as written if all 8 bytes were written
2365 * otherwise read propagation may incorrectly stop too soon
2366 * when stack slots are partially written.
2367 * This heuristic means that read propagation will be
2368 * conservative, since it will add reg_live_read marks
2369 * to stack slots all the way to the first state when a program
2370 * writes+reads less than 8 bytes
2371 */
2372 if (size == BPF_REG_SIZE) {
2373 state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2374 }
2375
2376 /* when we zero initialize stack slots mark them as such */
2377 if (reg && register_is_null(reg)) {
2378 /* backtracking doesn't work for STACK_ZERO yet. */
2379 err = mark_chain_precision(env, value_regno);
2380 if (err) {
2381 return err;
2382 }
2383 type = STACK_ZERO;
2384 }
2385
2386 /* Mark slots affected by this stack write. */
2387 for (i = 0; i < size; i++) {
2388 state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
2389 }
2390 }
2391 return 0;
2392 }
2393
2394 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2395 * known to contain a variable offset.
2396 * This function checks whether the write is permitted and conservatively
2397 * tracks the effects of the write, considering that each stack slot in the
2398 * dynamic range is potentially written to.
2399 *
2400 * 'off' includes 'regno->off'.
2401 * 'value_regno' can be -1, meaning that an unknown value is being written to
2402 * the stack.
2403 *
2404 * Spilled pointers in range are not marked as written because we don't know
2405 * what's going to be actually written. This means that read propagation for
2406 * future reads cannot be terminated by this write.
2407 *
2408 * For privileged programs, uninitialized stack slots are considered
2409 * initialized by this write (even though we don't know exactly what offsets
2410 * are going to be written to). The idea is that we don't want the verifier to
2411 * reject future reads that access slots written to through variable offsets.
2412 */
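/* A sketch of what that means in practice (offsets invented for this
 * example): with r2 = r10 and a scalar r7 known to be in [-16, -8],
 *   r2 += r7
 *   *(u64 *)(r2 + 0) = r3
 * touches some 8 bytes somewhere in fp-16..fp-1, so every slot in that
 * window conservatively becomes STACK_MISC (or stays STACK_ZERO when a
 * zero is written), and a spilled pointer in the window makes the write
 * invalid for unprivileged programs.
 */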
2413 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2414 /* func where register points to */
2415 struct bpf_func_state *state, int ptr_regno, int off, int size, int value_regno,
2416 int insn_idx)
2417 {
2418 struct bpf_func_state *cur; /* state of the current function */
2419 int min_off, max_off;
2420 int i, err;
2421 struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2422 bool writing_zero = false;
2423 /* set if the fact that we're writing a zero is used to let any
2424 * stack slots remain STACK_ZERO
2425 */
2426 bool zero_used = false;
2427
2428 cur = env->cur_state->frame[env->cur_state->curframe];
2429 ptr_reg = &cur->regs[ptr_regno];
2430 min_off = ptr_reg->smin_value + off;
2431 max_off = ptr_reg->smax_value + off + size;
2432 if (value_regno >= 0) {
2433 value_reg = &cur->regs[value_regno];
2434 }
2435 if (value_reg && register_is_null(value_reg)) {
2436 writing_zero = true;
2437 }
2438
2439 err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), state->acquired_refs, true);
2440 if (err) {
2441 return err;
2442 }
2443
2444 /* Variable offset writes destroy any spilled pointers in range. */
2445 for (i = min_off; i < max_off; i++) {
2446 u8 new_type, *stype;
2447 int slot, spi;
2448
2449 slot = -i - 1;
2450 spi = slot / BPF_REG_SIZE;
2451 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2452
2453 if (!env->allow_ptr_leaks && *stype != NOT_INIT && *stype != SCALAR_VALUE) {
2454 /* Reject the write if there are spilled pointers in
2455 * range. If we didn't reject here, the ptr status
2456 * would be erased below (even though not all slots are
2457 * actually overwritten), possibly opening the door to
2458 * leaks.
2459 */
2460 verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", insn_idx, i);
2461 return -EINVAL;
2462 }
2463
2464 /* Erase all spilled pointers. */
2465 state->stack[spi].spilled_ptr.type = NOT_INIT;
2466
2467 /* Update the slot type. */
2468 new_type = STACK_MISC;
2469 if (writing_zero && *stype == STACK_ZERO) {
2470 new_type = STACK_ZERO;
2471 zero_used = true;
2472 }
2473 /* If the slot is STACK_INVALID, we check whether it's OK to
2474 * pretend that it will be initialized by this write. The slot
2475 * might not actually be written to, and so if we mark it as
2476 * initialized future reads might leak uninitialized memory.
2477 * For privileged programs, we will accept such reads to slots
2478 * that may or may not be written because, if we were to reject
2479 * them, the error would be too confusing.
2480 */
2481 if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2482 verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", insn_idx,
2483 i);
2484 return -EINVAL;
2485 }
2486 *stype = new_type;
2487 }
2488 if (zero_used) {
2489 /* backtracking doesn't work for STACK_ZERO yet. */
2490 err = mark_chain_precision(env, value_regno);
2491 if (err) {
2492 return err;
2493 }
2494 }
2495 return 0;
2496 }
2497
2498 /* When register 'dst_regno' is assigned some values from stack[min_off,
2499 * max_off), we set the register's type according to the types of the
2500 * respective stack slots. If all the stack values are known to be zeros, then
2501 * so is the destination reg. Otherwise, the register is considered to be
2502 * SCALAR. This function does not deal with register filling; the caller must
2503 * ensure that all spilled registers in the stack range have been marked as
2504 * read.
2505 */
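/* For example (a hypothetical 4-byte read covering fp-8..fp-5): if all four
 * bytes are STACK_ZERO the destination register becomes a known constant 0,
 * while a single STACK_MISC byte in the range degrades it to an unknown
 * SCALAR_VALUE.
 */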
2506 static void mark_reg_stack_read(struct bpf_verifier_env *env,
2507 /* func where src register points to */
2508 struct bpf_func_state *ptr_state, int min_off, int max_off, int dst_regno)
2509 {
2510 struct bpf_verifier_state *vstate = env->cur_state;
2511 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2512 int i, slot, spi;
2513 u8 *stype;
2514 int zeros = 0;
2515
2516 for (i = min_off; i < max_off; i++) {
2517 slot = -i - 1;
2518 spi = slot / BPF_REG_SIZE;
2519 stype = ptr_state->stack[spi].slot_type;
2520 if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) {
2521 break;
2522 }
2523 zeros++;
2524 }
2525 if (zeros == max_off - min_off) {
2526 /* any access_size read into register is zero extended,
2527 * so the whole register == const_zero
2528 */
2529 verifier_mark_reg_const_zero(&state->regs[dst_regno]);
2530 /* backtracking doesn't support STACK_ZERO yet,
2531 * so mark it precise here, so that later
2532 * backtracking can stop here.
2533 * Backtracking may not need this if this register
2534 * doesn't participate in pointer adjustment.
2535 * Forward propagation of precise flag is not
2536 * necessary either. This mark is only to stop
2537 * backtracking. Any register that contributed
2538 * to const 0 was marked precise before spill.
2539 */
2540 state->regs[dst_regno].precise = true;
2541 } else {
2542 /* have read misc data from the stack */
2543 mark_reg_unknown(env, state->regs, dst_regno);
2544 }
2545 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2546 }
2547
2548 /* Read the stack at 'off' and put the results into the register indicated by
2549 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
2550 * spilled reg.
2551 *
2552 * 'dst_regno' can be -1, meaning that the read value is not going to a
2553 * register.
2554 *
2555 * The access is assumed to be within the current stack bounds.
2556 */
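/* Two illustrative cases for the slot handling below (offsets invented):
 * after "*(u64 *)(r10 - 8) = r1" with r1=PTR_TO_CTX, a full-width
 * "r2 = *(u64 *)(r10 - 8)" restores PTR_TO_CTX into r2, while a 4-byte read
 * of the same slot is rejected with "invalid size of register fill" because
 * only spilled scalars may be partially read.
 */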
2557 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
2558 /* func where src register points to */
2559 struct bpf_func_state *reg_state, int off, int size, int dst_regno)
2560 {
2561 struct bpf_verifier_state *vstate = env->cur_state;
2562 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2563 int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2564 struct bpf_reg_state *reg;
2565 u8 *stype;
2566
2567 stype = reg_state->stack[spi].slot_type;
2568 reg = &reg_state->stack[spi].spilled_ptr;
2569
2570 if (stype[0] == STACK_SPILL) {
2571 if (size != BPF_REG_SIZE) {
2572 if (reg->type != SCALAR_VALUE) {
2573 verbose_linfo(env, env->insn_idx, "; ");
2574 verbose(env, "invalid size of register fill\n");
2575 return -EACCES;
2576 }
2577 if (dst_regno >= 0) {
2578 mark_reg_unknown(env, state->regs, dst_regno);
2579 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2580 }
2581 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2582 return 0;
2583 }
2584 for (i = 1; i < BPF_REG_SIZE; i++) {
2585 if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2586 verbose(env, "corrupted spill memory\n");
2587 return -EACCES;
2588 }
2589 }
2590
2591 if (dst_regno >= 0) {
2592 /* restore register state from stack */
2593 state->regs[dst_regno] = *reg;
2594 /* mark reg as written since spilled pointer state likely
2595 * has its liveness marks cleared by is_state_visited()
2596 * which resets stack/reg liveness for state transitions
2597 */
2598 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2599 } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2600 /* If dst_regno==-1, the caller is asking us whether
2601 * it is acceptable to use this value as a SCALAR_VALUE
2602 * (e.g. for XADD).
2603 * We must not allow unprivileged callers to do that
2604 * with spilled pointers.
2605 */
2606 verbose(env, "leaking pointer from stack off %d\n", off);
2607 return -EACCES;
2608 }
2609 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2610 } else {
2611 u8 type;
2612
2613 for (i = 0; i < size; i++) {
2614 type = stype[(slot - i) % BPF_REG_SIZE];
2615 if (type == STACK_MISC) {
2616 continue;
2617 }
2618 if (type == STACK_ZERO) {
2619 continue;
2620 }
2621 verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size);
2622 return -EACCES;
2623 }
2624 mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2625 if (dst_regno >= 0) {
2626 mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
2627 }
2628 }
2629 return 0;
2630 }
2631
2632 enum stack_access_src {
2633 ACCESS_DIRECT = 1, /* the access is performed by an instruction */
2634 ACCESS_HELPER = 2, /* the access is performed by a helper */
2635 };
2636
2637 static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size,
2638 bool zero_size_allowed, enum stack_access_src type,
2639 struct bpf_call_arg_meta *meta);
2640
2641 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2642 {
2643 return cur_regs(env) + regno;
2644 }
2645
2646 /* Read the stack at 'ptr_regno + off' and put the result into the register
2647 * 'dst_regno'.
2648 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
2649 * but not its variable offset.
2650 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
2651 *
2652 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
2653 * filling registers (i.e. reads of spilled register cannot be detected when
2654 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
2655 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
2656 * offset; for a fixed offset check_stack_read_fixed_off should be used
2657 * instead.
2658 */
2659 static int check_stack_read_var_off(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int dst_regno)
2660 {
2661 /* The state of the source register. */
2662 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2663 struct bpf_func_state *ptr_state = func(env, reg);
2664 int err;
2665 int min_off, max_off;
2666
2667 /* Note that we pass a NULL meta, so raw access will not be permitted.
2668 */
2669 err = check_stack_range_initialized(env, ptr_regno, off, size, false, ACCESS_DIRECT, NULL);
2670 if (err) {
2671 return err;
2672 }
2673
2674 min_off = reg->smin_value + off;
2675 max_off = reg->smax_value + off;
2676 mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
2677 return 0;
2678 }
2679
2680 /* check_stack_read dispatches to check_stack_read_fixed_off or
2681 * check_stack_read_var_off.
2682 *
2683 * The caller must ensure that the offset falls within the allocated stack
2684 * bounds.
2685 *
2686 * 'dst_regno' is a register which will receive the value from the stack. It
2687 * can be -1, meaning that the read value is not going to a register.
2688 */
2689 static int check_stack_read(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int dst_regno)
2690 {
2691 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2692 struct bpf_func_state *state = func(env, reg);
2693 int err;
2694 /* Some accesses are only permitted with a static offset. */
2695 bool var_off = !tnum_is_const(reg->var_off);
2696 /* The offset is required to be static when reads don't go to a
2697 * register, in order to not leak pointers (see
2698 * check_stack_read_fixed_off).
2699 */
2700 if (dst_regno < 0 && var_off) {
2701 char tn_buf[48];
2702
2703 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2704 verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
2705 tn_buf, off, size);
2706 return -EACCES;
2707 }
2708 /* Variable offset is prohibited for unprivileged mode for simplicity
2709 * since it requires corresponding support in Spectre masking for stack
2710 * ALU. See also retrieve_ptr_limit().
2711 */
2712 if (!env->bypass_spec_v1 && var_off) {
2713 char tn_buf[48];
2714
2715 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2716 verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", ptr_regno, tn_buf);
2717 return -EACCES;
2718 }
2719
2720 if (!var_off) {
2721 off += reg->var_off.value;
2722 err = check_stack_read_fixed_off(env, state, off, size, dst_regno);
2723 } else {
2724 /* Variable offset stack reads need more conservative handling
2725 * than fixed offset ones. Note that dst_regno >= 0 on this
2726 * branch.
2727 */
2728 err = check_stack_read_var_off(env, ptr_regno, off, size, dst_regno);
2729 }
2730 return err;
2731 }
2732
2733 /* check_stack_write dispatches to check_stack_write_fixed_off or
2734 * check_stack_write_var_off.
2735 *
2736 * 'ptr_regno' is the register used as a pointer into the stack.
2737 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
2738 * 'value_regno' is the register whose value we're writing to the stack. It can
2739 * be -1, meaning that we're not writing from a register.
2740 *
2741 * The caller must ensure that the offset falls within the maximum stack size.
2742 */
2743 static int check_stack_write(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int value_regno,
2744 int insn_idx)
2745 {
2746 struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2747 struct bpf_func_state *state = func(env, reg);
2748 int err;
2749
2750 if (tnum_is_const(reg->var_off)) {
2751 off += reg->var_off.value;
2752 err = check_stack_write_fixed_off(env, state, off, size, value_regno, insn_idx);
2753 } else {
2754 /* Variable offset stack writes need more conservative handling
2755 * than fixed offset ones.
2756 */
2757 err = check_stack_write_var_off(env, state, ptr_regno, off, size, value_regno, insn_idx);
2758 }
2759 return err;
2760 }
2761
2762 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, int off, int size, enum bpf_access_type type)
2763 {
2764 struct bpf_reg_state *regs = cur_regs(env);
2765 struct bpf_map *map = regs[regno].map_ptr;
2766 u32 cap = bpf_map_flags_to_cap(map);
2767 if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2768 verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", map->value_size, off, size);
2769 return -EACCES;
2770 }
2771 if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2772 verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", map->value_size, off, size);
2773 return -EACCES;
2774 }
2775
2776 return 0;
2777 }
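/* For example, a map created with BPF_F_RDONLY_PROG only carries
 * BPF_MAP_CAN_READ in bpf_map_flags_to_cap(), so a store through its value
 * pointer is expected to fail here with "write into map forbidden" while
 * loads remain allowed. (Illustrative; the exact flag handling lives in
 * bpf_map_flags_to_cap().)
 */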
2778
2779 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
2780 static int __check_mem_access(struct bpf_verifier_env *env, int regno, int off, int size, u32 mem_size,
2781 bool zero_size_allowed)
2782 {
2783 bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
2784 struct bpf_reg_state *reg;
2785
2786 if (off >= 0 && size_ok && (u64)off + size <= mem_size) {
2787 return 0;
2788 }
2789
2790 reg = &cur_regs(env)[regno];
2791 switch (reg->type) {
2792 case PTR_TO_MAP_VALUE:
2793 verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size);
2794 break;
2795 case PTR_TO_PACKET:
2796 case PTR_TO_PACKET_META:
2797 case PTR_TO_PACKET_END:
2798 verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno,
2799 reg->id, off, mem_size);
2800 break;
2801 case PTR_TO_MEM:
2802 default:
2803 verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", mem_size, off, size);
2804 }
2805
2806 return -EACCES;
2807 }
2808
2809 /* check read/write into a memory region with possible variable offset */
2810 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, int off, int size, u32 mem_size,
2811 bool zero_size_allowed)
2812 {
2813 struct bpf_verifier_state *vstate = env->cur_state;
2814 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2815 struct bpf_reg_state *reg = &state->regs[regno];
2816 int err;
2817
2818 /* We may have adjusted the register pointing to memory region, so we
2819 * need to try adding each of min_value and max_value to off
2820 * to make sure our theoretical access will be safe.
2821 */
2822 if (env->log.level & BPF_LOG_LEVEL) {
2823 print_verifier_state(env, state);
2824 }
2825
2826 /* The minimum value is only important with signed
2827 * comparisons where we can't assume the floor of a
2828 * value is 0. If we are using signed variables for our
2829 * indexes we need to make sure that whatever we use
2830 * will have a set floor within our range.
2831 */
2832 if (reg->smin_value < 0 &&
2833 (reg->smin_value == S64_MIN || (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2834 reg->smin_value + off < 0)) {
2835 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
2836 return -EACCES;
2837 }
2838 err = __check_mem_access(env, regno, reg->smin_value + off, size, mem_size, zero_size_allowed);
2839 if (err) {
2840 verbose(env, "R%d min value is outside of the allowed memory range\n", regno);
2841 return err;
2842 }
2843
2844 /* If we haven't set a max value then we need to bail since we can't be
2845 * sure we won't do bad things.
2846 * If reg->umax_value + off could overflow, treat that as unbounded too.
2847 */
2848 if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2849 verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n", regno);
2850 return -EACCES;
2851 }
2852 err = __check_mem_access(env, regno, reg->umax_value + off, size, mem_size, zero_size_allowed);
2853 if (err) {
2854 verbose(env, "R%d max value is outside of the allowed memory range\n", regno);
2855 return err;
2856 }
2857
2858 return 0;
2859 }
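/* A worked example of the two-sided check above (numbers invented): for a
 * region of mem_size 16 and a register whose value range is [0, 12], a
 * 4-byte access at off 8 passes the minimum-side check (0 + 8 + 4 is within
 * bounds) but fails the maximum side, since 12 + 8 + 4 = 24 > 16, producing
 * "R%d max value is outside of the allowed memory range".
 */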
2860
2861 /* check read/write into a map element with possible variable offset */
2862 static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed)
2863 {
2864 struct bpf_verifier_state *vstate = env->cur_state;
2865 struct bpf_func_state *state = vstate->frame[vstate->curframe];
2866 struct bpf_reg_state *reg = &state->regs[regno];
2867 struct bpf_map *map = reg->map_ptr;
2868 int err;
2869
2870 err = check_mem_region_access(env, regno, off, size, map->value_size, zero_size_allowed);
2871 if (err) {
2872 return err;
2873 }
2874
2875 if (map_value_has_spin_lock(map)) {
2876 u32 lock = map->spin_lock_off;
2877
2878 /* if any part of struct bpf_spin_lock can be touched by
2879 * load/store reject this program.
2880 * To check that [x1, x2) overlaps with [y1, y2)
2881 * it is sufficient to check x1 < y2 && y1 < x2.
2882 */
2883 if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && lock < reg->umax_value + off + size) {
2884 verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2885 return -EACCES;
2886 }
2887 }
2888 return err;
2889 }
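/* Worked overlap example for the bpf_spin_lock check above (sizes invented):
 * with map->spin_lock_off == 16, the lock occupies bytes [16, 20). An 8-byte
 * access at a fixed offset 12 covers [12, 20): 12 < 20 and 16 < 20, so the
 * overlap test fires and the access is rejected; the same access at offset
 * 20 gives 20 < 20 == false and is allowed.
 */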
2890
2891 #define MAX_PACKET_OFF 0xffff
2892
2893 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
2894 {
2895 return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
2896 }
2897
2898 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta,
2899 enum bpf_access_type t)
2900 {
2901 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
2902
2903 switch (prog_type) {
2904 /* Program types only with direct read access go here! */
2905 case BPF_PROG_TYPE_LWT_IN:
2906 case BPF_PROG_TYPE_LWT_OUT:
2907 case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2908 case BPF_PROG_TYPE_SK_REUSEPORT:
2909 case BPF_PROG_TYPE_FLOW_DISSECTOR:
2910 case BPF_PROG_TYPE_CGROUP_SKB:
2911 if (t == BPF_WRITE) {
2912 return false;
2913 }
2914 fallthrough;
2915
2916 /* Program types with direct read + write access go here! */
2917 case BPF_PROG_TYPE_SCHED_CLS:
2918 case BPF_PROG_TYPE_SCHED_ACT:
2919 case BPF_PROG_TYPE_XDP:
2920 case BPF_PROG_TYPE_LWT_XMIT:
2921 case BPF_PROG_TYPE_SK_SKB:
2922 case BPF_PROG_TYPE_SK_MSG:
2923 if (meta) {
2924 return meta->pkt_access;
2925 }
2926
2927 env->seen_direct_write = true;
2928 return true;
2929
2930 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2931 if (t == BPF_WRITE) {
2932 env->seen_direct_write = true;
2933 }
2934
2935 return true;
2936
2937 default:
2938 return false;
2939 }
2940 }
2941
2942 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed)
2943 {
2944 struct bpf_reg_state *regs = cur_regs(env);
2945 struct bpf_reg_state *reg = &regs[regno];
2946 int err;
2947
2948 /* We may have added a variable offset to the packet pointer; but any
2949 * reg->range we have comes after that. We are only checking the fixed
2950 * offset.
2951 */
2952
2953 /* We don't allow negative numbers, because we aren't tracking enough
2954 * detail to prove they're safe.
2955 */
2956 if (reg->smin_value < 0) {
2957 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
2958 return -EACCES;
2959 }
2960 err = __check_mem_access(env, regno, off, size, reg->range, zero_size_allowed);
2961 if (err) {
2962 verbose(env, "R%d offset is outside of the packet\n", regno);
2963 return err;
2964 }
2965
2966 /* __check_mem_access has made sure "off + size - 1" is within u16.
2967 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2968 * otherwise find_good_pkt_pointers would have refused to set range info
2969 * and __check_mem_access would have rejected this pkt access.
2970 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2971 */
2972 env->prog->aux->max_pkt_offset = max_t(u32, env->prog->aux->max_pkt_offset, off + reg->umax_value + size - 1);
2973
2974 return err;
2975 }
2976
2977 /* check access to 'struct bpf_context' fields. Supports fixed offsets only */
2978 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t,
2979 enum bpf_reg_type *reg_type, u32 *btf_id)
2980 {
2981 struct bpf_insn_access_aux info = {
2982 .reg_type = *reg_type,
2983 .log = &env->log,
2984 };
2985
2986 if (env->ops->is_valid_access && env->ops->is_valid_access(off, size, t, env->prog, &info)) {
2987 /* A non zero info.ctx_field_size indicates that this field is a
2988 * candidate for later verifier transformation to load the whole
2989 * field and then apply a mask when accessed with a narrower
2990 * access than actual ctx access size. A zero info.ctx_field_size
2991 * will only allow for whole field access and rejects any other
2992 * type of narrower access.
2993 */
2994 *reg_type = info.reg_type;
2995
2996 if (base_type(*reg_type) == PTR_TO_BTF_ID) {
2997 *btf_id = info.btf_id;
2998 } else {
2999 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3000 }
3001 /* remember the offset of last byte accessed in ctx */
3002 if (env->prog->aux->max_ctx_offset < off + size) {
3003 env->prog->aux->max_ctx_offset = off + size;
3004 }
3005 return 0;
3006 }
3007
3008 verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3009 return -EACCES;
3010 }
3011
3012 static int check_flow_keys_access(struct bpf_verifier_env *env, int off, int size)
3013 {
3014 if (size < 0 || off < 0 || (u64)off + size > sizeof(struct bpf_flow_keys)) {
3015 verbose(env, "invalid access to flow keys off=%d size=%d\n", off, size);
3016 return -EACCES;
3017 }
3018 return 0;
3019 }
3020
3021 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int size,
3022 enum bpf_access_type t)
3023 {
3024 struct bpf_reg_state *regs = cur_regs(env);
3025 struct bpf_reg_state *reg = &regs[regno];
3026 struct bpf_insn_access_aux info = {};
3027 bool valid;
3028
3029 if (reg->smin_value < 0) {
3030 verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
3031 return -EACCES;
3032 }
3033
3034 switch (reg->type) {
3035 case PTR_TO_SOCK_COMMON:
3036 valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3037 break;
3038 case PTR_TO_SOCKET:
3039 valid = bpf_sock_is_valid_access(off, size, t, &info);
3040 break;
3041 case PTR_TO_TCP_SOCK:
3042 valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3043 break;
3044 case PTR_TO_XDP_SOCK:
3045 valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3046 break;
3047 default:
3048 valid = false;
3049 }
3050
3051 if (valid) {
3052 env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3053 return 0;
3054 }
3055
3056 verbose(env, "R%d invalid %s access off=%d size=%d\n", regno, reg_type_str(env, reg->type), off, size);
3057
3058 return -EACCES;
3059 }
3060
3061 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3062 {
3063 return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3064 }
3065
3066 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3067 {
3068 const struct bpf_reg_state *reg = reg_state(env, regno);
3069
3070 return reg->type == PTR_TO_CTX;
3071 }
3072
3073 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3074 {
3075 const struct bpf_reg_state *reg = reg_state(env, regno);
3076
3077 return type_is_sk_pointer(reg->type);
3078 }
3079
3080 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3081 {
3082 const struct bpf_reg_state *reg = reg_state(env, regno);
3083
3084 return type_is_pkt_pointer(reg->type);
3085 }
3086
3087 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3088 {
3089 const struct bpf_reg_state *reg = reg_state(env, regno);
3090
3091 /* Kept separate from is_ctx_reg() since we still want to allow BPF_ST here. */
3092 return reg->type == PTR_TO_FLOW_KEYS;
3093 }
3094
3095 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size,
3096 bool strict)
3097 {
3098 struct tnum reg_off;
3099 int ip_align;
3100
3101 /* Byte size accesses are always allowed. */
3102 if (!strict || size == 1) {
3103 return 0;
3104 }
3105
3106 /* For platforms that do not have a Kconfig enabling
3107 * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3108 * NET_IP_ALIGN is universally set to '2'. And on platforms
3109 * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3110 * to this code only in strict mode where we want to emulate
3111 * the NET_IP_ALIGN==2 checking. Therefore use an
3112 * unconditional IP align value of '2'.
3113 */
3114 ip_align = 2;
3115
3116 reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3117 if (!tnum_is_aligned(reg_off, size)) {
3118 char tn_buf[48];
3119
3120 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3121 verbose(env, "misaligned packet access off %d+%s+%d+%d size %d\n", ip_align, tn_buf, reg->off, off, size);
3122 return -EACCES;
3123 }
3124
3125 return 0;
3126 }
3127
3128 static int check_generic_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg,
3129 const char *pointer_desc, int off, int size, bool strict)
3130 {
3131 struct tnum reg_off;
3132
3133 /* Byte size accesses are always allowed. */
3134 if (!strict || size == 1) {
3135 return 0;
3136 }
3137
3138 reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3139 if (!tnum_is_aligned(reg_off, size)) {
3140 char tn_buf[48];
3141
3142 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3143 verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", pointer_desc, tn_buf, reg->off, off, size);
3144 return -EACCES;
3145 }
3146
3147 return 0;
3148 }
3149
3150 static int check_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size,
3151 bool strict_alignment_once)
3152 {
3153 bool strict = env->strict_alignment || strict_alignment_once;
3154 const char *pointer_desc = "";
3155
3156 switch (reg->type) {
3157 case PTR_TO_PACKET:
3158 case PTR_TO_PACKET_META:
3159 /* Special case, because of NET_IP_ALIGN. Given metadata sits
3160 * right in front, treat it the very same way.
3161 */
3162 return check_pkt_ptr_alignment(env, reg, off, size, strict);
3163 case PTR_TO_FLOW_KEYS:
3164 pointer_desc = "flow keys ";
3165 break;
3166 case PTR_TO_MAP_VALUE:
3167 pointer_desc = "value ";
3168 break;
3169 case PTR_TO_CTX:
3170 pointer_desc = "context ";
3171 break;
3172 case PTR_TO_STACK:
3173 pointer_desc = "stack ";
3174 /* The stack spill tracking logic in check_stack_write_fixed_off()
3175 * and check_stack_read_fixed_off() relies on stack accesses being
3176 * aligned.
3177 */
3178 strict = true;
3179 break;
3180 case PTR_TO_SOCKET:
3181 pointer_desc = "sock ";
3182 break;
3183 case PTR_TO_SOCK_COMMON:
3184 pointer_desc = "sock_common ";
3185 break;
3186 case PTR_TO_TCP_SOCK:
3187 pointer_desc = "tcp_sock ";
3188 break;
3189 case PTR_TO_XDP_SOCK:
3190 pointer_desc = "xdp_sock ";
3191 break;
3192 default:
3193 break;
3194 }
3195 return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, strict);
3196 }
3197
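/* Record the deepest stack offset seen so far for the current subprogram.
 * 'off' is negative (relative to the frame pointer), while stack_depth is
 * kept as a positive byte count.
 */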
3198 static int update_stack_depth(struct bpf_verifier_env *env, const struct bpf_func_state *func, int off)
3199 {
3200 u16 stack = env->subprog_info[func->subprogno].stack_depth;
3201
3202 if (stack >= -off) {
3203 return 0;
3204 }
3205
3206 /* update known max for given subprogram */
3207 env->subprog_info[func->subprogno].stack_depth = -off;
3208 return 0;
3209 }
3210
3211 /* starting from main bpf function walk all instructions of the function
3212 * and recursively walk all callees that the given function can call.
3213 * Ignore jump and exit insns.
3214 * Since recursion is prevented by check_cfg() this algorithm
3215 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3216 */
3217 static int check_max_stack_depth(struct bpf_verifier_env *env)
3218 {
3219 int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3220 struct bpf_subprog_info *subprog = env->subprog_info;
3221 struct bpf_insn *insn = env->prog->insnsi;
3222 bool tail_call_reachable = false;
3223 int ret_insn[MAX_CALL_FRAMES];
3224 int ret_prog[MAX_CALL_FRAMES];
3225 int j;
3226 int process_flag = 0;
3227 int continue_flag = 0;
3228
3229 while (1) {
3230 if (process_flag == 0 && continue_flag == 0) {
3231 /* protect against potential stack overflow that might happen when
3232 * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3233 * depth for such case down to 256 so that the worst case scenario
3234 * would result in 8k stack size (32 which is tailcall limit * 256 =
3235 * 8k).
3236 *
3237 * To get the idea what might happen, see an example:
3238 * func1 -> sub rsp, 128
3239 * subfunc1 -> sub rsp, 256
3240 * tailcall1 -> add rsp, 256
3241 * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3242 * subfunc2 -> sub rsp, 64
3243 * subfunc22 -> sub rsp, 128
3244 * tailcall2 -> add rsp, 128
3245 * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3246 *
3247 * tailcall will unwind the current stack frame but it will not get rid
3248 * of caller's stack as shown on the example above.
3249 */
3250 if (idx && subprog[idx].has_tail_call && depth >= VERIFIER_TWOHUNDREDFIFTYSIX) {
3251 verbose(env, "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3252 depth);
3253 return -EACCES;
3254 }
3255 /* round up to 32-bytes, since this is granularity
3256 * of interpreter stack size
3257 */
3258 depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), VERIFIER_THIRTYTWO);
3259 if (depth > MAX_BPF_STACK) {
3260 verbose(env, "combined stack size of %d calls is %d. Too large\n", frame + 1, depth);
3261 return -EACCES;
3262 }
3263 }
3264 while (1) {
3265 continue_flag = 0;
3266 subprog_end = subprog[idx + 1].start;
3267 for (; i < subprog_end; i++) {
3268 if (insn[i].code != (BPF_JMP | BPF_CALL)) {
3269 continue;
3270 }
3271 if (insn[i].src_reg != BPF_PSEUDO_CALL) {
3272 continue;
3273 }
3274 /* remember insn and function to return to */
3275 ret_insn[frame] = i + 1;
3276 ret_prog[frame] = idx;
3277
3278 /* find the callee */
3279 i = i + insn[i].imm + 1;
3280 idx = find_subprog(env, i);
3281 if (idx < 0) {
3282 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", i);
3283 return -EFAULT;
3284 }
3285
3286 if (subprog[idx].has_tail_call) {
3287 tail_call_reachable = true;
3288 }
3289
3290 frame++;
3291 if (frame >= MAX_CALL_FRAMES) {
3292 verbose(env, "the call stack of %d frames is too deep !\n", frame);
3293 return -E2BIG;
3294 }
3295 process_flag = 1;
3296 break;
3297 }
3298 if (process_flag == 1) {
3299 break;
3300 }
3301 }
3302 if (process_flag == 1) {
3303 process_flag = 0;
3304 continue;
3305 }
3306 /* if tail call got detected across bpf2bpf calls then mark each of the
3307 * currently present subprog frames as tail call reachable subprogs;
3308 * this info will be utilized by JIT so that we will be preserving the
3309 * tail call counter throughout bpf2bpf calls combined with tailcalls
3310 */
3311 if (tail_call_reachable) {
3312 for (j = 0; j < frame; j++) {
3313 subprog[ret_prog[j]].tail_call_reachable = true;
3314 }
3315 }
3316 if (subprog[0].tail_call_reachable) {
3317 env->prog->aux->tail_call_reachable = true;
3318 }
3319
3320 /* end of for() loop means the last insn of the 'subprog'
3321 * was reached. Doesn't matter whether it was JA or EXIT
3322 */
3323 if (frame == 0) {
3324 return 0;
3325 }
3326 depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), VERIFIER_THIRTYTWO);
3327 frame--;
3328 i = ret_insn[frame];
3329 idx = ret_prog[frame];
3330 continue_flag = 1;
3331 continue;
3332 }
3333 }
3334
3335 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3336 static int get_callee_stack_depth(struct bpf_verifier_env *env, const struct bpf_insn *insn, int idx)
3337 {
3338 int start = idx + insn->imm + 1, subprog;
3339
3340 subprog = find_subprog(env, start);
3341 if (subprog < 0) {
3342 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", start);
3343 return -EFAULT;
3344 }
3345 return env->subprog_info[subprog].stack_depth;
3346 }
3347 #endif
3348
3349 static int __check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno,
3350 bool fixed_off_ok)
3351 {
3352 /* Access to this pointer-typed register or passing it to a helper
3353 * is only allowed in its original, unmodified form.
3354 */
3355
3356 if (!fixed_off_ok && reg->off) {
3357 verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", reg_type_str(env, reg->type), regno,
3358 reg->off);
3359 return -EACCES;
3360 }
3361
3362 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3363 char tn_buf[48];
3364
3365 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3366 verbose(env, "variable %s access var_off=%s disallowed\n", reg_type_str(env, reg->type), tn_buf);
3367 return -EACCES;
3368 }
3369
3370 return 0;
3371 }
3372
3373 int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno)
3374 {
3375 return __check_ptr_off_reg(env, reg, regno, false);
3376 }
3377
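/* Common checks for buffer-style pointers (tracepoint, rdonly/rdwr buf):
 * the offset must be non-negative and the register's variable offset must
 * be a known constant zero.
 */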
3378 static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg,
3379 int regno, int off, int size)
3380 {
3381 if (off < 0) {
3382 verbose(env, "R%d invalid %s buffer access: off=%d, size=%d\n", regno, buf_info, off, size);
3383 return -EACCES;
3384 }
3385 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3386 char tn_buf[48];
3387
3388 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3389 verbose(env, "R%d invalid variable buffer offset: off=%d, var_off=%s\n", regno, off, tn_buf);
3390 return -EACCES;
3391 }
3392
3393 return 0;
3394 }
3395
3396 static int check_tp_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, int off,
3397 int size)
3398 {
3399 int err;
3400
3401 err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3402 if (err) {
3403 return err;
3404 }
3405
3406 if (off + size > env->prog->aux->max_tp_access) {
3407 env->prog->aux->max_tp_access = off + size;
3408 }
3409
3410 return 0;
3411 }
3412
3413 static int check_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, int off,
3414 int size, bool zero_size_allowed, const char *buf_info, u32 *max_access)
3415 {
3416 int err;
3417
3418 err = __check_buffer_access(env, buf_info, reg, regno, off, size);
3419 if (err) {
3420 return err;
3421 }
3422
3423 if (off + size > *max_access) {
3424 *max_access = off + size;
3425 }
3426
3427 return 0;
3428 }
3429
3430 /* BPF architecture zero extends alu32 ops into 64-bit registers */
3431 static void zext_32_to_64(struct bpf_reg_state *reg)
3432 {
3433 reg->var_off = tnum_subreg(reg->var_off);
3434 verifier_reg_assign_32_into_64(reg);
3435 }
3436
3437 /* truncate register to smaller size (in bytes)
3438 * must be called with size < BPF_REG_SIZE
3439 */
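/* For example (illustrative only): coercing to size == 2 masks the value
 * with 0xffff; if umin_value and umax_value agree in the discarded upper
 * bits, the bounds are simply masked, otherwise they collapse to the full
 * [0, 0xffff] range.
 */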
3440 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
3441 {
3442 u64 mask;
3443
3444 /* clear high bits in bit representation */
3445 reg->var_off = tnum_cast(reg->var_off, size);
3446
3447 /* fix arithmetic bounds */
3448 mask = ((u64)1 << (size * VERIFIER_EIGHT)) - 1;
3449 if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
3450 reg->umin_value &= mask;
3451 reg->umax_value &= mask;
3452 } else {
3453 reg->umin_value = 0;
3454 reg->umax_value = mask;
3455 }
3456 reg->smin_value = reg->umin_value;
3457 reg->smax_value = reg->umax_value;
3458
3459 /* If the size is smaller than the 32-bit register, the 32-bit register
3460 * values are also truncated, so push the 64-bit bounds into the
3461 * 32-bit bounds. The bounds above were already truncated to < 32 bits.
3462 */
3463 if (size >= VERIFIER_FOUR) {
3464 return;
3465 }
3466 __reg_combine_64_into_32(reg);
3467 }
3468
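/* A map is read-only from the program's point of view only if it was
 * created with BPF_F_RDONLY_PROG and has been frozen from user space;
 * only then may its contents be tracked as constants.
 */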
3469 static bool bpf_map_is_rdonly(const struct bpf_map *map)
3470 {
3471 return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
3472 }
3473
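/* Read 'size' bytes at 'off' directly out of a map's value area via the
 * map_direct_value_addr() op, so a load from a frozen read-only map can
 * be tracked as a known scalar.
 */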
3474 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3475 {
3476 void *ptr;
3477 u64 addr;
3478 int err;
3479
3480 err = map->ops->map_direct_value_addr(map, &addr, off);
3481 if (err) {
3482 return err;
3483 }
3484 ptr = (void *)(long)addr + off;
3485
3486 switch (size) {
3487 case sizeof(u8):
3488 *val = (u64) * (u8 *)ptr;
3489 break;
3490 case sizeof(u16):
3491 *val = (u64) * (u16 *)ptr;
3492 break;
3493 case sizeof(u32):
3494 *val = (u64) * (u32 *)ptr;
3495 break;
3496 case sizeof(u64):
3497 *val = *(u64 *)ptr;
3498 break;
3499 default:
3500 return -EINVAL;
3501 }
3502 return 0;
3503 }
3504
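/* Validate a load/store through a PTR_TO_BTF_ID pointer against the BTF
 * description of the pointed-to type. A successful read may yield another
 * PTR_TO_BTF_ID register via mark_btf_ld_reg().
 */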
3505 static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off,
3506 int size, enum bpf_access_type atype, int value_regno)
3507 {
3508 struct bpf_reg_state *reg = regs + regno;
3509 const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3510 const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3511 u32 btf_id;
3512 int ret;
3513
3514 if (off < 0) {
3515 verbose(env, "R%d is ptr_%s invalid negative access: off=%d\n", regno, tname, off);
3516 return -EACCES;
3517 }
3518 if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3519 char tn_buf[48];
3520
3521 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3522 verbose(env, "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", regno, tname, off, tn_buf);
3523 return -EACCES;
3524 }
3525
3526 if (env->ops->btf_struct_access) {
3527 ret = env->ops->btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3528 } else {
3529 if (atype != BPF_READ) {
3530 verbose(env, "only read is supported\n");
3531 return -EACCES;
3532 }
3533
3534 ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3535 }
3536
3537 if (ret < 0) {
3538 return ret;
3539 }
3540
3541 if (atype == BPF_READ && value_regno >= 0) {
3542 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3543 }
3544
3545 return 0;
3546 }
3547
3548 static int check_ptr_to_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off,
3549 int size, enum bpf_access_type atype, int value_regno)
3550 {
3551 struct bpf_reg_state *reg = regs + regno;
3552 struct bpf_map *map = reg->map_ptr;
3553 const struct btf_type *t;
3554 const char *tname;
3555 u32 btf_id;
3556 int ret;
3557
3558 if (!btf_vmlinux) {
3559 verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
3560 return -ENOTSUPP;
3561 }
3562
3563 if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
3564 verbose(env, "map_ptr access not supported for map type %d\n", map->map_type);
3565 return -ENOTSUPP;
3566 }
3567
3568 t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
3569 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3570
3571 if (!env->allow_ptr_to_map_access) {
3572 verbose(env, "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", tname);
3573 return -EPERM;
3574 }
3575
3576 if (off < 0) {
3577 verbose(env, "R%d is %s invalid negative access: off=%d\n", regno, tname, off);
3578 return -EACCES;
3579 }
3580
3581 if (atype != BPF_READ) {
3582 verbose(env, "only read from %s is supported\n", tname);
3583 return -EACCES;
3584 }
3585
3586 ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3587 if (ret < 0) {
3588 return ret;
3589 }
3590
3591 if (value_regno >= 0) {
3592 mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3593 }
3594
3595 return 0;
3596 }
3597
3598 /* Check that the stack access at the given offset is within bounds. The
3599 * maximum valid offset is -1.
3600 *
3601 * The minimum valid offset is -MAX_BPF_STACK for writes, and
3602 * -state->allocated_stack for reads.
3603 */
3604 static int check_stack_slot_within_bounds(int off, struct bpf_func_state *state, enum bpf_access_type t)
3605 {
3606 int min_valid_off;
3607
3608 if (t == BPF_WRITE) {
3609 min_valid_off = -MAX_BPF_STACK;
3610 } else {
3611 min_valid_off = -state->allocated_stack;
3612 }
3613
3614 if (off < min_valid_off || off > -1) {
3615 return -EACCES;
3616 }
3617 return 0;
3618 }
3619
3620 /* Check that the stack access at 'regno + off' falls within the maximum stack
3621 * bounds.
3622 *
3623 * 'off' includes `regno->offset`, but not its dynamic part (if any).
3624 */
3625 static int check_stack_access_within_bounds(struct bpf_verifier_env *env, int regno, int off, int access_size,
3626 enum stack_access_src src, enum bpf_access_type type)
3627 {
3628 struct bpf_reg_state *regs = cur_regs(env);
3629 struct bpf_reg_state *reg = regs + regno;
3630 struct bpf_func_state *state = func(env, reg);
3631 int min_off, max_off;
3632 int err;
3633 char *err_extra;
3634
3635 if (src == ACCESS_HELPER) {
3636 /* We don't know if helpers are reading or writing (or both). */
3637 err_extra = " indirect access to";
3638 } else if (type == BPF_READ) {
3639 err_extra = " read from";
3640 } else {
3641 err_extra = " write to";
3642 }
3643
3644 if (tnum_is_const(reg->var_off)) {
3645 min_off = reg->var_off.value + off;
3646 if (access_size > 0) {
3647 max_off = min_off + access_size - 1;
3648 } else {
3649 max_off = min_off;
3650 }
3651 } else {
3652 if (reg->smax_value >= BPF_MAX_VAR_OFF || reg->smin_value <= -BPF_MAX_VAR_OFF) {
3653 verbose(env, "invalid unbounded variable-offset%s stack R%d\n", err_extra, regno);
3654 return -EACCES;
3655 }
3656 min_off = reg->smin_value + off;
3657 if (access_size > 0) {
3658 max_off = reg->smax_value + off + access_size - 1;
3659 } else {
3660 max_off = min_off;
3661 }
3662 }
3663
3664 err = check_stack_slot_within_bounds(min_off, state, type);
3665 if (!err) {
3666 err = check_stack_slot_within_bounds(max_off, state, type);
3667 }
3668
3669 if (err) {
3670 if (tnum_is_const(reg->var_off)) {
3671 verbose(env, "invalid%s stack R%d off=%d size=%d\n", err_extra, regno, off, access_size);
3672 } else {
3673 char tn_buf[48];
3674
3675 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3676 verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n", err_extra, regno, tn_buf,
3677 access_size);
3678 }
3679 }
3680 return err;
3681 }
3682
3683 /* check whether memory at (regno + off) is accessible for t = (read | write)
3684 * if t==write, value_regno is a register whose value is stored into memory
3685 * if t==read, value_regno is a register which will receive the value from memory
3686 * if t==write && value_regno==-1, some unknown value is stored into memory
3687 * if t==read && value_regno==-1, don't care what we read from memory
3688 */
3689 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size,
3690 enum bpf_access_type t, int value_regno, bool strict_alignment_once)
3691 {
3692 struct bpf_reg_state *regs = cur_regs(env);
3693 struct bpf_reg_state *reg = regs + regno;
3694 struct bpf_func_state *state;
3695 int size, err = 0;
3696
3697 size = bpf_size_to_bytes(bpf_size);
3698 if (size < 0) {
3699 return size;
3700 }
3701
3702 /* alignment checks will add in reg->off themselves */
3703 err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
3704 if (err) {
3705 return err;
3706 }
3707
3708 /* for access checks, reg->off is just part of off */
3709 off += reg->off;
3710
3711 if (reg->type == PTR_TO_MAP_VALUE) {
3712 if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3713 verbose(env, "R%d leaks addr into map\n", value_regno);
3714 return -EACCES;
3715 }
3716 err = check_map_access_type(env, regno, off, size, t);
3717 if (err) {
3718 return err;
3719 }
3720 err = check_map_access(env, regno, off, size, false);
3721 if (!err && t == BPF_READ && value_regno >= 0) {
3722 struct bpf_map *map = reg->map_ptr;
3723
3724 /* if map is read-only, track its contents as scalars */
3725 if (tnum_is_const(reg->var_off) && bpf_map_is_rdonly(map) && map->ops->map_direct_value_addr) {
3726 int map_off = off + reg->var_off.value;
3727 u64 val = 0;
3728
3729 err = bpf_map_direct_read(map, map_off, size, &val);
3730 if (err) {
3731 return err;
3732 }
3733
3734 regs[value_regno].type = SCALAR_VALUE;
3735 verifier_mark_reg_known(&regs[value_regno], val);
3736 } else {
3737 mark_reg_unknown(env, regs, value_regno);
3738 }
3739 }
3740 } else if (base_type(reg->type) == PTR_TO_MEM) {
3741 bool rdonly_mem = type_is_rdonly_mem(reg->type);
3742
3743 if (type_may_be_null(reg->type)) {
3744 verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str(env, reg->type));
3745 return -EACCES;
3746 }
3747
3748 if (t == BPF_WRITE && rdonly_mem) {
3749 verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3750 return -EACCES;
3751 }
3752
3753 if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3754 verbose(env, "R%d leaks addr into mem\n", value_regno);
3755 return -EACCES;
3756 }
3757
3758 err = check_mem_region_access(env, regno, off, size, reg->mem_size, false);
3759 if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) {
3760 mark_reg_unknown(env, regs, value_regno);
3761 }
3762 } else if (reg->type == PTR_TO_CTX) {
3763 enum bpf_reg_type reg_type = SCALAR_VALUE;
3764 u32 btf_id = 0;
3765
3766 if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3767 verbose(env, "R%d leaks addr into ctx\n", value_regno);
3768 return -EACCES;
3769 }
3770
3771 err = check_ptr_off_reg(env, reg, regno);
3772 if (err < 0) {
3773 return err;
3774 }
3775
3776 err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3777 if (err) {
3778 verbose_linfo(env, insn_idx, "; ");
3779 }
3780 if (!err && t == BPF_READ && value_regno >= 0) {
3781 /* ctx access returns either a scalar, or a
3782 * PTR_TO_PACKET[_META,_END]. In the latter
3783 * case, we know the offset is zero.
3784 */
3785 if (reg_type == SCALAR_VALUE) {
3786 mark_reg_unknown(env, regs, value_regno);
3787 } else {
3788 mark_reg_known_zero(env, regs, value_regno);
3789 if (type_may_be_null(reg_type)) {
3790 regs[value_regno].id = ++env->id_gen;
3791 }
3792 /* A load of a ctx field could have an actual load
3793 * size different from the one encoded in the
3794 * insn. When the dst is a PTR, it is for sure not
3795 * a sub-register.
3796 */
3797 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
3798 if (base_type(reg_type) == PTR_TO_BTF_ID) {
3799 regs[value_regno].btf_id = btf_id;
3800 }
3801 }
3802 regs[value_regno].type = reg_type;
3803 }
3804 } else if (reg->type == PTR_TO_STACK) {
3805 /* Basic bounds checks. */
3806 err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
3807 if (err) {
3808 return err;
3809 }
3810 state = func(env, reg);
3811 err = update_stack_depth(env, state, off);
3812 if (err) {
3813 return err;
3814 }
3815
3816 if (t == BPF_READ) {
3817 err = check_stack_read(env, regno, off, size, value_regno);
3818 } else {
3819 err = check_stack_write(env, regno, off, size, value_regno, insn_idx);
3820 }
3821 } else if (reg_is_pkt_pointer(reg)) {
3822 if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
3823 verbose(env, "cannot write into packet\n");
3824 return -EACCES;
3825 }
3826 if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3827 verbose(env, "R%d leaks addr into packet\n", value_regno);
3828 return -EACCES;
3829 }
3830 err = check_packet_access(env, regno, off, size, false);
3831 if (!err && t == BPF_READ && value_regno >= 0) {
3832 mark_reg_unknown(env, regs, value_regno);
3833 }
3834 } else if (reg->type == PTR_TO_FLOW_KEYS) {
3835 if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3836 verbose(env, "R%d leaks addr into flow keys\n", value_regno);
3837 return -EACCES;
3838 }
3839
3840 err = check_flow_keys_access(env, off, size);
3841 if (!err && t == BPF_READ && value_regno >= 0) {
3842 mark_reg_unknown(env, regs, value_regno);
3843 }
3844 } else if (type_is_sk_pointer(reg->type)) {
3845 if (t == BPF_WRITE) {
3846 verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3847 return -EACCES;
3848 }
3849 err = check_sock_access(env, insn_idx, regno, off, size, t);
3850 if (!err && value_regno >= 0) {
3851 mark_reg_unknown(env, regs, value_regno);
3852 }
3853 } else if (reg->type == PTR_TO_TP_BUFFER) {
3854 err = check_tp_buffer_access(env, reg, regno, off, size);
3855 if (!err && t == BPF_READ && value_regno >= 0) {
3856 mark_reg_unknown(env, regs, value_regno);
3857 }
3858 } else if (reg->type == PTR_TO_BTF_ID) {
3859 err = check_ptr_to_btf_access(env, regs, regno, off, size, t, value_regno);
3860 } else if (reg->type == CONST_PTR_TO_MAP) {
3861 err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno);
3862 } else if (base_type(reg->type) == PTR_TO_BUF) {
3863 bool rdonly_mem = type_is_rdonly_mem(reg->type);
3864 const char *buf_info;
3865 u32 *max_access;
3866
3867 if (rdonly_mem) {
3868 if (t == BPF_WRITE) {
3869 verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3870 return -EACCES;
3871 }
3872 buf_info = "rdonly";
3873 max_access = &env->prog->aux->max_rdonly_access;
3874 } else {
3875 buf_info = "rdwr";
3876 max_access = &env->prog->aux->max_rdwr_access;
3877 }
3878
3879 err = check_buffer_access(env, reg, regno, off, size, false, buf_info, max_access);
3880 if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) {
3881 mark_reg_unknown(env, regs, value_regno);
3882 }
3883 } else {
3884 verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str(env, reg->type));
3885 return -EACCES;
3886 }
3887
3888 if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && regs[value_regno].type == SCALAR_VALUE) {
3889 /* b/h/w load zero-extends, mark upper bits as known 0 */
3890 coerce_reg_to_size(&regs[value_regno], size);
3891 }
3892 return err;
3893 }
3894
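/* Verify a BPF_XADD (atomic add) instruction: only word/double-word sizes
 * with imm == 0 are allowed, the destination must not be a ctx, packet,
 * flow-keys or socket pointer, and the memory is checked as both a read
 * and a write since atomic add does both.
 */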
3895 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
3896 {
3897 int err;
3898
3899 if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || insn->imm != 0) {
3900 verbose(env, "BPF_XADD uses reserved fields\n");
3901 return -EINVAL;
3902 }
3903
3904 /* check src1 operand */
3905 err = check_reg_arg(env, insn->src_reg, SRC_OP);
3906 if (err) {
3907 return err;
3908 }
3909
3910 /* check src2 operand */
3911 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3912 if (err) {
3913 return err;
3914 }
3915
3916 if (is_pointer_value(env, insn->src_reg)) {
3917 verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
3918 return -EACCES;
3919 }
3920
3921 if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) ||
3922 is_sk_reg(env, insn->dst_reg)) {
3923 verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", insn->dst_reg,
3924 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
3925 return -EACCES;
3926 }
3927
3928 /* check whether atomic_add can read the memory */
3929 err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1, true);
3930 if (err) {
3931 return err;
3932 }
3933
3934 /* check whether atomic_add can write into the same memory */
3935 return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3936 }
3937
3938 /* When register 'regno' is used to read the stack (either directly or through
3939 * a helper function) make sure that it's within stack boundary and, depending
3940 * on the access type, that all elements of the stack are initialized.
3941 *
3942 * 'off' includes 'regno->off', but not its dynamic part (if any).
3943 *
3944 * All registers that have been spilled on the stack in the slots within the
3945 * read offsets are marked as read.
3946 */
3947 static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size,
3948 bool zero_size_allowed, enum stack_access_src type,
3949 struct bpf_call_arg_meta *meta)
3950 {
3951 struct bpf_reg_state *reg = reg_state(env, regno);
3952 struct bpf_func_state *state = func(env, reg);
3953 int err, min_off, max_off, i, j, slot, spi;
3954 char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
3955 enum bpf_access_type bounds_check_type;
3956 /* Some accesses can write anything into the stack, others are
3957 * read-only.
3958 */
3959 bool clobber = false;
3960
3961 if (access_size == 0 && !zero_size_allowed) {
3962 verbose(env, "invalid zero-sized read\n");
3963 return -EACCES;
3964 }
3965
3966 if (type == ACCESS_HELPER) {
3967 /* The bounds checks for writes are more permissive than for
3968 * reads. However, if raw_mode is not set, we'll do extra
3969 * checks below.
3970 */
3971 bounds_check_type = BPF_WRITE;
3972 clobber = true;
3973 } else {
3974 bounds_check_type = BPF_READ;
3975 }
3976 err = check_stack_access_within_bounds(env, regno, off, access_size, type, bounds_check_type);
3977 if (err) {
3978 return err;
3979 }
3980
3981 if (tnum_is_const(reg->var_off)) {
3982 min_off = max_off = reg->var_off.value + off;
3983 } else {
3984 /* Variable offset is prohibited for unprivileged mode for
3985 * simplicity since it requires corresponding support in
3986 * Spectre masking for stack ALU.
3987 * See also retrieve_ptr_limit().
3988 */
3989 if (!env->bypass_spec_v1) {
3990 char tn_buf[48];
3991
3992 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3993 verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n", regno, err_extra,
3994 tn_buf);
3995 return -EACCES;
3996 }
3997 /* Only an initialized buffer on the stack may be accessed with a
3998 * variable offset. With an uninitialized buffer it's hard to
3999 * guarantee that the whole memory is marked as initialized on
4000 * helper return, since the specific bounds are unknown, which may
4001 * cause uninitialized stack data to leak.
4002 */
4003 if (meta && meta->raw_mode) {
4004 meta = NULL;
4005 }
4006
4007 min_off = reg->smin_value + off;
4008 max_off = reg->smax_value + off;
4009 }
4010
4011 if (meta && meta->raw_mode) {
4012 meta->access_size = access_size;
4013 meta->regno = regno;
4014 return 0;
4015 }
4016
4017 for (i = min_off; i < max_off + access_size; i++) {
4018 u8 *stype;
4019
4020 slot = -i - 1;
4021 spi = slot / BPF_REG_SIZE;
4022 if (state->allocated_stack <= slot) {
4023 goto err;
4024 }
4025 stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4026 if (*stype == STACK_MISC) {
4027 goto mark;
4028 }
4029 if (*stype == STACK_ZERO) {
4030 if (clobber) {
4031 /* helper can write anything into the stack */
4032 *stype = STACK_MISC;
4033 }
4034 goto mark;
4035 }
4036
4037 if (state->stack[spi].slot_type[0] == STACK_SPILL && state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) {
4038 goto mark;
4039 }
4040
4041 if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4042 (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || env->allow_ptr_leaks)) {
4043 if (clobber) {
4044 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4045 for (j = 0; j < BPF_REG_SIZE; j++) {
4046 state->stack[spi].slot_type[j] = STACK_MISC;
4047 }
4048 }
4049 goto mark;
4050 }
4051
4052 err:
4053 if (tnum_is_const(reg->var_off)) {
4054 verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", err_extra, regno, min_off, i - min_off,
4055 access_size);
4056 } else {
4057 char tn_buf[48];
4058
4059 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4060 verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n", err_extra, regno, tn_buf, i - min_off,
4061 access_size);
4062 }
4063 return -EACCES;
4064 mark:
4065 /* reading any byte out of 8-byte 'spill_slot' will cause
4066 * the whole slot to be marked as 'read'
4067 */
4068 mark_reg_read(env, &state->stack[spi].spilled_ptr, state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
4069 }
4070 return update_stack_depth(env, state, min_off);
4071 }
4072
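/* Check that a helper may access 'access_size' bytes through the pointer
 * in register 'regno', dispatching on the pointer's base type. For
 * PTR_TO_STACK this also verifies that the accessed stack range is
 * initialized.
 */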
4073 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed,
4074 struct bpf_call_arg_meta *meta)
4075 {
4076 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4077 const char *buf_info;
4078 u32 *max_access;
4079
4080 switch (base_type(reg->type)) {
4081 case PTR_TO_PACKET:
4082 case PTR_TO_PACKET_META:
4083 return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed);
4084 case PTR_TO_MAP_VALUE:
4085 if (check_map_access_type(env, regno, reg->off, access_size,
4086 meta && meta->raw_mode ? BPF_WRITE : BPF_READ)) {
4087 return -EACCES;
4088 }
4089 return check_map_access(env, regno, reg->off, access_size, zero_size_allowed);
4090 case PTR_TO_MEM:
4091 return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed);
4092 case PTR_TO_BUF:
4093 if (type_is_rdonly_mem(reg->type)) {
4094 if (meta && meta->raw_mode) {
4095 return -EACCES;
4096 }
4097
4098 buf_info = "rdonly";
4099 max_access = &env->prog->aux->max_rdonly_access;
4100 } else {
4101 buf_info = "rdwr";
4102 max_access = &env->prog->aux->max_rdwr_access;
4103 }
4104 return check_buffer_access(env, reg, regno, reg->off, access_size, zero_size_allowed, buf_info, max_access);
4105 case PTR_TO_STACK:
4106 return check_stack_range_initialized(env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER,
4107 meta);
4108 default: /* scalar_value or invalid ptr */
4109 /* Allow zero-byte read from NULL, regardless of pointer type */
4110 if (zero_size_allowed && access_size == 0 && register_is_null(reg)) {
4111 return 0;
4112 }
4113
4114 verbose(env, "R%d type=%s ", regno, reg_type_str(env, reg->type));
4115 verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
4116 return -EACCES;
4117 }
4118 }
4119
4120 /* Implementation details:
4121 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4122 * Two bpf_map_lookups (even with the same key) will have different reg->id.
4123 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4124 * value_or_null->value transition, since the verifier only cares about
4125 * the range of access to valid map value pointer and doesn't care about actual
4126 * address of the map element.
4127 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4128 * reg->id > 0 after value_or_null->value transition. By doing so
4129 * two bpf_map_lookups will be considered two different pointers that
4130 * point to different bpf_spin_locks.
4131 * The verifier allows taking only one bpf_spin_lock at a time to avoid
4132 * deadlocks.
4133 * Since only one bpf_spin_lock is allowed the checks are simpler than
4134 * reg_is_refcounted() logic. The verifier needs to remember only
4135 * one spin_lock instead of array of acquired_refs.
4136 * cur_state->active_spin_lock remembers which map value element got locked
4137 * and clears it after bpf_spin_unlock.
4138 */
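/* For illustration only, a program is expected to pair the helpers
 * roughly like:
 *
 *	val = bpf_map_lookup_elem(&map, &key);
 *	if (val) {
 *		bpf_spin_lock(&val->lock);
 *		... update val ...
 *		bpf_spin_unlock(&val->lock);
 *	}
 *
 * where 'lock' is a 'struct bpf_spin_lock' member of the map value and
 * 'map', 'key' and 'val' are placeholder names.
 */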
4139 static int process_spin_lock(struct bpf_verifier_env *env, int regno, bool is_lock)
4140 {
4141 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4142 struct bpf_verifier_state *cur = env->cur_state;
4143 bool is_const = tnum_is_const(reg->var_off);
4144 struct bpf_map *map = reg->map_ptr;
4145 u64 val = reg->var_off.value;
4146
4147 if (!is_const) {
4148 verbose(env, "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", regno);
4149 return -EINVAL;
4150 }
4151 if (!map->btf) {
4152 verbose(env, "map '%s' has to have BTF in order to use bpf_spin_lock\n", map->name);
4153 return -EINVAL;
4154 }
4155 if (!map_value_has_spin_lock(map)) {
4156 if (map->spin_lock_off == -E2BIG) {
4157 verbose(env, "map '%s' has more than one 'struct bpf_spin_lock'\n", map->name);
4158 } else if (map->spin_lock_off == -ENOENT) {
4159 verbose(env, "map '%s' doesn't have 'struct bpf_spin_lock'\n", map->name);
4160 } else {
4161 verbose(env, "map '%s' is not a struct type or bpf_spin_lock is mangled\n", map->name);
4162 }
4163 return -EINVAL;
4164 }
4165 if (map->spin_lock_off != val + reg->off) {
4166 verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", val + reg->off);
4167 return -EINVAL;
4168 }
4169 if (is_lock) {
4170 if (cur->active_spin_lock) {
4171 verbose(env, "Locking two bpf_spin_locks are not allowed\n");
4172 return -EINVAL;
4173 }
4174 cur->active_spin_lock = reg->id;
4175 } else {
4176 if (!cur->active_spin_lock) {
4177 verbose(env, "bpf_spin_unlock without taking a lock\n");
4178 return -EINVAL;
4179 }
4180 if (cur->active_spin_lock != reg->id) {
4181 verbose(env, "bpf_spin_unlock of different lock\n");
4182 return -EINVAL;
4183 }
4184 cur->active_spin_lock = 0;
4185 }
4186 return 0;
4187 }
4188
4189 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4190 {
4191 return base_type(type) == ARG_PTR_TO_MEM || base_type(type) == ARG_PTR_TO_UNINIT_MEM;
4192 }
4193
4194 static bool arg_type_is_mem_size(enum bpf_arg_type type)
4195 {
4196 return type == ARG_CONST_SIZE || type == ARG_CONST_SIZE_OR_ZERO;
4197 }
4198
4199 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4200 {
4201 return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4202 }
4203
4204 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4205 {
4206 return type == ARG_PTR_TO_INT || type == ARG_PTR_TO_LONG;
4207 }
4208
4209 static int int_ptr_type_to_size(enum bpf_arg_type type)
4210 {
4211 if (type == ARG_PTR_TO_INT) {
4212 return sizeof(u32);
4213 } else if (type == ARG_PTR_TO_LONG) {
4214 return sizeof(u64);
4215 }
4216
4217 return -EINVAL;
4218 }
4219
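/* For sockmap/sockhash the map value argument is really a socket, so the
 * expected argument type is rewritten to ARG_PTR_TO_BTF_ID_SOCK_COMMON
 * before the register type is checked.
 */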
4220 static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta,
4221 enum bpf_arg_type *arg_type)
4222 {
4223 if (!meta->map_ptr) {
4224 /* kernel subsystem misconfigured verifier */
4225 verbose(env, "invalid map_ptr to access map->type\n");
4226 return -EACCES;
4227 }
4228
4229 switch (meta->map_ptr->map_type) {
4230 case BPF_MAP_TYPE_SOCKMAP:
4231 case BPF_MAP_TYPE_SOCKHASH:
4232 if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4233 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4234 } else {
4235 verbose(env, "invalid arg_type for sockmap/sockhash\n");
4236 return -EINVAL;
4237 }
4238 break;
4239
4240 default:
4241 break;
4242 }
4243 return 0;
4244 }
4245
4246 struct bpf_reg_types {
4247 const enum bpf_reg_type types[10];
4248 u32 *btf_id;
4249 };
4250
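/* Per-argument-type tables of acceptable register types. Unused trailing
 * slots stay zero-initialized (NOT_INIT), which terminates the scan in
 * check_reg_type().
 */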
4251 static const struct bpf_reg_types map_key_value_types = {
4252 .types =
4253 {
4254 PTR_TO_STACK,
4255 PTR_TO_PACKET,
4256 PTR_TO_PACKET_META,
4257 PTR_TO_MAP_VALUE,
4258 },
4259 };
4260
4261 static const struct bpf_reg_types sock_types = {
4262 .types =
4263 {
4264 PTR_TO_SOCK_COMMON,
4265 PTR_TO_SOCKET,
4266 PTR_TO_TCP_SOCK,
4267 PTR_TO_XDP_SOCK,
4268 },
4269 };
4270
4271 #ifdef CONFIG_NET
4272 static const struct bpf_reg_types btf_id_sock_common_types = {
4273 .types =
4274 {
4275 PTR_TO_SOCK_COMMON,
4276 PTR_TO_SOCKET,
4277 PTR_TO_TCP_SOCK,
4278 PTR_TO_XDP_SOCK,
4279 PTR_TO_BTF_ID,
4280 },
4281 .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4282 };
4283 #endif
4284
4285 static const struct bpf_reg_types mem_types = {
4286 .types =
4287 {
4288 PTR_TO_STACK,
4289 PTR_TO_PACKET,
4290 PTR_TO_PACKET_META,
4291 PTR_TO_MAP_VALUE,
4292 PTR_TO_MEM,
4293 PTR_TO_MEM | MEM_ALLOC,
4294 PTR_TO_BUF,
4295 },
4296 };
4297
4298 static const struct bpf_reg_types int_ptr_types = {
4299 .types =
4300 {
4301 PTR_TO_STACK,
4302 PTR_TO_PACKET,
4303 PTR_TO_PACKET_META,
4304 PTR_TO_MAP_VALUE,
4305 },
4306 };
4307
4308 static const struct bpf_reg_types fullsock_types = {.types = {PTR_TO_SOCKET}};
4309 static const struct bpf_reg_types scalar_types = {.types = {SCALAR_VALUE}};
4310 static const struct bpf_reg_types context_types = {.types = {PTR_TO_CTX}};
4311 static const struct bpf_reg_types alloc_mem_types = {.types = {PTR_TO_MEM | MEM_ALLOC}};
4312 static const struct bpf_reg_types const_map_ptr_types = {.types = {CONST_PTR_TO_MAP}};
4313 static const struct bpf_reg_types btf_ptr_types = {.types = {PTR_TO_BTF_ID}};
4314 static const struct bpf_reg_types spin_lock_types = {.types = {PTR_TO_MAP_VALUE}};
4315 static const struct bpf_reg_types percpu_btf_ptr_types = {.types = {PTR_TO_PERCPU_BTF_ID}};
4316
4317 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
4318 [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
4319 [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
4320 [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
4321 [ARG_CONST_SIZE] = &scalar_types,
4322 [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
4323 [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
4324 [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
4325 [ARG_PTR_TO_CTX] = &context_types,
4326 [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
4327 #ifdef CONFIG_NET
4328 [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
4329 #endif
4330 [ARG_PTR_TO_SOCKET] = &fullsock_types,
4331 [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
4332 [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
4333 [ARG_PTR_TO_MEM] = &mem_types,
4334 [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
4335 [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
4336 [ARG_PTR_TO_INT] = &int_ptr_types,
4337 [ARG_PTR_TO_LONG] = &int_ptr_types,
4338 [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
4339 };
4340
4341 static int check_reg_type(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, const u32 *arg_btf_id)
4342 {
4343 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4344 enum bpf_reg_type expected, type = reg->type;
4345 const struct bpf_reg_types *compatible;
4346 int i, j;
4347
4348 compatible = compatible_reg_types[base_type(arg_type)];
4349 if (!compatible) {
4350 verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
4351 return -EFAULT;
4352 }
4353
4354 /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
4355 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
4356 *
4357 * Same for MAYBE_NULL:
4358 *
4359 * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
4360 * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
4361 *
4362 * Therefore we fold these flags depending on the arg_type before comparison.
4363 */
4364 if (arg_type & MEM_RDONLY) {
4365 type &= ~MEM_RDONLY;
4366 }
4367 if (arg_type & PTR_MAYBE_NULL) {
4368 type &= ~PTR_MAYBE_NULL;
4369 }
4370
4371 for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
4372 expected = compatible->types[i];
4373 if (expected == NOT_INIT) {
4374 break;
4375 }
4376
4377 if (type == expected) {
4378 goto found;
4379 }
4380 }
4381
4382 verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
4383 for (j = 0; j + 1 < i; j++) {
4384 verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
4385 }
4386 verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
4387 return -EACCES;
4388
4389 found:
4390 if (reg->type == PTR_TO_BTF_ID) {
4391 if (!arg_btf_id) {
4392 if (!compatible->btf_id) {
4393 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
4394 return -EFAULT;
4395 }
4396 arg_btf_id = compatible->btf_id;
4397 }
4398
4399 if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *arg_btf_id)) {
4400 verbose(env, "R%d is of type %s but %s is expected\n", regno, kernel_type_name(reg->btf_id),
4401 kernel_type_name(*arg_btf_id));
4402 return -EACCES;
4403 }
4404 }
4405
4406 return 0;
4407 }
4408
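/* Verify one helper argument: check the register against the expected
 * arg type, enforce pointer-offset restrictions, and record per-call
 * metadata (map pointer, memory size, ref_obj_id, ...) in 'meta'.
 */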
4409 static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta,
4410 const struct bpf_func_proto *fn)
4411 {
4412 u32 regno = BPF_REG_1 + arg;
4413 struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4414 enum bpf_arg_type arg_type = fn->arg_type[arg];
4415 enum bpf_reg_type type = reg->type;
4416 int err = 0;
4417
4418 if (arg_type == ARG_DONTCARE) {
4419 return 0;
4420 }
4421
4422 err = check_reg_arg(env, regno, SRC_OP);
4423 if (err) {
4424 return err;
4425 }
4426
4427 if (arg_type == ARG_ANYTHING) {
4428 if (is_pointer_value(env, regno)) {
4429 verbose(env, "R%d leaks addr into helper function\n", regno);
4430 return -EACCES;
4431 }
4432 return 0;
4433 }
4434
4435 if (type_is_pkt_pointer(type) && !may_access_direct_pkt_data(env, meta, BPF_READ)) {
4436 verbose(env, "helper access to the packet is not allowed\n");
4437 return -EACCES;
4438 }
4439
4440 if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4441 err = resolve_map_arg_type(env, meta, &arg_type);
4442 if (err) {
4443 return err;
4444 }
4445 }
4446
4447 if (register_is_null(reg) && type_may_be_null(arg_type)) {
4448 /* A NULL register has a SCALAR_VALUE type, so skip
4449 * type checking.
4450 */
4451 goto skip_type_check;
4452 }
4453
4454 err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
4455 if (err) {
4456 return err;
4457 }
4458
4459 switch ((u32)type) {
4460 case SCALAR_VALUE:
4461 /* Pointer types where reg offset is explicitly allowed: */
4462 case PTR_TO_PACKET:
4463 case PTR_TO_PACKET_META:
4464 case PTR_TO_MAP_VALUE:
4465 case PTR_TO_MEM:
4466 case PTR_TO_MEM | MEM_RDONLY:
4467 case PTR_TO_MEM | MEM_ALLOC:
4468 case PTR_TO_BUF:
4469 case PTR_TO_BUF | MEM_RDONLY:
4470 case PTR_TO_STACK:
4471 /* Some of the argument types nevertheless require a
4472 * zero register offset.
4473 */
4474 if (arg_type == ARG_PTR_TO_ALLOC_MEM) {
4475 goto force_off_check;
4476 }
4477 break;
4478 /* All the rest must be rejected: */
4479 default:
4480 force_off_check:
4481 err = __check_ptr_off_reg(env, reg, regno, type == PTR_TO_BTF_ID);
4482 if (err < 0) {
4483 return err;
4484 }
4485 break;
4486 }
4487
4488 skip_type_check:
4489 if (reg->ref_obj_id) {
4490 if (meta->ref_obj_id) {
4491 verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno,
4492 reg->ref_obj_id, meta->ref_obj_id);
4493 return -EFAULT;
4494 }
4495 meta->ref_obj_id = reg->ref_obj_id;
4496 }
4497
4498 if (arg_type == ARG_CONST_MAP_PTR) {
4499 /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
4500 meta->map_ptr = reg->map_ptr;
4501 } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
4502 /* bpf_map_xxx(..., map_ptr, ..., key) call:
4503 * check that [key, key + map->key_size) are within
4504 * stack limits and initialized
4505 */
4506 if (!meta->map_ptr) {
4507 /* in function declaration map_ptr must come before
4508 * map_key, so that it's verified and known before
4509 * we have to check map_key here. Otherwise it means
4510 * that kernel subsystem misconfigured verifier
4511 */
4512 verbose(env, "invalid map_ptr to access map->key\n");
4513 return -EACCES;
4514 }
4515 err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL);
4516 } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4517 if (type_may_be_null(arg_type) && register_is_null(reg)) {
4518 return 0;
4519 }
4520
4521 /* bpf_map_xxx(..., map_ptr, ..., value) call:
4522 * check [value, value + map->value_size) validity
4523 */
4524 if (!meta->map_ptr) {
4525 /* kernel subsystem misconfigured verifier */
4526 verbose(env, "invalid map_ptr to access map->value\n");
4527 return -EACCES;
4528 }
4529 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
4530 err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta);
4531 } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4532 if (!reg->btf_id) {
4533 verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4534 return -EACCES;
4535 }
4536 meta->ret_btf_id = reg->btf_id;
4537 } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
4538 if (meta->func_id == BPF_FUNC_spin_lock) {
4539 if (process_spin_lock(env, regno, true)) {
4540 return -EACCES;
4541 }
4542 } else if (meta->func_id == BPF_FUNC_spin_unlock) {
4543 if (process_spin_lock(env, regno, false)) {
4544 return -EACCES;
4545 }
4546 } else {
4547 verbose(env, "verifier internal error\n");
4548 return -EFAULT;
4549 }
4550 } else if (arg_type_is_mem_ptr(arg_type)) {
4551 /* The access to this pointer is only checked when we hit the
4552 * next is_mem_size argument below.
4553 */
4554 meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
4555 } else if (arg_type_is_mem_size(arg_type)) {
4556 bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
4557
4558 /* This is used to refine r0 return value bounds for helpers
4559 * that enforce this value as an upper bound on return values.
4560 * See do_refine_retval_range() for helpers that can refine
4561 * the return value. The helper's C type for this value is u32, so we
4562 * pull the register bound from umax_value; if it is negative, the
4563 * verifier errors out. Only upper bounds can be learned because retval is an
4564 * int type and negative retvals are allowed.
4565 */
4566 meta->msize_max_value = reg->umax_value;
4567
4568 /* The register is SCALAR_VALUE; the access check
4569 * happens using its boundaries.
4570 */
4571 if (!tnum_is_const(reg->var_off)) {
4572 /* For unprivileged variable accesses, disable raw
4573 * mode so that the program is required to
4574 * initialize all the memory that the helper could
4575 * just partially fill up.
4576 */
4577 meta = NULL;
4578 }
4579
4580 if (reg->smin_value < 0) {
4581 verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", regno);
4582 return -EACCES;
4583 }
4584
4585 if (reg->umin_value == 0) {
4586 err = check_helper_mem_access(env, regno - 1, 0, zero_size_allowed, meta);
4587 if (err) {
4588 return err;
4589 }
4590 }
4591
4592 if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4593 verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", regno);
4594 return -EACCES;
4595 }
4596 err = check_helper_mem_access(env, regno - 1, reg->umax_value, zero_size_allowed, meta);
4597 if (!err) {
4598 err = mark_chain_precision(env, regno);
4599 }
4600 } else if (arg_type_is_alloc_size(arg_type)) {
4601 if (!tnum_is_const(reg->var_off)) {
4602 verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n", regno);
4603 return -EACCES;
4604 }
4605 meta->mem_size = reg->var_off.value;
4606 } else if (arg_type_is_int_ptr(arg_type)) {
4607 int size = int_ptr_type_to_size(arg_type);
4608
4609 err = check_helper_mem_access(env, regno, size, false, meta);
4610 if (err) {
4611 return err;
4612 }
4613 err = check_ptr_alignment(env, reg, 0, size, true);
4614 }
4615
4616 return err;
4617 }
4618
4619 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
4620 {
4621 enum bpf_attach_type eatype = env->prog->expected_attach_type;
4622 enum bpf_prog_type type = resolve_prog_type(env->prog);
4623
4624 if (func_id != BPF_FUNC_map_update_elem) {
4625 return false;
4626 }
4627
4628 /* It's not possible to get access to a locked struct sock in these
4629 * contexts, so updating is safe.
4630 */
4631 switch (type) {
4632 case BPF_PROG_TYPE_TRACING:
4633 if (eatype == BPF_TRACE_ITER) {
4634 return true;
4635 }
4636 break;
4637 case BPF_PROG_TYPE_SOCKET_FILTER:
4638 case BPF_PROG_TYPE_SCHED_CLS:
4639 case BPF_PROG_TYPE_SCHED_ACT:
4640 case BPF_PROG_TYPE_XDP:
4641 case BPF_PROG_TYPE_SK_REUSEPORT:
4642 case BPF_PROG_TYPE_FLOW_DISSECTOR:
4643 case BPF_PROG_TYPE_SK_LOOKUP:
4644 return true;
4645 default:
4646 break;
4647 }
4648
4649 verbose(env, "cannot update sockmap in this context\n");
4650 return false;
4651 }
4652
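/* Tail calls combined with bpf2bpf calls are only allowed when the
 * program is going to be JITed, and (at this point) only the x86-64 JIT
 * supports the combination.
 */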
4653 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
4654 {
4655 return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
4656 }
4657
4658 static int check_map_func_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, int func_id)
4659 {
4660 if (!map) {
4661 return 0;
4662 }
4663
4664 /* We need a two way check, first is from map perspective ... */
4665 switch (map->map_type) {
4666 case BPF_MAP_TYPE_PROG_ARRAY:
4667 if (func_id != BPF_FUNC_tail_call) {
4668 goto error;
4669 }
4670 break;
4671 case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4672 if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output &&
4673 func_id != BPF_FUNC_skb_output && func_id != BPF_FUNC_perf_event_read_value &&
4674 func_id != BPF_FUNC_xdp_output) {
4675 goto error;
4676 }
4677 break;
4678 case BPF_MAP_TYPE_RINGBUF:
4679 if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve &&
4680 func_id != BPF_FUNC_ringbuf_query) {
4681 goto error;
4682 }
4683 break;
4684 case BPF_MAP_TYPE_STACK_TRACE:
4685 if (func_id != BPF_FUNC_get_stackid) {
4686 goto error;
4687 }
4688 break;
4689 case BPF_MAP_TYPE_CGROUP_ARRAY:
4690 if (func_id != BPF_FUNC_skb_under_cgroup && func_id != BPF_FUNC_current_task_under_cgroup) {
4691 goto error;
4692 }
4693 break;
4694 case BPF_MAP_TYPE_CGROUP_STORAGE:
4695 case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
4696 if (func_id != BPF_FUNC_get_local_storage) {
4697 goto error;
4698 }
4699 break;
4700 case BPF_MAP_TYPE_DEVMAP:
4701 case BPF_MAP_TYPE_DEVMAP_HASH:
4702 if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) {
4703 goto error;
4704 }
4705 break;
4706 /* Restrict bpf side of cpumap and xskmap, open when use-cases
4707 * appear.
4708 */
4709 case BPF_MAP_TYPE_CPUMAP:
4710 if (func_id != BPF_FUNC_redirect_map) {
4711 goto error;
4712 }
4713 break;
4714 case BPF_MAP_TYPE_XSKMAP:
4715 if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) {
4716 goto error;
4717 }
4718 break;
4719 case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4720 case BPF_MAP_TYPE_HASH_OF_MAPS:
4721 if (func_id != BPF_FUNC_map_lookup_elem) {
4722 goto error;
4723 }
4724 break;
4725 case BPF_MAP_TYPE_SOCKMAP:
4726 if (func_id != BPF_FUNC_sk_redirect_map && func_id != BPF_FUNC_sock_map_update &&
4727 func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_map &&
4728 func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem &&
4729 !may_update_sockmap(env, func_id)) {
4730 goto error;
4731 }
4732 break;
4733 case BPF_MAP_TYPE_SOCKHASH:
4734 if (func_id != BPF_FUNC_sk_redirect_hash && func_id != BPF_FUNC_sock_hash_update &&
4735 func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_hash &&
4736 func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem &&
4737 !may_update_sockmap(env, func_id)) {
4738 goto error;
4739 }
4740 break;
4741 case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
4742 if (func_id != BPF_FUNC_sk_select_reuseport) {
4743 goto error;
4744 }
4745 break;
4746 case BPF_MAP_TYPE_QUEUE:
4747 case BPF_MAP_TYPE_STACK:
4748 if (func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_map_pop_elem &&
4749 func_id != BPF_FUNC_map_push_elem) {
4750 goto error;
4751 }
4752 break;
4753 case BPF_MAP_TYPE_SK_STORAGE:
4754 if (func_id != BPF_FUNC_sk_storage_get && func_id != BPF_FUNC_sk_storage_delete) {
4755 goto error;
4756 }
4757 break;
4758 case BPF_MAP_TYPE_INODE_STORAGE:
4759 if (func_id != BPF_FUNC_inode_storage_get && func_id != BPF_FUNC_inode_storage_delete) {
4760 goto error;
4761 }
4762 break;
4763 default:
4764 break;
4765 }
4766
4767 /* ... and second from the function itself. */
4768 switch (func_id) {
4769 case BPF_FUNC_tail_call:
4770 if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
4771 goto error;
4772 }
4773 if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
4774 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
4775 return -EINVAL;
4776 }
4777 break;
4778 case BPF_FUNC_perf_event_read:
4779 case BPF_FUNC_perf_event_output:
4780 case BPF_FUNC_perf_event_read_value:
4781 case BPF_FUNC_skb_output:
4782 case BPF_FUNC_xdp_output:
4783 if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
4784 goto error;
4785 }
4786 break;
4787 case BPF_FUNC_ringbuf_output:
4788 case BPF_FUNC_ringbuf_reserve:
4789 case BPF_FUNC_ringbuf_query:
4790 if (map->map_type != BPF_MAP_TYPE_RINGBUF) {
4791 goto error;
4792 }
4793 break;
4794 case BPF_FUNC_get_stackid:
4795 if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) {
4796 goto error;
4797 }
4798 break;
4799 case BPF_FUNC_current_task_under_cgroup:
4800 case BPF_FUNC_skb_under_cgroup:
4801 if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) {
4802 goto error;
4803 }
4804 break;
4805 case BPF_FUNC_redirect_map:
4806 if (map->map_type != BPF_MAP_TYPE_DEVMAP && map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
4807 map->map_type != BPF_MAP_TYPE_CPUMAP && map->map_type != BPF_MAP_TYPE_XSKMAP) {
4808 goto error;
4809 }
4810 break;
4811 case BPF_FUNC_sk_redirect_map:
4812 case BPF_FUNC_msg_redirect_map:
4813 case BPF_FUNC_sock_map_update:
4814 if (map->map_type != BPF_MAP_TYPE_SOCKMAP) {
4815 goto error;
4816 }
4817 break;
4818 case BPF_FUNC_sk_redirect_hash:
4819 case BPF_FUNC_msg_redirect_hash:
4820 case BPF_FUNC_sock_hash_update:
4821 if (map->map_type != BPF_MAP_TYPE_SOCKHASH) {
4822 goto error;
4823 }
4824 break;
4825 case BPF_FUNC_get_local_storage:
4826 if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
4827 goto error;
4828 }
4829 break;
4830 case BPF_FUNC_sk_select_reuseport:
4831 if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4832 map->map_type != BPF_MAP_TYPE_SOCKHASH) {
4833 goto error;
4834 }
4835 break;
4836 case BPF_FUNC_map_peek_elem:
4837 case BPF_FUNC_map_pop_elem:
4838 case BPF_FUNC_map_push_elem:
4839 if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) {
4840 goto error;
4841 }
4842 break;
4843 case BPF_FUNC_sk_storage_get:
4844 case BPF_FUNC_sk_storage_delete:
4845 if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
4846 goto error;
4847 }
4848 break;
4849 case BPF_FUNC_inode_storage_get:
4850 case BPF_FUNC_inode_storage_delete:
4851 if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) {
4852 goto error;
4853 }
4854 break;
4855 default:
4856 break;
4857 }
4858
4859 return 0;
4860 error:
4861 verbose(env, "cannot pass map_type %d into func %s#%d\n", map->map_type, func_id_name(func_id), func_id);
4862 return -EINVAL;
4863 }
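/* For example: a program calling bpf_tail_call() with a BPF_MAP_TYPE_HASH
 * map passes the first switch above (hash maps have no case there), but the
 * second switch rejects it, since BPF_FUNC_tail_call requires a
 * BPF_MAP_TYPE_PROG_ARRAY, producing the "cannot pass map_type ..." error.
 */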
4864
4865 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
4866 {
4867 int count = 0;
4868
4869 if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) {
4870 count++;
4871 }
4872 if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) {
4873 count++;
4874 }
4875 if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) {
4876 count++;
4877 }
4878 if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) {
4879 count++;
4880 }
4881 if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) {
4882 count++;
4883 }
4884
4885 /* We only support one arg being in raw mode at the moment,
4886 * which is sufficient for the helper functions we have
4887 * right now.
4888 */
4889 return count <= 1;
4890 }
4891
4892 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, enum bpf_arg_type arg_next)
4893 {
4894 return (arg_type_is_mem_ptr(arg_curr) && !arg_type_is_mem_size(arg_next)) ||
4895 (!arg_type_is_mem_ptr(arg_curr) && arg_type_is_mem_size(arg_next));
4896 }
4897
4898 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
4899 {
4900 /* bpf_xxx(..., buf, len) call will access 'len'
4901 * bytes from memory 'buf'. Both arg types need
4902 * to be paired, so make sure there's no buggy
4903 * helper function specification.
4904 */
4905 if (arg_type_is_mem_size(fn->arg1_type) || arg_type_is_mem_ptr(fn->arg5_type) ||
4906 check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
4907 check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
4908 check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
4909 check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) {
4910 return false;
4911 }
4912
4913 return true;
4914 }
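/* For instance, an (illustrative) proto such as
 *
 *	.arg1_type = ARG_PTR_TO_UNINIT_MEM,
 *	.arg2_type = ARG_CONST_SIZE,
 *
 * is accepted because the memory pointer in arg1 is immediately followed by
 * its size in arg2. A size argument without a preceding memory pointer (or
 * the other way around) makes check_args_pair_invalid() trip, and
 * check_helper_call() then reports the helper as misconfigured.
 */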
4915
4916 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
4917 {
4918 int count = 0;
4919
4920 if (arg_type_may_be_refcounted(fn->arg1_type)) {
4921 count++;
4922 }
4923 if (arg_type_may_be_refcounted(fn->arg2_type)) {
4924 count++;
4925 }
4926 if (arg_type_may_be_refcounted(fn->arg3_type)) {
4927 count++;
4928 }
4929 if (arg_type_may_be_refcounted(fn->arg4_type)) {
4930 count++;
4931 }
4932 if (arg_type_may_be_refcounted(fn->arg5_type)) {
4933 count++;
4934 }
4935
4936 /* A reference acquiring function cannot acquire
4937 * another refcounted ptr.
4938 */
4939 if (may_be_acquire_function(func_id) && count) {
4940 return false;
4941 }
4942
4943 /* We only support one arg being unreferenced at the moment,
4944 * which is sufficient for the helper functions we have right now.
4945 */
4946 return count <= 1;
4947 }
4948
4949 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
4950 {
4951 int i;
4952
4953 for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4954 if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) {
4955 return false;
4956 }
4957
4958 if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) {
4959 return false;
4960 }
4961 }
4962
4963 return true;
4964 }
4965
4966 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
4967 {
4968 return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && check_btf_id_ok(fn) && check_refcount_ok(fn, func_id)
4969 ? 0
4970 : -EINVAL;
4971 }
4972
4973 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
4974 * are now invalid, so turn them into unknown SCALAR_VALUE.
4975 */
4976 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, struct bpf_func_state *state)
4977 {
4978 struct bpf_reg_state *regs = state->regs, *reg;
4979 int i;
4980
4981 for (i = 0; i < MAX_BPF_REG; i++) {
4982 if (reg_is_pkt_pointer_any(&regs[i])) {
4983 mark_reg_unknown(env, regs, i);
4984 }
4985 }
4986
4987 bpf_for_each_spilled_reg(i, state, reg)
4988 {
4989 if (!reg) {
4990 continue;
4991 }
4992 if (reg_is_pkt_pointer_any(reg)) {
4993 __mark_reg_unknown(env, reg);
4994 }
4995 }
4996 }
4997
4998 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
4999 {
5000 struct bpf_verifier_state *vstate = env->cur_state;
5001 int i;
5002
5003 for (i = 0; i <= vstate->curframe; i++) {
5004 __clear_all_pkt_pointers(env, vstate->frame[i]);
5005 }
5006 }
5007
5008 static void release_reg_references(struct bpf_verifier_env *env, struct bpf_func_state *state, int ref_obj_id)
5009 {
5010 struct bpf_reg_state *regs = state->regs, *reg;
5011 int i;
5012
5013 for (i = 0; i < MAX_BPF_REG; i++) {
5014 if (regs[i].ref_obj_id == ref_obj_id) {
5015 mark_reg_unknown(env, regs, i);
5016 }
5017 }
5018
5019 bpf_for_each_spilled_reg(i, state, reg)
5020 {
5021 if (!reg) {
5022 continue;
5023 }
5024 if (reg->ref_obj_id == ref_obj_id) {
5025 __mark_reg_unknown(env, reg);
5026 }
5027 }
5028 }
5029
5030 /* The pointer with the specified id has released its reference to kernel
5031 * resources. Identify all copies of the same pointer and clear the reference.
5032 */
5033 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
5034 {
5035 struct bpf_verifier_state *vstate = env->cur_state;
5036 int err;
5037 int i;
5038
5039 err = release_reference_state(cur_func(env), ref_obj_id);
5040 if (err) {
5041 return err;
5042 }
5043
5044 for (i = 0; i <= vstate->curframe; i++) {
5045 release_reg_references(env, vstate->frame[i], ref_obj_id);
5046 }
5047
5048 return 0;
5049 }
5050
5051 static void clear_caller_saved_regs(struct bpf_verifier_env *env, struct bpf_reg_state *regs)
5052 {
5053 int i;
5054
5055 /* after the call registers r0 - r5 were scratched */
5056 for (i = 0; i < CALLER_SAVED_REGS; i++) {
5057 mark_reg_not_init(env, regs, caller_saved[i]);
5058 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5059 }
5060 }
5061
5062 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx)
5063 {
5064 struct bpf_verifier_state *state = env->cur_state;
5065 struct bpf_func_info_aux *func_info_aux;
5066 struct bpf_func_state *caller, *callee;
5067 int i, err, subprog, target_insn;
5068 bool is_global = false;
5069
5070 if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5071 verbose(env, "the call stack of %d frames is too deep\n", state->curframe + 2);
5072 return -E2BIG;
5073 }
5074
5075 target_insn = *insn_idx + insn->imm;
5076 subprog = find_subprog(env, target_insn + 1);
5077 if (subprog < 0) {
5078 verbose(env, "verifier bug. No program starts at insn %d\n", target_insn + 1);
5079 return -EFAULT;
5080 }
5081
5082 caller = state->frame[state->curframe];
5083 if (state->frame[state->curframe + 1]) {
5084 verbose(env, "verifier bug. Frame %d already allocated\n", state->curframe + 1);
5085 return -EFAULT;
5086 }
5087
5088 func_info_aux = env->prog->aux->func_info_aux;
5089 if (func_info_aux) {
5090 is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5091 }
5092 err = btf_check_func_arg_match(env, subprog, caller->regs);
5093 if (err == -EFAULT) {
5094 return err;
5095 }
5096 if (is_global) {
5097 if (err) {
5098 verbose(env, "Caller passes invalid args into func#%d\n", subprog);
5099 return err;
5100 } else {
5101 if (env->log.level & BPF_LOG_LEVEL) {
5102 verbose(env, "Func#%d is global and valid. Skipping.\n", subprog);
5103 }
5104 clear_caller_saved_regs(env, caller->regs);
5105
5106 /* All global functions return a 64-bit SCALAR_VALUE */
5107 mark_reg_unknown(env, caller->regs, BPF_REG_0);
5108 caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5109
5110 /* continue with next insn after call */
5111 return 0;
5112 }
5113 }
5114
5115 callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5116 if (!callee) {
5117 return -ENOMEM;
5118 }
5119 state->frame[state->curframe + 1] = callee;
5120
5121 /* callee cannot access r0, r6 - r9 for reading and has to write
5122 * into its own stack before reading from it.
5123 * callee can read/write into caller's stack
5124 */
5125 init_func_state(env, callee,
5126 /* remember the callsite, it will be used by bpf_exit */
5127 *insn_idx /* callsite */, state->curframe + 1 /* frameno within this callchain */,
5128 subprog /* subprog number within this prog */);
5129
5130 /* Transfer references to the callee */
5131 err = transfer_reference_state(callee, caller);
5132 if (err) {
5133 return err;
5134 }
5135
5136 /* copy r1 - r5 args that callee can access. The copy includes parent
5137 * pointers, which connect us up to the liveness chain
5138 */
5139 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
5140 callee->regs[i] = caller->regs[i];
5141 }
5142
5143 clear_caller_saved_regs(env, caller->regs);
5144
5145 /* only increment it after check_reg_arg() finished */
5146 state->curframe++;
5147
5148 /* and go analyze first insn of the callee */
5149 *insn_idx = target_insn;
5150
5151 if (env->log.level & BPF_LOG_LEVEL) {
5152 verbose(env, "caller:\n");
5153 print_verifier_state(env, caller);
5154 verbose(env, "callee:\n");
5155 print_verifier_state(env, callee);
5156 }
5157 return 0;
5158 }
5159
5160 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
5161 {
5162 struct bpf_verifier_state *state = env->cur_state;
5163 struct bpf_func_state *caller, *callee;
5164 struct bpf_reg_state *r0;
5165 int err;
5166
5167 callee = state->frame[state->curframe];
5168 r0 = &callee->regs[BPF_REG_0];
5169 if (r0->type == PTR_TO_STACK) {
5170 /* technically it's ok to return caller's stack pointer
5171 * (or caller's caller's pointer) back to the caller,
5172 * since these pointers are valid. Only current stack
5173 * pointer will be invalid as soon as function exits,
5174 * but let's be conservative
5175 */
5176 verbose(env, "cannot return stack pointer to the caller\n");
5177 return -EINVAL;
5178 }
5179
5180 state->curframe--;
5181 caller = state->frame[state->curframe];
5182 /* return to the caller whatever r0 had in the callee */
5183 caller->regs[BPF_REG_0] = *r0;
5184
5185 /* Transfer references to the caller */
5186 err = transfer_reference_state(caller, callee);
5187 if (err) {
5188 return err;
5189 }
5190
5191 *insn_idx = callee->callsite + 1;
5192 if (env->log.level & BPF_LOG_LEVEL) {
5193 verbose(env, "returning from callee:\n");
5194 print_verifier_state(env, callee);
5195 verbose(env, "to caller at %d:\n", *insn_idx);
5196 print_verifier_state(env, caller);
5197 }
5198 /* clear everything in the callee */
5199 free_func_state(callee);
5200 state->frame[state->curframe + 1] = NULL;
5201 return 0;
5202 }
5203
5204 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, int func_id,
5205 struct bpf_call_arg_meta *meta)
5206 {
5207 struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
5208
5209 if (ret_type != RET_INTEGER ||
5210 (func_id != BPF_FUNC_get_stack && func_id != BPF_FUNC_probe_read_str &&
5211 func_id != BPF_FUNC_probe_read_kernel_str && func_id != BPF_FUNC_probe_read_user_str)) {
5212 return;
5213 }
5214
5215 ret_reg->smax_value = meta->msize_max_value;
5216 ret_reg->s32_max_value = meta->msize_max_value;
5217 ret_reg->smin_value = -MAX_ERRNO;
5218 ret_reg->s32_min_value = -MAX_ERRNO;
5219 reg_bounds_sync(ret_reg);
5220 }
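/* Example: for a helper like bpf_get_stack() called with a constant size of
 * 64, meta->msize_max_value is 64, so after the call R0 is known to lie in
 * [-MAX_ERRNO, 64]. That lets a program use the return value directly as a
 * length for subsequent accesses without an extra bounds test against the
 * buffer size.
 */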
5221
5222 static int record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, int func_id, int insn_idx)
5223 {
5224 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5225 struct bpf_map *map = meta->map_ptr;
5226
5227 if (func_id != BPF_FUNC_tail_call && func_id != BPF_FUNC_map_lookup_elem && func_id != BPF_FUNC_map_update_elem &&
5228 func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem &&
5229 func_id != BPF_FUNC_map_peek_elem) {
5230 return 0;
5231 }
5232
5233 if (map == NULL) {
5234 verbose(env, "kernel subsystem misconfigured verifier\n");
5235 return -EINVAL;
5236 }
5237
5238 /* In case of read-only, some additional restrictions
5239 * need to be applied in order to prevent altering the
5240 * state of the map from program side.
5241 */
5242 if ((map->map_flags & BPF_F_RDONLY_PROG) &&
5243 (func_id == BPF_FUNC_map_delete_elem || func_id == BPF_FUNC_map_update_elem ||
5244 func_id == BPF_FUNC_map_push_elem || func_id == BPF_FUNC_map_pop_elem)) {
5245 verbose(env, "write into map forbidden\n");
5246 return -EACCES;
5247 }
5248
5249 if (!BPF_MAP_PTR(aux->map_ptr_state)) {
5250 bpf_map_ptr_store(aux, meta->map_ptr, !meta->map_ptr->bypass_spec_v1);
5251 } else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) {
5252 bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, !meta->map_ptr->bypass_spec_v1);
5253 }
5254 return 0;
5255 }
5256
5257 static int record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, int func_id, int insn_idx)
5258 {
5259 struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5260 struct bpf_reg_state *regs = cur_regs(env), *reg;
5261 struct bpf_map *map = meta->map_ptr;
5262 u64 val, max;
5263 int err;
5264
5265 if (func_id != BPF_FUNC_tail_call) {
5266 return 0;
5267 }
5268 if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
5269 verbose(env, "kernel subsystem misconfigured verifier\n");
5270 return -EINVAL;
5271 }
5272
5273 reg = &regs[BPF_REG_3];
5274 val = reg->var_off.value;
5275 max = map->max_entries;
5276
5277 if (!(register_is_const(reg) && val < max)) {
5278 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5279 return 0;
5280 }
5281
5282 err = mark_chain_precision(env, BPF_REG_3);
5283 if (err) {
5284 return err;
5285 }
5286
5287 if (bpf_map_key_unseen(aux)) {
5288 bpf_map_key_store(aux, val);
5289 } else if (!bpf_map_key_poisoned(aux) && bpf_map_key_immediate(aux) != val) {
5290 bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5291 }
5292 return 0;
5293 }
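/* Example: if one verified path reaches this tail_call with a constant
 * R3 == 3 (and 3 < max_entries), the key 3 is remembered in the insn aux
 * data. Should another path later reach the same instruction with R3 == 5,
 * or with a non-constant R3, the key is replaced by BPF_MAP_KEY_POISON, so
 * a key is only kept when all paths agree on one constant index.
 */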
5294
5295 static int check_reference_leak(struct bpf_verifier_env *env)
5296 {
5297 struct bpf_func_state *state = cur_func(env);
5298 int i;
5299
5300 for (i = 0; i < state->acquired_refs; i++) {
5301 verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx);
5302 }
5303 return state->acquired_refs ? -EINVAL : 0;
5304 }
5305
5306 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
5307 {
5308 const struct bpf_func_proto *fn = NULL;
5309 enum bpf_return_type ret_type;
5310 enum bpf_type_flag ret_flag;
5311 struct bpf_reg_state *regs;
5312 struct bpf_call_arg_meta meta;
5313 bool changes_data;
5314 int i, err;
5315
5316 /* find function prototype */
5317 if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
5318 verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
5319 return -EINVAL;
5320 }
5321
5322 if (env->ops->get_func_proto) {
5323 fn = env->ops->get_func_proto(func_id, env->prog);
5324 }
5325 if (!fn) {
5326 verbose(env, "unknown func %s#%d\n", func_id_name(func_id), func_id);
5327 return -EINVAL;
5328 }
5329
5330 /* eBPF programs must be GPL compatible to use GPL-ed functions */
5331 if (!env->prog->gpl_compatible && fn->gpl_only) {
5332 verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
5333 return -EINVAL;
5334 }
5335
5336 if (fn->allowed && !fn->allowed(env->prog)) {
5337 verbose(env, "helper call is not allowed in probe\n");
5338 return -EINVAL;
5339 }
5340
5341 /* With LD_ABS/IND some JITs save/restore skb from r1. */
5342 changes_data = bpf_helper_changes_pkt_data(fn->func);
5343 if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
5344 verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", func_id_name(func_id), func_id);
5345 return -EINVAL;
5346 }
5347
5348 memset(&meta, 0, sizeof(meta));
5349 meta.pkt_access = fn->pkt_access;
5350
5351 err = check_func_proto(fn, func_id);
5352 if (err) {
5353 verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id);
5354 return err;
5355 }
5356
5357 meta.func_id = func_id;
5358 /* check args */
5359 for (i = 0; i < 5; i++) {
5360 err = check_func_arg(env, i, &meta, fn);
5361 if (err) {
5362 return err;
5363 }
5364 }
5365
5366 err = record_func_map(env, &meta, func_id, insn_idx);
5367 if (err) {
5368 return err;
5369 }
5370
5371 err = record_func_key(env, &meta, func_id, insn_idx);
5372 if (err) {
5373 return err;
5374 }
5375
5376 /* Mark slots with STACK_MISC in case of raw mode, stack offset
5377 * is inferred from register state.
5378 */
5379 for (i = 0; i < meta.access_size; i++) {
5380 err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1, false);
5381 if (err) {
5382 return err;
5383 }
5384 }
5385
5386 if (func_id == BPF_FUNC_tail_call) {
5387 err = check_reference_leak(env);
5388 if (err) {
5389 verbose(env, "tail_call would lead to reference leak\n");
5390 return err;
5391 }
5392 } else if (is_release_function(func_id)) {
5393 err = release_reference(env, meta.ref_obj_id);
5394 if (err) {
5395 verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id);
5396 return err;
5397 }
5398 }
5399
5400 regs = cur_regs(env);
5401 /* check that flags argument in get_local_storage(map, flags) is 0,
5402 * this is required because get_local_storage() can't return an error.
5403 */
5404 if (func_id == BPF_FUNC_get_local_storage && !register_is_null(&regs[BPF_REG_2])) {
5405 verbose(env, "get_local_storage() doesn't support non-zero flags\n");
5406 return -EINVAL;
5407 }
5408
5409 /* reset caller saved regs */
5410 for (i = 0; i < CALLER_SAVED_REGS; i++) {
5411 mark_reg_not_init(env, regs, caller_saved[i]);
5412 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5413 }
5414
5415 /* helper call returns 64-bit value. */
5416 regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5417
5418 /* update return register (already marked as written above) */
5419 ret_type = fn->ret_type;
5420 ret_flag = type_flag(fn->ret_type);
5421 if (ret_type == RET_INTEGER) {
5422 /* sets type to SCALAR_VALUE */
5423 mark_reg_unknown(env, regs, BPF_REG_0);
5424 } else if (ret_type == RET_VOID) {
5425 regs[BPF_REG_0].type = NOT_INIT;
5426 } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
5427 /* There is no offset yet applied, variable or fixed */
5428 mark_reg_known_zero(env, regs, BPF_REG_0);
5429 /* remember map_ptr, so that check_map_access()
5430 * can check 'value_size' boundary of memory access
5431 * to map element returned from bpf_map_lookup_elem()
5432 */
5433 if (meta.map_ptr == NULL) {
5434 verbose(env, "kernel subsystem misconfigured verifier\n");
5435 return -EINVAL;
5436 }
5437 regs[BPF_REG_0].map_ptr = meta.map_ptr;
5438 regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
5439 if (!type_may_be_null(ret_type) && map_value_has_spin_lock(meta.map_ptr)) {
5440 regs[BPF_REG_0].id = ++env->id_gen;
5441 }
5442 } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
5443 mark_reg_known_zero(env, regs, BPF_REG_0);
5444 regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
5445 } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
5446 mark_reg_known_zero(env, regs, BPF_REG_0);
5447 regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
5448 } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
5449 mark_reg_known_zero(env, regs, BPF_REG_0);
5450 regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
5451 } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
5452 mark_reg_known_zero(env, regs, BPF_REG_0);
5453 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
5454 regs[BPF_REG_0].mem_size = meta.mem_size;
5455 } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
5456 const struct btf_type *t;
5457
5458 mark_reg_known_zero(env, regs, BPF_REG_0);
5459 t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5460 if (!btf_type_is_struct(t)) {
5461 u32 tsize;
5462 const struct btf_type *ret;
5463 const char *tname;
5464
5465 /* resolve the type size of ksym. */
5466 ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5467 if (IS_ERR(ret)) {
5468 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5469 verbose(env, "unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret));
5470 return -EINVAL;
5471 }
5472 regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
5473 regs[BPF_REG_0].mem_size = tsize;
5474 } else {
5475 /* MEM_RDONLY may be carried from ret_flag, but it
5476 * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
5477 * it will confuse the check of PTR_TO_BTF_ID in
5478 * check_mem_access().
5479 */
5480 ret_flag &= ~MEM_RDONLY;
5481
5482 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
5483 regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5484 }
5485 } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
5486 int ret_btf_id;
5487
5488 mark_reg_known_zero(env, regs, BPF_REG_0);
5489 regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
5490 ret_btf_id = *fn->ret_btf_id;
5491 if (ret_btf_id == 0) {
5492 verbose(env, "invalid return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id);
5493 return -EINVAL;
5494 }
5495 regs[BPF_REG_0].btf_id = ret_btf_id;
5496 } else {
5497 verbose(env, "unknown return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id);
5498 return -EINVAL;
5499 }
5500
5501 if (type_may_be_null(regs[BPF_REG_0].type)) {
5502 regs[BPF_REG_0].id = ++env->id_gen;
5503 }
5504
5505 if (is_ptr_cast_function(func_id)) {
5506 /* For release_reference() */
5507 regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
5508 } else if (is_acquire_function(func_id, meta.map_ptr)) {
5509 int id = acquire_reference_state(env, insn_idx);
5510 if (id < 0) {
5511 return id;
5512 }
5513 /* For mark_ptr_or_null_reg() */
5514 regs[BPF_REG_0].id = id;
5515 /* For release_reference() */
5516 regs[BPF_REG_0].ref_obj_id = id;
5517 }
5518
5519 do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
5520
5521 err = check_map_func_compatibility(env, meta.map_ptr, func_id);
5522 if (err) {
5523 return err;
5524 }
5525
5526 if ((func_id == BPF_FUNC_get_stack || func_id == BPF_FUNC_get_task_stack) && !env->prog->has_callchain_buf) {
5527 const char *err_str;
5528
5529 #ifdef CONFIG_PERF_EVENTS
5530 err = get_callchain_buffers(sysctl_perf_event_max_stack);
5531 err_str = "cannot get callchain buffer for func %s#%d\n";
5532 #else
5533 err = -ENOTSUPP;
5534 err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
5535 #endif
5536 if (err) {
5537 verbose(env, err_str, func_id_name(func_id), func_id);
5538 return err;
5539 }
5540
5541 env->prog->has_callchain_buf = true;
5542 }
5543
5544 if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) {
5545 env->prog->call_get_stack = true;
5546 }
5547
5548 if (changes_data) {
5549 clear_all_pkt_pointers(env);
5550 }
5551 return 0;
5552 }
5553
5554 static bool signed_add_overflows(s64 a, s64 b)
5555 {
5556 /* Do the add in u64, where overflow is well-defined */
5557 s64 res = (s64)((u64)a + (u64)b);
5558
5559 if (b < 0) {
5560 return res > a;
5561 }
5562 return res < a;
5563 }
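/* Example: signed_add_overflows(S64_MAX, 1) computes the sum in u64 where it
 * wraps to S64_MIN; with b > 0 the result is smaller than a, so overflow is
 * reported. Likewise S64_MIN + (-1) wraps to S64_MAX, which is larger than a
 * with b < 0, so that direction overflows as well.
 */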
5564
5565 static bool signed_add32_overflows(s32 a, s32 b)
5566 {
5567 /* Do the add in u32, where overflow is well-defined */
5568 s32 res = (s32)((u32)a + (u32)b);
5569
5570 if (b < 0) {
5571 return res > a;
5572 }
5573 return res < a;
5574 }
5575
5576 static bool signed_sub_overflows(s64 a, s64 b)
5577 {
5578 /* Do the sub in u64, where overflow is well-defined */
5579 s64 res = (s64)((u64)a - (u64)b);
5580
5581 if (b < 0) {
5582 return res < a;
5583 }
5584 return res > a;
5585 }
5586
5587 static bool signed_sub32_overflows(s32 a, s32 b)
5588 {
5589 /* Do the sub in u32, where overflow is well-defined */
5590 s32 res = (s32)((u32)a - (u32)b);
5591
5592 if (b < 0) {
5593 return res < a;
5594 }
5595 return res > a;
5596 }
5597
5598 static bool check_reg_sane_offset(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, enum bpf_reg_type type)
5599 {
5600 bool known = tnum_is_const(reg->var_off);
5601 s64 val = reg->var_off.value;
5602 s64 smin = reg->smin_value;
5603
5604 if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
5605 verbose(env, "math between %s pointer and %lld is not allowed\n", reg_type_str(env, type), val);
5606 return false;
5607 }
5608
5609 if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
5610 verbose(env, "%s pointer offset %d is not allowed\n", reg_type_str(env, type), reg->off);
5611 return false;
5612 }
5613
5614 if (smin == S64_MIN) {
5615 verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
5616 reg_type_str(env, type));
5617 return false;
5618 }
5619
5620 if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
5621 verbose(env, "value %lld makes %s pointer be out of bounds\n", smin, reg_type_str(env, type));
5622 return false;
5623 }
5624
5625 return true;
5626 }
5627
5628 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
5629 {
5630 return &env->insn_aux_data[env->insn_idx];
5631 }
5632
5633 enum {
5634 REASON_BOUNDS = -1,
5635 REASON_TYPE = -2,
5636 REASON_PATHS = -3,
5637 REASON_LIMIT = -4,
5638 REASON_STACK = -5,
5639 };
5640
5641 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, u32 *alu_limit, bool mask_to_left)
5642 {
5643 u32 max = 0, ptr_limit = 0;
5644
5645 switch (ptr_reg->type) {
5646 case PTR_TO_STACK:
5647 /* Offset 0 is out-of-bounds, but acceptable start for the
5648 * left direction, see BPF_REG_FP. Also, unknown scalar
5649 * offset where we would need to deal with min/max bounds is
5650 * currently prohibited for unprivileged.
5651 */
5652 max = MAX_BPF_STACK + mask_to_left;
5653 ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
5654 break;
5655 case PTR_TO_MAP_VALUE:
5656 max = ptr_reg->map_ptr->value_size;
5657 ptr_limit = (mask_to_left ? ptr_reg->smin_value : ptr_reg->umax_value) + ptr_reg->off;
5658 break;
5659 default:
5660 return REASON_TYPE;
5661 }
5662
5663 if (ptr_limit >= max) {
5664 return REASON_LIMIT;
5665 }
5666 *alu_limit = ptr_limit;
5667 return 0;
5668 }
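/* Example: a PTR_TO_STACK register pointing at fp-16 with a constant
 * var_off of 0 yields ptr_limit = -((0) + (-16)) = 16, which is below
 * max = MAX_BPF_STACK (+1 when masking to the left), so 16 becomes the
 * alu_limit used later when masking this pointer arithmetic.
 */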
5669
5670 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, const struct bpf_insn *insn)
5671 {
5672 return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
5673 }
5674
5675 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, u32 alu_state, u32 alu_limit)
5676 {
5677 /* If we arrived here from different branches with different
5678 * state or limits to sanitize, then this won't work.
5679 */
5680 if (aux->alu_state && (aux->alu_state != alu_state || aux->alu_limit != alu_limit)) {
5681 return REASON_PATHS;
5682 }
5683
5684 /* Corresponding fixup done in fixup_bpf_calls(). */
5685 aux->alu_state = alu_state;
5686 aux->alu_limit = alu_limit;
5687 return 0;
5688 }
5689
5690 static int sanitize_val_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
5691 {
5692 struct bpf_insn_aux_data *aux = cur_aux(env);
5693
5694 if (can_skip_alu_sanitation(env, insn)) {
5695 return 0;
5696 }
5697
5698 return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
5699 }
5700
5701 static bool sanitize_needed(u8 opcode)
5702 {
5703 return opcode == BPF_ADD || opcode == BPF_SUB;
5704 }
5705
5706 struct bpf_sanitize_info {
5707 struct bpf_insn_aux_data aux;
5708 bool mask_to_left;
5709 };
5710
5711 static struct bpf_verifier_state *sanitize_speculative_path(struct bpf_verifier_env *env, const struct bpf_insn *insn,
5712 u32 next_idx, u32 curr_idx)
5713 {
5714 struct bpf_verifier_state *branch;
5715 struct bpf_reg_state *regs;
5716
5717 branch = push_stack(env, next_idx, curr_idx, true);
5718 if (branch && insn) {
5719 regs = branch->frame[branch->curframe]->regs;
5720 if (BPF_SRC(insn->code) == BPF_K) {
5721 mark_reg_unknown(env, regs, insn->dst_reg);
5722 } else if (BPF_SRC(insn->code) == BPF_X) {
5723 mark_reg_unknown(env, regs, insn->dst_reg);
5724 mark_reg_unknown(env, regs, insn->src_reg);
5725 }
5726 }
5727 return branch;
5728 }
5729
5730 static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg,
5731 const struct bpf_reg_state *off_reg, struct bpf_reg_state *dst_reg,
5732 struct bpf_sanitize_info *info, const bool commit_window)
5733 {
5734 struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
5735 struct bpf_verifier_state *vstate = env->cur_state;
5736 bool off_is_imm = tnum_is_const(off_reg->var_off);
5737 bool off_is_neg = off_reg->smin_value < 0;
5738 bool ptr_is_dst_reg = ptr_reg == dst_reg;
5739 u8 opcode = BPF_OP(insn->code);
5740 u32 alu_state, alu_limit;
5741 struct bpf_reg_state tmp;
5742 bool ret;
5743 int err;
5744
5745 if (can_skip_alu_sanitation(env, insn)) {
5746 return 0;
5747 }
5748
5749 /* We already marked aux for masking from non-speculative
5750 * paths, thus we got here in the first place. We only care
5751 * to explore bad access from here.
5752 */
5753 if (vstate->speculative) {
5754 goto do_sim;
5755 }
5756
5757 if (!commit_window) {
5758 if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) {
5759 return REASON_BOUNDS;
5760 }
5761
5762 info->mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg);
5763 }
5764
5765 err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
5766 if (err < 0) {
5767 return err;
5768 }
5769
5770 if (commit_window) {
5771 /* In commit phase we narrow the masking window based on
5772 * the observed pointer move after the simulated operation.
5773 */
5774 alu_state = info->aux.alu_state;
5775 alu_limit = abs(info->aux.alu_limit - alu_limit);
5776 } else {
5777 alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
5778 alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
5779 alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
5780
5781 /* Limit pruning on unknown scalars to enable deep search for
5782 * potential masking differences from other program paths.
5783 */
5784 if (!off_is_imm) {
5785 env->explore_alu_limits = true;
5786 }
5787 }
5788
5789 err = update_alu_sanitation_state(aux, alu_state, alu_limit);
5790 if (err < 0) {
5791 return err;
5792 }
5793 do_sim:
5794 /* If we're in commit phase, we're done here given we already
5795 * pushed the truncated dst_reg into the speculative verification
5796 * stack.
5797 *
5798 * Also, when register is a known constant, we rewrite register-based
5799 * operation to immediate-based, and thus do not need masking (and as
5800 * a consequence, do not need to simulate the zero-truncation either).
5801 */
5802 if (commit_window || off_is_imm) {
5803 return 0;
5804 }
5805
5806 /* Simulate and find potential out-of-bounds access under
5807 * speculative execution from truncation as a result of
5808 * masking when off was not within expected range. If off
5809 * sits in dst, then we temporarily need to move ptr there
5810 * to simulate dst (== 0) +/-= ptr. Needed, for example,
5811 * for cases where we use K-based arithmetic in one direction
5812 * and truncated reg-based in the other in order to explore
5813 * bad access.
5814 */
5815 if (!ptr_is_dst_reg) {
5816 tmp = *dst_reg;
5817 *dst_reg = *ptr_reg;
5818 }
5819 ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
5820 if (!ptr_is_dst_reg && ret) {
5821 *dst_reg = tmp;
5822 }
5823 return !ret ? REASON_STACK : 0;
5824 }
5825
5826 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
5827 {
5828 struct bpf_verifier_state *vstate = env->cur_state;
5829
5830 /* If we simulate paths under speculation, we don't update the
5831 * insn as 'seen' such that when we verify unreachable paths in
5832 * the non-speculative domain, sanitize_dead_code() can still
5833 * rewrite/sanitize them.
5834 */
5835 if (!vstate->speculative) {
5836 env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
5837 }
5838 }
5839
5840 static int sanitize_err(struct bpf_verifier_env *env, const struct bpf_insn *insn, int reason,
5841 const struct bpf_reg_state *off_reg, const struct bpf_reg_state *dst_reg)
5842 {
5843 static const char *err = "pointer arithmetic with it prohibited for !root";
5844 const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
5845 u32 dst = insn->dst_reg, src = insn->src_reg;
5846
5847 switch (reason) {
5848 case REASON_BOUNDS:
5849 verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n", off_reg == dst_reg ? dst : src, err);
5850 break;
5851 case REASON_TYPE:
5852 verbose(env, "R%d has pointer with unsupported alu operation, %s\n", off_reg == dst_reg ? src : dst, err);
5853 break;
5854 case REASON_PATHS:
5855 verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n", dst, op, err);
5856 break;
5857 case REASON_LIMIT:
5858 verbose(env, "R%d tried to %s beyond pointer bounds, %s\n", dst, op, err);
5859 break;
5860 case REASON_STACK:
5861 verbose(env, "R%d could not be pushed for speculative verification, %s\n", dst, err);
5862 break;
5863 default:
5864 verbose(env, "verifier internal error: unknown reason (%d)\n", reason);
5865 break;
5866 }
5867
5868 return -EACCES;
5869 }
5870
5871 /* check that stack access falls within stack limits and that 'reg' doesn't
5872 * have a variable offset.
5873 *
5874 * Variable offset is prohibited for unprivileged mode for simplicity since it
5875 * requires corresponding support in Spectre masking for stack ALU. See also
5876 * retrieve_ptr_limit().
5877 *
5879 * 'off' includes 'reg->off'.
5880 */
5881 static int check_stack_access_for_ptr_arithmetic(struct bpf_verifier_env *env, int regno,
5882 const struct bpf_reg_state *reg, int off)
5883 {
5884 if (!tnum_is_const(reg->var_off)) {
5885 char tn_buf[48];
5886
5887 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5888 verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", regno, tn_buf, off);
5889 return -EACCES;
5890 }
5891
5892 if (off >= 0 || off < -MAX_BPF_STACK) {
5893 verbose(env,
5894 "R%d stack pointer arithmetic goes out of range, "
5895 "prohibited for !root; off=%d\n",
5896 regno, off);
5897 return -EACCES;
5898 }
5899
5900 return 0;
5901 }
5902
5903 static int sanitize_check_bounds(struct bpf_verifier_env *env, const struct bpf_insn *insn,
5904 const struct bpf_reg_state *dst_reg)
5905 {
5906 u32 dst = insn->dst_reg;
5907
5908 /* For unprivileged we require that resulting offset must be in bounds
5909 * in order to be able to sanitize access later on.
5910 */
5911 if (env->bypass_spec_v1) {
5912 return 0;
5913 }
5914
5915 switch (dst_reg->type) {
5916 case PTR_TO_STACK:
5917 if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, dst_reg->off + dst_reg->var_off.value)) {
5918 return -EACCES;
5919 }
5920 break;
5921 case PTR_TO_MAP_VALUE:
5922 if (check_map_access(env, dst, dst_reg->off, 1, false)) {
5923 verbose(env,
5924 "R%d pointer arithmetic of map value goes out of range, "
5925 "prohibited for !root\n",
5926 dst);
5927 return -EACCES;
5928 }
5929 break;
5930 default:
5931 break;
5932 }
5933
5934 return 0;
5935 }
5936
5937 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
5938 * Caller should also handle BPF_MOV case separately.
5939 * If we return -EACCES, caller may want to try again treating pointer as a
5940 * scalar. So we only emit a diagnostic if !env->allow_ptr_leaks.
5941 */
5942 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn,
5943 const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg)
5944 {
5945 struct bpf_verifier_state *vstate = env->cur_state;
5946 struct bpf_func_state *state = vstate->frame[vstate->curframe];
5947 struct bpf_reg_state *regs = state->regs, *dst_reg;
5948 bool known = tnum_is_const(off_reg->var_off);
5949 s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, smin_ptr = ptr_reg->smin_value,
5950 smax_ptr = ptr_reg->smax_value;
5951 u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value,
5952 umax_ptr = ptr_reg->umax_value;
5953 struct bpf_sanitize_info info = {};
5954 u8 opcode = BPF_OP(insn->code);
5955 u32 dst = insn->dst_reg;
5956 int ret;
5957
5958 dst_reg = &regs[dst];
5959
5960 if ((known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val || umin_val > umax_val) {
5961 /* Taint dst register if offset had invalid bounds derived from
5962 * e.g. dead branches.
5963 */
5964 __mark_reg_unknown(env, dst_reg);
5965 return 0;
5966 }
5967
5968 if (BPF_CLASS(insn->code) != BPF_ALU64) {
5969 /* 32-bit ALU ops on pointers produce (meaningless) scalars */
5970 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
5971 __mark_reg_unknown(env, dst_reg);
5972 return 0;
5973 }
5974
5975 verbose(env, "R%d 32-bit pointer arithmetic prohibited\n", dst);
5976 return -EACCES;
5977 }
5978
5979 if (ptr_reg->type & PTR_MAYBE_NULL) {
5980 verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", dst,
5981 reg_type_str(env, ptr_reg->type));
5982 return -EACCES;
5983 }
5984
5985 switch (base_type(ptr_reg->type)) {
5986 case CONST_PTR_TO_MAP:
5987 /* smin_val represents the known value */
5988 if (known && smin_val == 0 && opcode == BPF_ADD) {
5989 break;
5990 }
5991 fallthrough;
5992 case PTR_TO_PACKET_END:
5993 case PTR_TO_SOCKET:
5994 case PTR_TO_SOCK_COMMON:
5995 case PTR_TO_TCP_SOCK:
5996 case PTR_TO_XDP_SOCK:
5997 reject:
5998 verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str(env, ptr_reg->type));
5999 return -EACCES;
6000 default:
6001 if (type_may_be_null(ptr_reg->type)) {
6002 goto reject;
6003 }
6004 break;
6005 }
6006
6007 /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
6008 * The id may be overwritten later if we create a new variable offset.
6009 */
6010 dst_reg->type = ptr_reg->type;
6011 dst_reg->id = ptr_reg->id;
6012
6013 if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) {
6014 return -EINVAL;
6015 }
6016
6017 /* pointer types do not carry 32-bit bounds at the moment. */
6018 verifier_mark_reg32_unbounded(dst_reg);
6019
6020 if (sanitize_needed(opcode)) {
6021 ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, &info, false);
6022 if (ret < 0) {
6023 return sanitize_err(env, insn, ret, off_reg, dst_reg);
6024 }
6025 }
6026
6027 switch (opcode) {
6028 case BPF_ADD:
6029 /* We can take a fixed offset as long as it doesn't overflow
6030 * the s32 'off' field
6031 */
6032 if (known && (ptr_reg->off + smin_val == (s64)(s32)(ptr_reg->off + smin_val))) {
6033 /* pointer += K. Accumulate it into fixed offset */
6034 dst_reg->smin_value = smin_ptr;
6035 dst_reg->smax_value = smax_ptr;
6036 dst_reg->umin_value = umin_ptr;
6037 dst_reg->umax_value = umax_ptr;
6038 dst_reg->var_off = ptr_reg->var_off;
6039 dst_reg->off = ptr_reg->off + smin_val;
6040 dst_reg->raw = ptr_reg->raw;
6041 break;
6042 }
6043 /* A new variable offset is created. Note that off_reg->off
6044 * == 0, since it's a scalar.
6045 * dst_reg gets the pointer type and since some positive
6046 * integer value was added to the pointer, give it a new 'id'
6047 * if it's a PTR_TO_PACKET.
6048 * this creates a new 'base' pointer, off_reg (variable) gets
6049 * added into the variable offset, and we copy the fixed offset
6050 * from ptr_reg.
6051 */
6052 if (signed_add_overflows(smin_ptr, smin_val) || signed_add_overflows(smax_ptr, smax_val)) {
6053 dst_reg->smin_value = S64_MIN;
6054 dst_reg->smax_value = S64_MAX;
6055 } else {
6056 dst_reg->smin_value = smin_ptr + smin_val;
6057 dst_reg->smax_value = smax_ptr + smax_val;
6058 }
6059 if (umin_ptr + umin_val < umin_ptr || umax_ptr + umax_val < umax_ptr) {
6060 dst_reg->umin_value = 0;
6061 dst_reg->umax_value = U64_MAX;
6062 } else {
6063 dst_reg->umin_value = umin_ptr + umin_val;
6064 dst_reg->umax_value = umax_ptr + umax_val;
6065 }
6066 dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
6067 dst_reg->off = ptr_reg->off;
6068 dst_reg->raw = ptr_reg->raw;
6069 if (reg_is_pkt_pointer(ptr_reg)) {
6070 dst_reg->id = ++env->id_gen;
6071 /* something was added to pkt_ptr, set range to zero */
6072 dst_reg->raw = 0;
6073 }
6074 break;
6075 case BPF_SUB:
6076 if (dst_reg == off_reg) {
6077 /* scalar -= pointer. Creates an unknown scalar */
6078 verbose(env, "R%d tried to subtract pointer from scalar\n", dst);
6079 return -EACCES;
6080 }
6081 /* We don't allow subtraction from FP, because (according to
6082 * test_verifier.c test "invalid fp arithmetic") JITs might not
6083 * be able to deal with it.
6084 */
6085 if (ptr_reg->type == PTR_TO_STACK) {
6086 verbose(env, "R%d subtraction from stack pointer prohibited\n", dst);
6087 return -EACCES;
6088 }
6089 if (known && (ptr_reg->off - smin_val == (s64)(s32)(ptr_reg->off - smin_val))) {
6090 /* pointer -= K. Subtract it from fixed offset */
6091 dst_reg->smin_value = smin_ptr;
6092 dst_reg->smax_value = smax_ptr;
6093 dst_reg->umin_value = umin_ptr;
6094 dst_reg->umax_value = umax_ptr;
6095 dst_reg->var_off = ptr_reg->var_off;
6096 dst_reg->id = ptr_reg->id;
6097 dst_reg->off = ptr_reg->off - smin_val;
6098 dst_reg->raw = ptr_reg->raw;
6099 break;
6100 }
6101 /* A new variable offset is created. If the subtrahend is known
6102 * nonnegative, then any reg->range we had before is still good.
6103 */
6104 if (signed_sub_overflows(smin_ptr, smax_val) || signed_sub_overflows(smax_ptr, smin_val)) {
6105 /* Overflow possible, we know nothing */
6106 dst_reg->smin_value = S64_MIN;
6107 dst_reg->smax_value = S64_MAX;
6108 } else {
6109 dst_reg->smin_value = smin_ptr - smax_val;
6110 dst_reg->smax_value = smax_ptr - smin_val;
6111 }
6112 if (umin_ptr < umax_val) {
6113 /* Overflow possible, we know nothing */
6114 dst_reg->umin_value = 0;
6115 dst_reg->umax_value = U64_MAX;
6116 } else {
6117 /* Cannot overflow (as long as bounds are consistent) */
6118 dst_reg->umin_value = umin_ptr - umax_val;
6119 dst_reg->umax_value = umax_ptr - umin_val;
6120 }
6121 dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
6122 dst_reg->off = ptr_reg->off;
6123 dst_reg->raw = ptr_reg->raw;
6124 if (reg_is_pkt_pointer(ptr_reg)) {
6125 dst_reg->id = ++env->id_gen;
6126 /* something was added to pkt_ptr, set range to zero */
6127 if (smin_val < 0) {
6128 dst_reg->raw = 0;
6129 }
6130 }
6131 break;
6132 case BPF_AND:
6133 case BPF_OR:
6134 case BPF_XOR:
6135 /* bitwise ops on pointers are troublesome, prohibit. */
6136 verbose(env, "R%d bitwise operator %s on pointer prohibited\n", dst, bpf_alu_string[opcode >> 0x4]);
6137 return -EACCES;
6138 default:
6139 /* other operators (e.g. MUL,LSH) produce non-pointer results */
6140 verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", dst, bpf_alu_string[opcode >> 0x4]);
6141 return -EACCES;
6142 }
6143
6144 if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) {
6145 return -EINVAL;
6146 }
6147
6148 reg_bounds_sync(dst_reg);
6149
6150 if (sanitize_check_bounds(env, insn, dst_reg) < 0) {
6151 return -EACCES;
6152 }
6153 if (sanitize_needed(opcode)) {
6154 ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, &info, true);
6155 if (ret < 0) {
6156 return sanitize_err(env, insn, ret, off_reg, dst_reg);
6157 }
6158 }
6159
6160 return 0;
6161 }
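/* Example: with R2 = PTR_TO_MAP_VALUE (off = 0) and a constant R3 = 8,
 * "R2 += R3" takes the pointer += K path above: the constant is folded into
 * dst_reg->off (now 8) and the bounds are copied unchanged. With R3 instead
 * a scalar known to be in [0, 8], a new variable offset is created: the
 * [0, 8] range goes into var_off/umin/umax while off keeps the fixed part,
 * and the combined offset is validated later, e.g. by check_map_access().
 */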
6162
6163 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6164 {
6165 s32 smin_val = src_reg->s32_min_value;
6166 s32 smax_val = src_reg->s32_max_value;
6167 u32 umin_val = src_reg->u32_min_value;
6168 u32 umax_val = src_reg->u32_max_value;
6169
6170 if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
6171 signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
6172 dst_reg->s32_min_value = S32_MIN;
6173 dst_reg->s32_max_value = S32_MAX;
6174 } else {
6175 dst_reg->s32_min_value += smin_val;
6176 dst_reg->s32_max_value += smax_val;
6177 }
6178 if (dst_reg->u32_min_value + umin_val < umin_val || dst_reg->u32_max_value + umax_val < umax_val) {
6179 dst_reg->u32_min_value = 0;
6180 dst_reg->u32_max_value = U32_MAX;
6181 } else {
6182 dst_reg->u32_min_value += umin_val;
6183 dst_reg->u32_max_value += umax_val;
6184 }
6185 }
6186
6187 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6188 {
6189 s64 smin_val = src_reg->smin_value;
6190 s64 smax_val = src_reg->smax_value;
6191 u64 umin_val = src_reg->umin_value;
6192 u64 umax_val = src_reg->umax_value;
6193
6194 if (signed_add_overflows(dst_reg->smin_value, smin_val) || signed_add_overflows(dst_reg->smax_value, smax_val)) {
6195 dst_reg->smin_value = S64_MIN;
6196 dst_reg->smax_value = S64_MAX;
6197 } else {
6198 dst_reg->smin_value += smin_val;
6199 dst_reg->smax_value += smax_val;
6200 }
6201 if (dst_reg->umin_value + umin_val < umin_val || dst_reg->umax_value + umax_val < umax_val) {
6202 dst_reg->umin_value = 0;
6203 dst_reg->umax_value = U64_MAX;
6204 } else {
6205 dst_reg->umin_value += umin_val;
6206 dst_reg->umax_value += umax_val;
6207 }
6208 }
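/* Example: dst in [10, 20] plus src in [1, 5] gives [11, 25] for both the
 * signed and unsigned bounds. Had dst->smax_value been S64_MAX with a
 * possibly positive src, the signed addition could overflow and the signed
 * bounds would collapse to [S64_MIN, S64_MAX] instead.
 */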
6209
6210 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6211 {
6212 s32 smin_val = src_reg->s32_min_value;
6213 s32 smax_val = src_reg->s32_max_value;
6214 u32 umin_val = src_reg->u32_min_value;
6215 u32 umax_val = src_reg->u32_max_value;
6216
6217 if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
6218 signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
6219 /* Overflow possible, we know nothing */
6220 dst_reg->s32_min_value = S32_MIN;
6221 dst_reg->s32_max_value = S32_MAX;
6222 } else {
6223 dst_reg->s32_min_value -= smax_val;
6224 dst_reg->s32_max_value -= smin_val;
6225 }
6226 if (dst_reg->u32_min_value < umax_val) {
6227 /* Overflow possible, we know nothing */
6228 dst_reg->u32_min_value = 0;
6229 dst_reg->u32_max_value = U32_MAX;
6230 } else {
6231 /* Cannot overflow (as long as bounds are consistent) */
6232 dst_reg->u32_min_value -= umax_val;
6233 dst_reg->u32_max_value -= umin_val;
6234 }
6235 }
6236
6237 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6238 {
6239 s64 smin_val = src_reg->smin_value;
6240 s64 smax_val = src_reg->smax_value;
6241 u64 umin_val = src_reg->umin_value;
6242 u64 umax_val = src_reg->umax_value;
6243
6244 if (signed_sub_overflows(dst_reg->smin_value, smax_val) || signed_sub_overflows(dst_reg->smax_value, smin_val)) {
6245 /* Overflow possible, we know nothing */
6246 dst_reg->smin_value = S64_MIN;
6247 dst_reg->smax_value = S64_MAX;
6248 } else {
6249 dst_reg->smin_value -= smax_val;
6250 dst_reg->smax_value -= smin_val;
6251 }
6252 if (dst_reg->umin_value < umax_val) {
6253 /* Overflow possible, we know nothing */
6254 dst_reg->umin_value = 0;
6255 dst_reg->umax_value = U64_MAX;
6256 } else {
6257 /* Cannot overflow (as long as bounds are consistent) */
6258 dst_reg->umin_value -= umax_val;
6259 dst_reg->umax_value -= umin_val;
6260 }
6261 }
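/* Example: dst in [10, 20] minus src in [1, 5] gives [10 - 5, 20 - 1] =
 * [5, 19] for both signed and unsigned bounds. If dst->umin_value were
 * smaller than src->umax_value, the unsigned subtraction could wrap below
 * zero, so the unsigned bounds would collapse to [0, U64_MAX].
 */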
6262
6263 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6264 {
6265 s32 smin_val = src_reg->s32_min_value;
6266 u32 umin_val = src_reg->u32_min_value;
6267 u32 umax_val = src_reg->u32_max_value;
6268
6269 if (smin_val < 0 || dst_reg->s32_min_value < 0) {
6270 /* Ain't nobody got time to multiply that sign */
6271 verifier_mark_reg32_unbounded(dst_reg);
6272 return;
6273 }
6274 /* Both values are positive, so we can work with unsigned and
6275 * copy the result to signed (unless it exceeds S32_MAX).
6276 */
6277 if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
6278 /* Potential overflow, we know nothing */
6279 verifier_mark_reg32_unbounded(dst_reg);
6280 return;
6281 }
6282 dst_reg->u32_min_value *= umin_val;
6283 dst_reg->u32_max_value *= umax_val;
6284 if (dst_reg->u32_max_value > S32_MAX) {
6285 /* Overflow possible, we know nothing */
6286 dst_reg->s32_min_value = S32_MIN;
6287 dst_reg->s32_max_value = S32_MAX;
6288 } else {
6289 dst_reg->s32_min_value = dst_reg->u32_min_value;
6290 dst_reg->s32_max_value = dst_reg->u32_max_value;
6291 }
6292 }
6293
6294 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6295 {
6296 s64 smin_val = src_reg->smin_value;
6297 u64 umin_val = src_reg->umin_value;
6298 u64 umax_val = src_reg->umax_value;
6299
6300 if (smin_val < 0 || dst_reg->smin_value < 0) {
6301 /* Ain't nobody got time to multiply that sign */
6302 verifier_mark_reg64_unbounded(dst_reg);
6303 return;
6304 }
6305 /* Both values are positive, so we can work with unsigned and
6306 * copy the result to signed (unless it exceeds S64_MAX).
6307 */
6308 if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
6309 /* Potential overflow, we know nothing */
6310 verifier_mark_reg64_unbounded(dst_reg);
6311 return;
6312 }
6313 dst_reg->umin_value *= umin_val;
6314 dst_reg->umax_value *= umax_val;
6315 if (dst_reg->umax_value > S64_MAX) {
6316 /* Overflow possible, we know nothing */
6317 dst_reg->smin_value = S64_MIN;
6318 dst_reg->smax_value = S64_MAX;
6319 } else {
6320 dst_reg->smin_value = dst_reg->umin_value;
6321 dst_reg->smax_value = dst_reg->umax_value;
6322 }
6323 }
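/* Example: dst in [2, 4] times src in [3, 5] gives [6, 20]; both operands
 * are non-negative and no larger than U32_MAX, so the 64-bit product cannot
 * wrap and the unsigned result is copied to the signed bounds. A possibly
 * negative operand, or one above U32_MAX, makes the function give up and
 * mark the register unbounded instead.
 */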
6324
6325 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6326 {
6327 bool src_known = tnum_subreg_is_const(src_reg->var_off);
6328 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6329 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6330 s32 smin_val = src_reg->s32_min_value;
6331 u32 umax_val = src_reg->u32_max_value;
6332
6333 if (src_known && dst_known) {
6334 verifier_mark_reg32_known(dst_reg, var32_off.value);
6335 return;
6336 }
6337
6338 /* We get our minimum from the var_off, since that's inherently
6339 * bitwise. Our maximum is the minimum of the operands' maxima.
6340 */
6341 dst_reg->u32_min_value = var32_off.value;
6342 dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
6343 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6344 /* Lose signed bounds when ANDing negative numbers,
6345 * ain't nobody got time for that.
6346 */
6347 dst_reg->s32_min_value = S32_MIN;
6348 dst_reg->s32_max_value = S32_MAX;
6349 } else {
6350 /* ANDing two positives gives a positive, so safe to
6351 * cast result into s32.
6352 */
6353 dst_reg->s32_min_value = dst_reg->u32_min_value;
6354 dst_reg->s32_max_value = dst_reg->u32_max_value;
6355 }
6356 }
6357
6358 static void scalar_min_max_and(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6359 {
6360 bool src_known = tnum_is_const(src_reg->var_off);
6361 bool dst_known = tnum_is_const(dst_reg->var_off);
6362 s64 smin_val = src_reg->smin_value;
6363 u64 umax_val = src_reg->umax_value;
6364
6365 if (src_known && dst_known) {
6366 verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6367 return;
6368 }
6369
6370 /* We get our minimum from the var_off, since that's inherently
6371 * bitwise. Our maximum is the minimum of the operands' maxima.
6372 */
6373 dst_reg->umin_value = dst_reg->var_off.value;
6374 dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
6375 if (dst_reg->smin_value < 0 || smin_val < 0) {
6376 /* Lose signed bounds when ANDing negative numbers,
6377 * ain't nobody got time for that.
6378 */
6379 dst_reg->smin_value = S64_MIN;
6380 dst_reg->smax_value = S64_MAX;
6381 } else {
6382 /* ANDing two positives gives a positive, so safe to
6383 * cast result into s64.
6384 */
6385 dst_reg->smin_value = dst_reg->umin_value;
6386 dst_reg->smax_value = dst_reg->umax_value;
6387 }
6388 /* We may learn something more from the var_off */
6389 verifier_update_reg_bounds(dst_reg);
6390 }
6391
6392 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6393 {
6394 bool src_known = tnum_subreg_is_const(src_reg->var_off);
6395 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6396 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6397 s32 smin_val = src_reg->s32_min_value;
6398 u32 umin_val = src_reg->u32_min_value;
6399
6400 if (src_known && dst_known) {
6401 verifier_mark_reg32_known(dst_reg, var32_off.value);
6402 return;
6403 }
6404
6405 /* We get our maximum from the var_off, and our minimum is the
6406 * maximum of the operands' minima
6407 */
6408 dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
6409 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6410 if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6411 /* Lose signed bounds when ORing negative numbers,
6412 * ain't nobody got time for that.
6413 */
6414 dst_reg->s32_min_value = S32_MIN;
6415 dst_reg->s32_max_value = S32_MAX;
6416 } else {
6417 /* ORing two positives gives a positive, so safe to
6418 * cast result into s32.
6419 */
6420 dst_reg->s32_min_value = dst_reg->u32_min_value;
6421 dst_reg->s32_max_value = dst_reg->u32_max_value;
6422 }
6423 }
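/* Illustrative example: ORing a register whose subreg tnum is
 * (value 0x0, mask 0x7), i.e. range [0, 7], with a constant 0x8 yields
 * a tnum of (value 0x8, mask 0x7) via tnum_or() in the caller, so the
 * minimum becomes max(0, 0x8) = 0x8 and the maximum becomes
 * 0x8 | 0x7 = 0xf; both are non-negative, so the signed bounds follow.
 */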
6424
6425 static void scalar_min_max_or(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6426 {
6427 bool src_known = tnum_is_const(src_reg->var_off);
6428 bool dst_known = tnum_is_const(dst_reg->var_off);
6429 s64 smin_val = src_reg->smin_value;
6430 u64 umin_val = src_reg->umin_value;
6431
6432 if (src_known && dst_known) {
6433 verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6434 return;
6435 }
6436
6437 /* We get our maximum from the var_off, and our minimum is the
6438 * maximum of the operands' minima
6439 */
6440 dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
6441 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6442 if (dst_reg->smin_value < 0 || smin_val < 0) {
6443 /* Lose signed bounds when ORing negative numbers,
6444 * ain't nobody got time for that.
6445 */
6446 dst_reg->smin_value = S64_MIN;
6447 dst_reg->smax_value = S64_MAX;
6448 } else {
6449 /* ORing two positives gives a positive, so safe to
6450 * cast result into s64.
6451 */
6452 dst_reg->smin_value = dst_reg->umin_value;
6453 dst_reg->smax_value = dst_reg->umax_value;
6454 }
6455 /* We may learn something more from the var_off */
6456 verifier_update_reg_bounds(dst_reg);
6457 }
6458
6459 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6460 {
6461 bool src_known = tnum_subreg_is_const(src_reg->var_off);
6462 bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6463 struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6464 s32 smin_val = src_reg->s32_min_value;
6465
6466 if (src_known && dst_known) {
6467 verifier_mark_reg32_known(dst_reg, var32_off.value);
6468 return;
6469 }
6470
6471 /* We get both minimum and maximum from the var32_off. */
6472 dst_reg->u32_min_value = var32_off.value;
6473 dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6474
6475 if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
6476 /* XORing two positive sign numbers gives a positive,
6477 * so safe to cast u32 result into s32.
6478 */
6479 dst_reg->s32_min_value = dst_reg->u32_min_value;
6480 dst_reg->s32_max_value = dst_reg->u32_max_value;
6481 } else {
6482 dst_reg->s32_min_value = S32_MIN;
6483 dst_reg->s32_max_value = S32_MAX;
6484 }
6485 }
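/* Illustrative example: XORing a register whose subreg tnum is
 * (value 0x0, mask 0x7) with a constant 0x10 gives a tnum of
 * (value 0x10, mask 0x7), so the unsigned bounds become [0x10, 0x17];
 * both operands are non-negative, so the signed bounds are copied.
 */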
6486
6487 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6488 {
6489 bool src_known = tnum_is_const(src_reg->var_off);
6490 bool dst_known = tnum_is_const(dst_reg->var_off);
6491 s64 smin_val = src_reg->smin_value;
6492
6493 if (src_known && dst_known) {
6494 /* dst_reg->var_off.value has been updated earlier */
6495 verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6496 return;
6497 }
6498
6499 /* We get both minimum and maximum from the var_off. */
6500 dst_reg->umin_value = dst_reg->var_off.value;
6501 dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6502
6503 if (dst_reg->smin_value >= 0 && smin_val >= 0) {
6504 /* XORing two positive sign numbers gives a positive,
6505 * so safe to cast u64 result into s64.
6506 */
6507 dst_reg->smin_value = dst_reg->umin_value;
6508 dst_reg->smax_value = dst_reg->umax_value;
6509 } else {
6510 dst_reg->smin_value = S64_MIN;
6511 dst_reg->smax_value = S64_MAX;
6512 }
6513
6514 verifier_update_reg_bounds(dst_reg);
6515 }
6516
6517 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val)
6518 {
6519 /* We lose all sign bit information (except what we can pick
6520 * up from var_off)
6521 */
6522 dst_reg->s32_min_value = S32_MIN;
6523 dst_reg->s32_max_value = S32_MAX;
6524 /* If we might shift our top bit out, then we know nothing */
6525 if (umax_val > VERIFIER_THIRTYONE || dst_reg->u32_max_value > 1ULL << (VERIFIER_THIRTYONE - umax_val)) {
6526 dst_reg->u32_min_value = 0;
6527 dst_reg->u32_max_value = U32_MAX;
6528 } else {
6529 dst_reg->u32_min_value <<= umin_val;
6530 dst_reg->u32_max_value <<= umax_val;
6531 }
6532 }
6533
6534 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6535 {
6536 u32 umax_val = src_reg->u32_max_value;
6537 u32 umin_val = src_reg->u32_min_value;
6538 /* u32 alu operation will zext upper bits */
6539 struct tnum subreg = tnum_subreg(dst_reg->var_off);
6540
6541 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6542 dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
6543 /* Not strictly required, but being careful: mark the reg64 bounds as
6544 * unknown so that we are forced to pick them up from the tnum and zext
6545 * later; if some path skips this step we are still safe.
6546 */
6547 verifier_mark_reg64_unbounded(dst_reg);
6548 verifier_update_reg32_bounds(dst_reg);
6549 }
6550
6551 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val)
6552 {
6553 /* Special case <<32 because it is a common compiler pattern to sign
6554 * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
6555 * positive we know this shift will also be positive so we can track
6556 * bounds correctly. Otherwise we lose all sign bit information except
6557 * what we can pick up from var_off. Perhaps we can generalize this
6558 * later to shifts of any length.
6559 */
6560 if (umin_val == 0x20 && umax_val == 0x20 && dst_reg->s32_max_value >= 0) {
6561 dst_reg->smax_value = (s64)dst_reg->s32_max_value << 0x20;
6562 } else {
6563 dst_reg->smax_value = S64_MAX;
6564 }
6565
6566 if (umin_val == 0x20 && umax_val == 0x20 && dst_reg->s32_min_value >= 0) {
6567 dst_reg->smin_value = (s64)dst_reg->s32_min_value << 0x20;
6568 } else {
6569 dst_reg->smin_value = S64_MIN;
6570 }
6571
6572 /* If we might shift our top bit out, then we know nothing */
6573 if (dst_reg->umax_value > 1ULL << (0x3f - umax_val)) {
6574 dst_reg->umin_value = 0;
6575 dst_reg->umax_value = U64_MAX;
6576 } else {
6577 dst_reg->umin_value <<= umin_val;
6578 dst_reg->umax_value <<= umax_val;
6579 }
6580 }
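/* Illustrative example of the <<32 special case: compilers commonly
 * sign extend a subreg with "r1 <<= 32; r1 s>>= 32". If r1's 32-bit
 * bounds are [0, 100] before the shift, the result is tracked as
 * smin = 0 << 32 and smax = 100 << 32, which lets the following
 * arithmetic right shift recover [0, 100] instead of losing the bounds.
 */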
6581
6582 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6583 {
6584 u64 umax_val = src_reg->umax_value;
6585 u64 umin_val = src_reg->umin_value;
6586
6587 /* scalar64 calc uses 32bit unshifted bounds so must be called first */
6588 __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
6589 __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6590
6591 dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
6592 /* We may learn something more from the var_off */
6593 verifier_update_reg_bounds(dst_reg);
6594 }
6595
6596 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6597 {
6598 struct tnum subreg = tnum_subreg(dst_reg->var_off);
6599 u32 umax_val = src_reg->u32_max_value;
6600 u32 umin_val = src_reg->u32_min_value;
6601
6602 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
6603 * be negative, then either:
6604 * 1) src_reg might be zero, so the sign bit of the result is
6605 * unknown, so we lose our signed bounds
6606 * 2) it's known negative, thus the unsigned bounds capture the
6607 * signed bounds
6608 * 3) the signed bounds cross zero, so they tell us nothing
6609 * about the result
6610 * If the value in dst_reg is known nonnegative, then again the
6611 * unsigned bounds capture the signed bounds.
6612 * Thus, in all cases it suffices to blow away our signed bounds
6613 * and rely on inferring new ones from the unsigned bounds and
6614 * var_off of the result.
6615 */
6616 dst_reg->s32_min_value = S32_MIN;
6617 dst_reg->s32_max_value = S32_MAX;
6618
6619 dst_reg->var_off = tnum_rshift(subreg, umin_val);
6620 dst_reg->u32_min_value >>= umax_val;
6621 dst_reg->u32_max_value >>= umin_val;
6622
6623 verifier_mark_reg64_unbounded(dst_reg);
6624 verifier_update_reg32_bounds(dst_reg);
6625 }
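/* Illustrative example: a u32 range [16, 64] shifted right by a src in
 * [1, 2] becomes min = 16 >> 2 = 4 (largest shift applied to the
 * minimum) and max = 64 >> 1 = 32 (smallest shift applied to the
 * maximum); the signed bounds are then re-derived from these and the
 * shifted tnum.
 */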
6626
6627 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6628 {
6629 u64 umax_val = src_reg->umax_value;
6630 u64 umin_val = src_reg->umin_value;
6631
6632 /* BPF_RSH is an unsigned shift. If the value in dst_reg might
6633 * be negative, then either:
6634 * 1) src_reg might be zero, so the sign bit of the result is
6635 * unknown, so we lose our signed bounds
6636 * 2) it's known negative, thus the unsigned bounds capture the
6637 * signed bounds
6638 * 3) the signed bounds cross zero, so they tell us nothing
6639 * about the result
6640 * If the value in dst_reg is known nonnegative, then again the
6641 * unsigned bounds capture the signed bounds.
6642 * Thus, in all cases it suffices to blow away our signed bounds
6643 * and rely on inferring new ones from the unsigned bounds and
6644 * var_off of the result.
6645 */
6646 dst_reg->smin_value = S64_MIN;
6647 dst_reg->smax_value = S64_MAX;
6648 dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
6649 dst_reg->umin_value >>= umax_val;
6650 dst_reg->umax_value >>= umin_val;
6651
6652 /* It's not easy to operate on alu32 bounds here because it depends
6653 * on bits being shifted in. Take easy way out and mark unbounded
6654 * so we can recalculate later from tnum.
6655 */
6656 verifier_mark_reg32_unbounded(dst_reg);
6657 verifier_update_reg_bounds(dst_reg);
6658 }
6659
6660 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6661 {
6662 u64 umin_val = src_reg->u32_min_value;
6663
6664 /* Upon reaching here, src_known is true and
6665 * umax_val is equal to umin_val.
6666 */
6667 dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
6668 dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
6669
6670 dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 0x20);
6671
6672 /* blow away the dst_reg umin_value/umax_value and rely on
6673 * dst_reg var_off to refine the result.
6674 */
6675 dst_reg->u32_min_value = 0;
6676 dst_reg->u32_max_value = U32_MAX;
6677
6678 verifier_mark_reg64_unbounded(dst_reg);
6679 verifier_update_reg32_bounds(dst_reg);
6680 }
6681
6682 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6683 {
6684 u64 umin_val = src_reg->umin_value;
6685
6686 /* Upon reaching here, src_known is true and umax_val is equal
6687 * to umin_val.
6688 */
6689 dst_reg->smin_value >>= umin_val;
6690 dst_reg->smax_value >>= umin_val;
6691
6692 dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 0x40);
6693
6694 /* blow away the dst_reg umin_value/umax_value and rely on
6695 * dst_reg var_off to refine the result.
6696 */
6697 dst_reg->umin_value = 0;
6698 dst_reg->umax_value = U64_MAX;
6699
6700 /* It's not easy to operate on alu32 bounds here because it depends
6701 * on bits being shifted in from upper 32-bits. Take easy way out
6702 * and mark unbounded so we can recalculate later from tnum.
6703 */
6704 verifier_mark_reg32_unbounded(dst_reg);
6705 verifier_update_reg_bounds(dst_reg);
6706 }
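/* Illustrative example: with a known shift of 1 (src_known, so
 * umin_val == umax_val), a signed range [-8, 4] becomes [-4, 2]; the
 * unsigned bounds are reset to [0, U64_MAX] and recomputed from the
 * arithmetically shifted tnum by verifier_update_reg_bounds().
 */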
6707
6708 /* WARNING: This function does calculations on 64-bit values, but the actual
6709 * execution may occur on 32-bit values. Therefore, things like bitshifts
6710 * need extra checks in the 32-bit case.
6711 */
6712 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn,
6713 struct bpf_reg_state *dst_reg, struct bpf_reg_state src_reg)
6714 {
6715 struct bpf_reg_state *regs = cur_regs(env);
6716 u8 opcode = BPF_OP(insn->code);
6717 bool src_known;
6718 s64 smin_val, smax_val;
6719 u64 umin_val, umax_val;
6720 s32 s32_min_val, s32_max_val;
6721 u32 u32_min_val, u32_max_val;
6722 u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? VERIFIER_SIXTYFOUR : VERIFIER_THIRTYTWO;
6723 bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
6724 int ret;
6725
6726 smin_val = src_reg.smin_value;
6727 smax_val = src_reg.smax_value;
6728 umin_val = src_reg.umin_value;
6729 umax_val = src_reg.umax_value;
6730
6731 s32_min_val = src_reg.s32_min_value;
6732 s32_max_val = src_reg.s32_max_value;
6733 u32_min_val = src_reg.u32_min_value;
6734 u32_max_val = src_reg.u32_max_value;
6735
6736 if (alu32) {
6737 src_known = tnum_subreg_is_const(src_reg.var_off);
6738 if ((src_known && (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) || s32_min_val > s32_max_val ||
6739 u32_min_val > u32_max_val) {
6740 /* Taint dst register if offset had invalid bounds
6741 * derived from e.g. dead branches.
6742 */
6743 __mark_reg_unknown(env, dst_reg);
6744 return 0;
6745 }
6746 } else {
6747 src_known = tnum_is_const(src_reg.var_off);
6748 if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val ||
6749 umin_val > umax_val) {
6750 /* Taint dst register if offset had invalid bounds
6751 * derived from e.g. dead branches.
6752 */
6753 __mark_reg_unknown(env, dst_reg);
6754 return 0;
6755 }
6756 }
6757
6758 if (!src_known && opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
6759 __mark_reg_unknown(env, dst_reg);
6760 return 0;
6761 }
6762
6763 if (sanitize_needed(opcode)) {
6764 ret = sanitize_val_alu(env, insn);
6765 if (ret < 0) {
6766 return sanitize_err(env, insn, ret, NULL, NULL);
6767 }
6768 }
6769
6770 /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
6771 * There are two classes of instructions: The first class we track both
6772 * alu32 and alu64 sign/unsigned bounds independently this provides the
6773 * greatest amount of precision when alu operations are mixed with jmp32
6774 * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
6775 * BPF_OR, and BPF_XOR. This is possible because these ops have fairly easy to
6776 * understand and calculate behavior in both 32-bit and 64-bit alu ops.
6777 * See alu32 verifier tests for examples. The second class of
6778 * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
6779 * with regards to tracking sign/unsigned bounds because the bits may
6780 * cross subreg boundaries in the alu64 case. When this happens we mark
6781 * the reg unbounded in the subreg bound space and use the resulting
6782 * tnum to calculate an approximation of the sign/unsigned bounds.
6783 */
6784 switch (opcode) {
6785 case BPF_ADD:
6786 scalar32_min_max_add(dst_reg, &src_reg);
6787 scalar_min_max_add(dst_reg, &src_reg);
6788 dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
6789 break;
6790 case BPF_SUB:
6791 scalar32_min_max_sub(dst_reg, &src_reg);
6792 scalar_min_max_sub(dst_reg, &src_reg);
6793 dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
6794 break;
6795 case BPF_MUL:
6796 dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
6797 scalar32_min_max_mul(dst_reg, &src_reg);
6798 scalar_min_max_mul(dst_reg, &src_reg);
6799 break;
6800 case BPF_AND:
6801 dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
6802 scalar32_min_max_and(dst_reg, &src_reg);
6803 scalar_min_max_and(dst_reg, &src_reg);
6804 break;
6805 case BPF_OR:
6806 dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
6807 scalar32_min_max_or(dst_reg, &src_reg);
6808 scalar_min_max_or(dst_reg, &src_reg);
6809 break;
6810 case BPF_XOR:
6811 dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
6812 scalar32_min_max_xor(dst_reg, &src_reg);
6813 scalar_min_max_xor(dst_reg, &src_reg);
6814 break;
6815 case BPF_LSH:
6816 if (umax_val >= insn_bitness) {
6817 /* Shifts greater than 31 or 63 are undefined.
6818 * This includes shifts by a negative number.
6819 */
6820 mark_reg_unknown(env, regs, insn->dst_reg);
6821 break;
6822 }
6823 if (alu32) {
6824 scalar32_min_max_lsh(dst_reg, &src_reg);
6825 } else {
6826 scalar_min_max_lsh(dst_reg, &src_reg);
6827 }
6828 break;
6829 case BPF_RSH:
6830 if (umax_val >= insn_bitness) {
6831 /* Shifts greater than 31 or 63 are undefined.
6832 * This includes shifts by a negative number.
6833 */
6834 mark_reg_unknown(env, regs, insn->dst_reg);
6835 break;
6836 }
6837 if (alu32) {
6838 scalar32_min_max_rsh(dst_reg, &src_reg);
6839 } else {
6840 scalar_min_max_rsh(dst_reg, &src_reg);
6841 }
6842 break;
6843 case BPF_ARSH:
6844 if (umax_val >= insn_bitness) {
6845 /* Shifts greater than 31 or 63 are undefined.
6846 * This includes shifts by a negative number.
6847 */
6848 mark_reg_unknown(env, regs, insn->dst_reg);
6849 break;
6850 }
6851 if (alu32) {
6852 scalar32_min_max_arsh(dst_reg, &src_reg);
6853 } else {
6854 scalar_min_max_arsh(dst_reg, &src_reg);
6855 }
6856 break;
6857 default:
6858 mark_reg_unknown(env, regs, insn->dst_reg);
6859 break;
6860 }
6861
6862 /* ALU32 ops are zero extended into 64bit register */
6863 if (alu32) {
6864 zext_32_to_64(dst_reg);
6865 }
6866
6867 reg_bounds_sync(dst_reg);
6868 return 0;
6869 }
6870
6871 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
6872 * and var_off.
6873 */
6874 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn)
6875 {
6876 struct bpf_verifier_state *vstate = env->cur_state;
6877 struct bpf_func_state *state = vstate->frame[vstate->curframe];
6878 struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
6879 struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
6880 u8 opcode = BPF_OP(insn->code);
6881 int err;
6882
6883 dst_reg = &regs[insn->dst_reg];
6884 src_reg = NULL;
6885 if (dst_reg->type != SCALAR_VALUE) {
6886 ptr_reg = dst_reg;
6887 } else {
6888 /* Make sure ID is cleared otherwise dst_reg min/max could be
6889 * incorrectly propagated into other registers by find_equal_scalars()
6890 */
6891 dst_reg->id = 0;
6892 }
6893 if (BPF_SRC(insn->code) == BPF_X) {
6894 src_reg = &regs[insn->src_reg];
6895 if (src_reg->type != SCALAR_VALUE) {
6896 if (dst_reg->type != SCALAR_VALUE) {
6897 /* Combining two pointers by any ALU op yields
6898 * an arbitrary scalar. Disallow all math except
6899 * pointer subtraction
6900 */
6901 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6902 mark_reg_unknown(env, regs, insn->dst_reg);
6903 return 0;
6904 }
6905 verbose(env, "R%d pointer %s pointer prohibited\n", insn->dst_reg,
6906 bpf_alu_string[opcode >> VERIFIER_FOUR]);
6907 return -EACCES;
6908 } else {
6909 /* scalar += pointer
6910 * This is legal, but we have to reverse our
6911 * src/dest handling in computing the range
6912 */
6913 err = mark_chain_precision(env, insn->dst_reg);
6914 if (err) {
6915 return err;
6916 }
6917 return adjust_ptr_min_max_vals(env, insn, src_reg, dst_reg);
6918 }
6919 } else if (ptr_reg) {
6920 /* pointer += scalar */
6921 err = mark_chain_precision(env, insn->src_reg);
6922 if (err) {
6923 return err;
6924 }
6925 return adjust_ptr_min_max_vals(env, insn, dst_reg, src_reg);
6926 }
6927 } else {
6928 /* Pretend the src is a reg with a known value, since we only
6929 * need to be able to read from this state.
6930 */
6931 off_reg.type = SCALAR_VALUE;
6932 verifier_mark_reg_known(&off_reg, insn->imm);
6933 src_reg = &off_reg;
6934 if (ptr_reg) { /* pointer += K */
6935 return adjust_ptr_min_max_vals(env, insn, ptr_reg, src_reg);
6936 }
6937 }
6938
6939 /* Got here implies adding two SCALAR_VALUEs */
6940 if (WARN_ON_ONCE(ptr_reg)) {
6941 print_verifier_state(env, state);
6942 verbose(env, "verifier internal error: unexpected ptr_reg\n");
6943 return -EINVAL;
6944 }
6945 if (WARN_ON(!src_reg)) {
6946 print_verifier_state(env, state);
6947 verbose(env, "verifier internal error: no src_reg\n");
6948 return -EINVAL;
6949 }
6950 return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
6951 }
6952
6953 /* check validity of 32-bit and 64-bit arithmetic operations */
6954 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
6955 {
6956 struct bpf_reg_state *regs = cur_regs(env);
6957 u8 opcode = BPF_OP(insn->code);
6958 int err;
6959
6960 if (opcode == BPF_END || opcode == BPF_NEG) {
6961 if (opcode == BPF_NEG) {
6962 if (BPF_SRC(insn->code) != 0 || insn->src_reg != BPF_REG_0 || insn->off != 0 || insn->imm != 0) {
6963 verbose(env, "BPF_NEG uses reserved fields\n");
6964 return -EINVAL;
6965 }
6966 } else {
6967 if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
6968 (insn->imm != 0x10 && insn->imm != VERIFIER_THIRTYTWO && insn->imm != VERIFIER_SIXTYFOUR) ||
6969 BPF_CLASS(insn->code) == BPF_ALU64) {
6970 verbose(env, "BPF_END uses reserved fields\n");
6971 return -EINVAL;
6972 }
6973 }
6974
6975 /* check src operand */
6976 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6977 if (err) {
6978 return err;
6979 }
6980
6981 if (is_pointer_value(env, insn->dst_reg)) {
6982 verbose(env, "R%d pointer arithmetic prohibited\n", insn->dst_reg);
6983 return -EACCES;
6984 }
6985
6986 /* check dest operand */
6987 err = check_reg_arg(env, insn->dst_reg, DST_OP);
6988 if (err) {
6989 return err;
6990 }
6991 } else if (opcode == BPF_MOV) {
6992 if (BPF_SRC(insn->code) == BPF_X) {
6993 if (insn->imm != 0 || insn->off != 0) {
6994 verbose(env, "BPF_MOV uses reserved fields\n");
6995 return -EINVAL;
6996 }
6997 /* check src operand */
6998 err = check_reg_arg(env, insn->src_reg, SRC_OP);
6999 if (err) {
7000 return err;
7001 }
7002 } else {
7003 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7004 verbose(env, "BPF_MOV uses reserved fields\n");
7005 return -EINVAL;
7006 }
7007 }
7008
7009 /* check dest operand, mark as required later */
7010 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7011 if (err) {
7012 return err;
7013 }
7014
7015 if (BPF_SRC(insn->code) == BPF_X) {
7016 struct bpf_reg_state *src_reg = regs + insn->src_reg;
7017 struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
7018
7019 if (BPF_CLASS(insn->code) == BPF_ALU64) {
7020 /* case: R1 = R2
7021 * copy register state to dest reg
7022 */
7023 if (src_reg->type == SCALAR_VALUE && !src_reg->id) {
7024 /* Assign src and dst registers the same ID
7025 * that will be used by find_equal_scalars()
7026 * to propagate min/max range.
7027 */
7028 src_reg->id = ++env->id_gen;
7029 }
7030 *dst_reg = *src_reg;
7031 dst_reg->live |= REG_LIVE_WRITTEN;
7032 dst_reg->subreg_def = DEF_NOT_SUBREG;
7033 } else {
7034 /* R1 = (u32) R2 */
7035 if (is_pointer_value(env, insn->src_reg)) {
7036 verbose(env, "R%d partial copy of pointer\n", insn->src_reg);
7037 return -EACCES;
7038 } else if (src_reg->type == SCALAR_VALUE) {
7039 *dst_reg = *src_reg;
7040 /* Make sure ID is cleared otherwise
7041 * dst_reg min/max could be incorrectly
7042 * propagated into src_reg by find_equal_scalars()
7043 */
7044 dst_reg->id = 0;
7045 dst_reg->live |= REG_LIVE_WRITTEN;
7046 dst_reg->subreg_def = env->insn_idx + 1;
7047 } else {
7048 mark_reg_unknown(env, regs, insn->dst_reg);
7049 }
7050 zext_32_to_64(dst_reg);
7051
7052 reg_bounds_sync(dst_reg);
7053 }
7054 } else {
7055 /* case: R = imm
7056 * remember the value we stored into this reg
7057 */
7058 /* clear any state __mark_reg_known doesn't set */
7059 mark_reg_unknown(env, regs, insn->dst_reg);
7060 regs[insn->dst_reg].type = SCALAR_VALUE;
7061 if (BPF_CLASS(insn->code) == BPF_ALU64) {
7062 verifier_mark_reg_known(regs + insn->dst_reg, insn->imm);
7063 } else {
7064 verifier_mark_reg_known(regs + insn->dst_reg, (u32)insn->imm);
7065 }
7066 }
7067 } else if (opcode > BPF_END) {
7068 verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
7069 return -EINVAL;
7070 } else { /* all other ALU ops: and, sub, xor, add, ... */
7071 if (BPF_SRC(insn->code) == BPF_X) {
7072 if (insn->imm != 0 || insn->off != 0) {
7073 verbose(env, "BPF_ALU uses reserved fields\n");
7074 return -EINVAL;
7075 }
7076 /* check src1 operand */
7077 err = check_reg_arg(env, insn->src_reg, SRC_OP);
7078 if (err) {
7079 return err;
7080 }
7081 } else {
7082 if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7083 verbose(env, "BPF_ALU uses reserved fields\n");
7084 return -EINVAL;
7085 }
7086 }
7087
7088 /* check src2 operand */
7089 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7090 if (err) {
7091 return err;
7092 }
7093
7094 if ((opcode == BPF_MOD || opcode == BPF_DIV) && BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
7095 verbose(env, "div by zero\n");
7096 return -EINVAL;
7097 }
7098
7099 if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
7100 int size = BPF_CLASS(insn->code) == BPF_ALU64 ? VERIFIER_SIXTYFOUR : VERIFIER_THIRTYTWO;
7101 if (insn->imm < 0 || insn->imm >= size) {
7102 verbose(env, "invalid shift %d\n", insn->imm);
7103 return -EINVAL;
7104 }
7105 }
7106 /* check dest operand */
7107 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7108 if (err) {
7109 return err;
7110 }
7111 return adjust_reg_min_max_vals(env, insn);
7112 }
7113
7114 return 0;
7115 }
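/* Examples of encodings rejected by the checks above (illustrative):
 *   BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 64) -> "invalid shift 64"
 *   BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, 0)  -> "div by zero"
 * while well-formed scalar ALU ops fall through to
 * adjust_reg_min_max_vals() for bounds tracking.
 */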
7116
7117 static void __find_good_pkt_pointers(struct bpf_func_state *state, struct bpf_reg_state *dst_reg,
7118 enum bpf_reg_type type, u16 new_range)
7119 {
7120 struct bpf_reg_state *reg;
7121 int i;
7122
7123 for (i = 0; i < MAX_BPF_REG; i++) {
7124 reg = &state->regs[i];
7125 if (reg->type == type && reg->id == dst_reg->id) {
7126 /* keep the maximum range already checked */
7127 reg->range = max(reg->range, new_range);
7128 }
7129 }
7130
7131 bpf_for_each_spilled_reg(i, state, reg)
7132 {
7133 if (!reg) {
7134 continue;
7135 }
7136 if (reg->type == type && reg->id == dst_reg->id) {
7137 reg->range = max(reg->range, new_range);
7138 }
7139 }
7140 }
7141
7142 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg,
7143 enum bpf_reg_type type, bool range_right_open)
7144 {
7145 u16 new_range;
7146 int i;
7147
7148 if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) {
7149 /* This doesn't give us any range */
7150 return;
7151 }
7152
7153 if (dst_reg->umax_value > MAX_PACKET_OFF || dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) {
7154 /* Risk of overflow. For instance, ptr + (1<<63) may be less
7155 * than pkt_end, but that's because it's also less than pkt.
7156 */
7157 return;
7158 }
7159
7160 new_range = dst_reg->off;
7161 if (range_right_open) {
7162 new_range--;
7163 }
7164
7165 /* Examples for register markings:
7166 *
7167 * pkt_data in dst register:
7168 *
7169 * r2 = r3;
7170 * r2 += 8;
7171 * if (r2 > pkt_end) goto <handle exception>
7172 * <access okay>
7173 *
7174 * r2 = r3;
7175 * r2 += 8;
7176 * if (r2 < pkt_end) goto <access okay>
7177 * <handle exception>
7178 *
7179 * Where:
7180 * r2 == dst_reg, pkt_end == src_reg
7181 * r2=pkt(id=n,off=8,r=0)
7182 * r3=pkt(id=n,off=0,r=0)
7183 *
7184 * pkt_data in src register:
7185 *
7186 * r2 = r3;
7187 * r2 += 8;
7188 * if (pkt_end >= r2) goto <access okay>
7189 * <handle exception>
7190 *
7191 * r2 = r3;
7192 * r2 += 8;
7193 * if (pkt_end <= r2) goto <handle exception>
7194 * <access okay>
7195 *
7196 * Where:
7197 * pkt_end == dst_reg, r2 == src_reg
7198 * r2=pkt(id=n,off=8,r=0)
7199 * r3=pkt(id=n,off=0,r=0)
7200 *
7201 * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
7202 * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
7203 * and [r3, r3 + 8-1) respectively is safe to access depending on
7204 * the check.
7205 */
7206
7207 /* If our ids match, then we must have the same max_value. And we
7208 * don't care about the other reg's fixed offset, since if it's too big
7209 * the range won't allow anything.
7210 * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
7211 */
7212 for (i = 0; i <= vstate->curframe; i++) {
7213 __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, new_range);
7214 }
7215 }
7216
7217 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
7218 {
7219 struct tnum subreg = tnum_subreg(reg->var_off);
7220 s32 sval = (s32)val;
7221
7222 switch (opcode) {
7223 case BPF_JEQ:
7224 if (tnum_is_const(subreg)) {
7225 return !!tnum_equals_const(subreg, val);
7226 }
7227 break;
7228 case BPF_JNE:
7229 if (tnum_is_const(subreg)) {
7230 return !tnum_equals_const(subreg, val);
7231 }
7232 break;
7233 case BPF_JSET:
7234 if ((~subreg.mask & subreg.value) & val) {
7235 return 1;
7236 }
7237 if (!((subreg.mask | subreg.value) & val)) {
7238 return 0;
7239 }
7240 break;
7241 case BPF_JGT:
7242 if (reg->u32_min_value > val) {
7243 return 1;
7244 } else if (reg->u32_max_value <= val) {
7245 return 0;
7246 }
7247 break;
7248 case BPF_JSGT:
7249 if (reg->s32_min_value > sval) {
7250 return 1;
7251 } else if (reg->s32_max_value <= sval) {
7252 return 0;
7253 }
7254 break;
7255 case BPF_JLT:
7256 if (reg->u32_max_value < val) {
7257 return 1;
7258 } else if (reg->u32_min_value >= val) {
7259 return 0;
7260 }
7261 break;
7262 case BPF_JSLT:
7263 if (reg->s32_max_value < sval) {
7264 return 1;
7265 } else if (reg->s32_min_value >= sval) {
7266 return 0;
7267 }
7268 break;
7269 case BPF_JGE:
7270 if (reg->u32_min_value >= val) {
7271 return 1;
7272 } else if (reg->u32_max_value < val) {
7273 return 0;
7274 }
7275 break;
7276 case BPF_JSGE:
7277 if (reg->s32_min_value >= sval) {
7278 return 1;
7279 } else if (reg->s32_max_value < sval) {
7280 return 0;
7281 }
7282 break;
7283 case BPF_JLE:
7284 if (reg->u32_max_value <= val) {
7285 return 1;
7286 } else if (reg->u32_min_value > val) {
7287 return 0;
7288 }
7289 break;
7290 case BPF_JSLE:
7291 if (reg->s32_max_value <= sval) {
7292 return 1;
7293 } else if (reg->s32_min_value > sval) {
7294 return 0;
7295 }
7296 break;
7297 }
7298
7299 return -1;
7300 }
7301
7302 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7303 {
7304 s64 sval = (s64)val;
7305
7306 switch (opcode) {
7307 case BPF_JEQ:
7308 if (tnum_is_const(reg->var_off)) {
7309 return !!tnum_equals_const(reg->var_off, val);
7310 }
7311 break;
7312 case BPF_JNE:
7313 if (tnum_is_const(reg->var_off)) {
7314 return !tnum_equals_const(reg->var_off, val);
7315 }
7316 break;
7317 case BPF_JSET:
7318 if ((~reg->var_off.mask & reg->var_off.value) & val) {
7319 return 1;
7320 }
7321 if (!((reg->var_off.mask | reg->var_off.value) & val)) {
7322 return 0;
7323 }
7324 break;
7325 case BPF_JGT:
7326 if (reg->umin_value > val) {
7327 return 1;
7328 } else if (reg->umax_value <= val) {
7329 return 0;
7330 }
7331 break;
7332 case BPF_JSGT:
7333 if (reg->smin_value > sval) {
7334 return 1;
7335 } else if (reg->smax_value <= sval) {
7336 return 0;
7337 }
7338 break;
7339 case BPF_JLT:
7340 if (reg->umax_value < val) {
7341 return 1;
7342 } else if (reg->umin_value >= val) {
7343 return 0;
7344 }
7345 break;
7346 case BPF_JSLT:
7347 if (reg->smax_value < sval) {
7348 return 1;
7349 } else if (reg->smin_value >= sval) {
7350 return 0;
7351 }
7352 break;
7353 case BPF_JGE:
7354 if (reg->umin_value >= val) {
7355 return 1;
7356 } else if (reg->umax_value < val) {
7357 return 0;
7358 }
7359 break;
7360 case BPF_JSGE:
7361 if (reg->smin_value >= sval) {
7362 return 1;
7363 } else if (reg->smax_value < sval) {
7364 return 0;
7365 }
7366 break;
7367 case BPF_JLE:
7368 if (reg->umax_value <= val) {
7369 return 1;
7370 } else if (reg->umin_value > val) {
7371 return 0;
7372 }
7373 break;
7374 case BPF_JSLE:
7375 if (reg->smax_value <= sval) {
7376 return 1;
7377 } else if (reg->smin_value > sval) {
7378 return 0;
7379 }
7380 break;
7381 }
7382
7383 return -1;
7384 }
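/* Illustrative example: for "if r1 > 5" (BPF_JGT, val = 5), a register
 * with umin_value >= 6 makes the branch always taken (return 1), a
 * register with umax_value <= 5 makes it never taken (return 0), and
 * anything in between returns -1 so both paths are explored.
 */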
7385
7386 /* compute branch direction of the expression "if (reg opcode val) goto target;"
7387 * and return:
7388 * 1 - branch will be taken and "goto target" will be executed
7389 * 0 - branch will not be taken and fall-through to next insn
7390 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
7391 * value range is [0,10]
7392 */
7393 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, bool is_jmp32)
7394 {
7395 if (__is_pointer_value(false, reg)) {
7396 if (!reg_type_not_null(reg->type)) {
7397 return -1;
7398 }
7399
7400 /* If the pointer is valid, tests against zero will fail, so we can
7401 * use this to determine the branch taken.
7402 */
7403 if (val != 0) {
7404 return -1;
7405 }
7406
7407 switch (opcode) {
7408 case BPF_JEQ:
7409 return 0;
7410 case BPF_JNE:
7411 return 1;
7412 default:
7413 return -1;
7414 }
7415 }
7416
7417 if (is_jmp32) {
7418 return is_branch32_taken(reg, val, opcode);
7419 }
7420 return is_branch64_taken(reg, val, opcode);
7421 }
7422
7423 /* Adjusts the register min/max values in the case that the dst_reg is the
7424 * variable register that we are working on, and src_reg is a constant or we're
7425 * simply doing a BPF_K check.
7426 * In JEQ/JNE cases we also adjust the var_off values.
7427 */
7428 static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u32 val32,
7429 u8 opcode, bool is_jmp32)
7430 {
7431 struct tnum false_32off = tnum_subreg(false_reg->var_off);
7432 struct tnum false_64off = false_reg->var_off;
7433 struct tnum true_32off = tnum_subreg(true_reg->var_off);
7434 struct tnum true_64off = true_reg->var_off;
7435 s64 sval = (s64)val;
7436 s32 sval32 = (s32)val32;
7437
7438 /* If the dst_reg is a pointer, we can't learn anything about its
7439 * variable offset from the compare (unless src_reg were a pointer into
7440 * the same object, but we don't bother with that).
7441 * Since false_reg and true_reg have the same type by construction, we
7442 * only need to check one of them for pointerness.
7443 */
7444 if (__is_pointer_value(false, false_reg)) {
7445 return;
7446 }
7447
7448 switch (opcode) {
7449 /* JEQ/JNE comparison doesn't change the register equivalence.
7450 *
7451 * r1 = r2;
7452 * if (r1 == 42) goto label;
7453 * ...
7454 * label: // here both r1 and r2 are known to be 42.
7455 *
7456 * Hence when marking a register as known, preserve its ID.
7457 */
7458 case BPF_JEQ:
7459 if (is_jmp32) {
7460 __mark_reg32_known(true_reg, val32);
7461 true_32off = tnum_subreg(true_reg->var_off);
7462 } else {
7463 ___mark_reg_known(true_reg, val);
7464 true_64off = true_reg->var_off;
7465 }
7466 break;
7467 case BPF_JNE:
7468 if (is_jmp32) {
7469 __mark_reg32_known(false_reg, val32);
7470 false_32off = tnum_subreg(false_reg->var_off);
7471 } else {
7472 ___mark_reg_known(false_reg, val);
7473 false_64off = false_reg->var_off;
7474 }
7475 break;
7476 case BPF_JSET:
7477 if (is_jmp32) {
7478 false_32off = tnum_and(false_32off, tnum_const(~val32));
7479 if (is_power_of_2(val32)) {
7480 true_32off = tnum_or(true_32off, tnum_const(val32));
7481 }
7482 } else {
7483 false_64off = tnum_and(false_64off, tnum_const(~val));
7484 if (is_power_of_2(val)) {
7485 true_64off = tnum_or(true_64off, tnum_const(val));
7486 }
7487 }
7488 break;
7489 case BPF_JGE:
7490 case BPF_JGT: {
7491 if (is_jmp32) {
7492 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
7493 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
7494
7495 false_reg->u32_max_value = min(false_reg->u32_max_value, false_umax);
7496 true_reg->u32_min_value = max(true_reg->u32_min_value, true_umin);
7497 } else {
7498 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
7499 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
7500
7501 false_reg->umax_value = min(false_reg->umax_value, false_umax);
7502 true_reg->umin_value = max(true_reg->umin_value, true_umin);
7503 }
7504 break;
7505 }
7506 case BPF_JSGE:
7507 case BPF_JSGT: {
7508 if (is_jmp32) {
7509 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
7510 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
7511
7512 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
7513 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
7514 } else {
7515 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
7516 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
7517
7518 false_reg->smax_value = min(false_reg->smax_value, false_smax);
7519 true_reg->smin_value = max(true_reg->smin_value, true_smin);
7520 }
7521 break;
7522 }
7523 case BPF_JLE:
7524 case BPF_JLT: {
7525 if (is_jmp32) {
7526 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
7527 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
7528
7529 false_reg->u32_min_value = max(false_reg->u32_min_value, false_umin);
7530 true_reg->u32_max_value = min(true_reg->u32_max_value, true_umax);
7531 } else {
7532 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
7533 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
7534
7535 false_reg->umin_value = max(false_reg->umin_value, false_umin);
7536 true_reg->umax_value = min(true_reg->umax_value, true_umax);
7537 }
7538 break;
7539 }
7540 case BPF_JSLE:
7541 case BPF_JSLT: {
7542 if (is_jmp32) {
7543 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
7544 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
7545
7546 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
7547 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
7548 } else {
7549 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
7550 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
7551
7552 false_reg->smin_value = max(false_reg->smin_value, false_smin);
7553 true_reg->smax_value = min(true_reg->smax_value, true_smax);
7554 }
7555 break;
7556 }
7557 default:
7558 return;
7559 }
7560
7561 if (is_jmp32) {
7562 false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), tnum_subreg(false_32off));
7563 true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), tnum_subreg(true_32off));
7564 verifier_reg_combine_32_into_64(false_reg);
7565 verifier_reg_combine_32_into_64(true_reg);
7566 } else {
7567 false_reg->var_off = false_64off;
7568 true_reg->var_off = true_64off;
7569 __reg_combine_64_into_32(false_reg);
7570 __reg_combine_64_into_32(true_reg);
7571 }
7572 }
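/* Illustrative example: for "if r1 > 7" (BPF_JGT) against a scalar with
 * unsigned range [0, 100], the true branch tightens umin_value to
 * max(0, 7 + 1) = 8 while the false branch tightens umax_value to
 * min(100, 7) = 7; the var_off is then folded back into both bounds.
 */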
7573
7574 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
7575 * the variable reg.
7576 */
7577 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u32 val32,
7578 u8 opcode, bool is_jmp32)
7579 {
7580 /* How can we transform "a <op> b" into "b <op> a"? */
7581 static const u8 opcode_flip[VERIFIER_SIXTEEN] = {
7582 [BPF_JEQ >> VERIFIER_FOUR] = BPF_JEQ,
7583 [BPF_JNE >> VERIFIER_FOUR] = BPF_JNE,
7584 [BPF_JSET >> VERIFIER_FOUR] = BPF_JSET,
7585 /* these swap "lesser" and "greater" (L and G in the opcodes) */
7586 [BPF_JGE >> VERIFIER_FOUR] = BPF_JLE,
7587 [BPF_JGT >> VERIFIER_FOUR] = BPF_JLT,
7588 [BPF_JLE >> VERIFIER_FOUR] = BPF_JGE,
7589 [BPF_JLT >> VERIFIER_FOUR] = BPF_JGT,
7590 [BPF_JSGE >> VERIFIER_FOUR] = BPF_JSLE,
7591 [BPF_JSGT >> VERIFIER_FOUR] = BPF_JSLT,
7592 [BPF_JSLE >> VERIFIER_FOUR] = BPF_JSGE,
7593 [BPF_JSLT >> VERIFIER_FOUR] = BPF_JSGT};
7594 opcode = opcode_flip[opcode >> VERIFIER_FOUR];
7595 /* This uses zero as "not present in table"; luckily the zero opcode,
7596 * BPF_JA, can't get here.
7597 */
7598 if (opcode) {
7599 reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
7600 }
7601 }
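/* Illustrative example: a BPF_JLT test with the constant held in the
 * dst register is flipped to BPF_JGT so the refinement above is applied
 * to the variable src register instead.
 */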
7602
7603 /* Regs are known to be equal, so intersect their min/max/var_off */
7604 static void __reg_combine_min_max(struct bpf_reg_state *src_reg, struct bpf_reg_state *dst_reg)
7605 {
7606 src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, dst_reg->umin_value);
7607 src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, dst_reg->umax_value);
7608 src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, dst_reg->smin_value);
7609 src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, dst_reg->smax_value);
7610 src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, dst_reg->var_off);
7611 /* We might have learned new bounds from the var_off. */
7612 reg_bounds_sync(src_reg);
7613 reg_bounds_sync(dst_reg);
7614 }
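/* Illustrative example: if src has unsigned range [0, 10] and dst has
 * [5, 100], then after a successful JEQ both end up with [5, 10] and
 * the intersection of the two tnums.
 */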
7615
7616 static void reg_combine_min_max(struct bpf_reg_state *true_src, struct bpf_reg_state *true_dst,
7617 struct bpf_reg_state *false_src, struct bpf_reg_state *false_dst, u8 opcode)
7618 {
7619 switch (opcode) {
7620 case BPF_JEQ:
7621 __reg_combine_min_max(true_src, true_dst);
7622 break;
7623 case BPF_JNE:
7624 __reg_combine_min_max(false_src, false_dst);
7625 break;
7626 }
7627 }
7628
7629 static void mark_ptr_or_null_reg(struct bpf_func_state *state, struct bpf_reg_state *reg, u32 id, bool is_null)
7630 {
7631 if (type_may_be_null(reg->type) && reg->id == id &&
7632 !WARN_ON_ONCE(!reg->id)) {
7633 if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
7634 !tnum_equals_const(reg->var_off, 0) || reg->off)) {
7635 /* Old offset (both fixed and variable parts) should
7636 * have been known-zero, because we don't allow pointer
7637 * arithmetic on pointers that might be NULL. If we
7638 * see this happening, don't convert the register.
7639 */
7640 return;
7641 }
7642 if (is_null) {
7643 reg->type = SCALAR_VALUE;
7644 } else if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
7645 const struct bpf_map *map = reg->map_ptr;
7646
7647 if (map->inner_map_meta) {
7648 reg->type = CONST_PTR_TO_MAP;
7649 reg->map_ptr = map->inner_map_meta;
7650 } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
7651 reg->type = PTR_TO_XDP_SOCK;
7652 } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || map->map_type == BPF_MAP_TYPE_SOCKHASH) {
7653 reg->type = PTR_TO_SOCKET;
7654 } else {
7655 reg->type = PTR_TO_MAP_VALUE;
7656 }
7657 } else {
7658 reg->type &= ~PTR_MAYBE_NULL;
7659 }
7660
7661 if (is_null) {
7662 /* We don't need id and ref_obj_id from this point
7663 * onwards, so reset them to give state pruning a
7664 * chance to take effect.
7665 */
7666 reg->id = 0;
7667 reg->ref_obj_id = 0;
7668 } else if (!reg_may_point_to_spin_lock(reg)) {
7669 /* For not-NULL ptr, reg->ref_obj_id will be reset
7670 * in release_reg_references().
7671 *
7672 * reg->id is still used by spin_lock ptr. Other
7673 * than spin_lock ptr type, reg->id can be reset.
7674 */
7675 reg->id = 0;
7676 }
7677 }
7678 }
7679
7680 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, bool is_null)
7681 {
7682 struct bpf_reg_state *reg;
7683 int i;
7684
7685 for (i = 0; i < MAX_BPF_REG; i++) {
7686 mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
7687 }
7688
7689 bpf_for_each_spilled_reg(i, state, reg)
7690 {
7691 if (!reg) {
7692 continue;
7693 }
7694 mark_ptr_or_null_reg(state, reg, id, is_null);
7695 }
7696 }
7697
7698 /* The logic is similar to find_good_pkt_pointers(), both could eventually
7699 * be folded together at some point.
7700 */
7701 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null)
7702 {
7703 struct bpf_func_state *state = vstate->frame[vstate->curframe];
7704 struct bpf_reg_state *regs = state->regs;
7705 u32 ref_obj_id = regs[regno].ref_obj_id;
7706 u32 id = regs[regno].id;
7707 int i;
7708
7709 if (ref_obj_id && ref_obj_id == id && is_null) {
7710 /* regs[regno] is in the " == NULL" branch.
7711 * No one could have freed the reference state before
7712 * doing the NULL check.
7713 */
7714 WARN_ON_ONCE(release_reference_state(state, id));
7715 }
7716
7717 for (i = 0; i <= vstate->curframe; i++) {
7718 __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
7719 }
7720 }
7721
7722 static bool try_match_pkt_pointers(const struct bpf_insn *insn, struct bpf_reg_state *dst_reg,
7723 struct bpf_reg_state *src_reg, struct bpf_verifier_state *this_branch,
7724 struct bpf_verifier_state *other_branch)
7725 {
7726 if (BPF_SRC(insn->code) != BPF_X) {
7727 return false;
7728 }
7729
7730 /* Pointers are always 64-bit. */
7731 if (BPF_CLASS(insn->code) == BPF_JMP32) {
7732 return false;
7733 }
7734
7735 switch (BPF_OP(insn->code)) {
7736 case BPF_JGT:
7737 if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7738 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7739 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
7740 find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, false);
7741 } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7742 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7743 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
7744 find_good_pkt_pointers(other_branch, src_reg, src_reg->type, true);
7745 } else {
7746 return false;
7747 }
7748 break;
7749 case BPF_JLT:
7750 if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7751 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7752 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
7753 find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, true);
7754 } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7755 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7756 /* pkt_end < pkt_data', pkt_data > pkt_meta' */
7757 find_good_pkt_pointers(this_branch, src_reg, src_reg->type, false);
7758 } else {
7759 return false;
7760 }
7761 break;
7762 case BPF_JGE:
7763 if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7764 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7765 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
7766 find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, true);
7767 } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7768 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7769 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
7770 find_good_pkt_pointers(other_branch, src_reg, src_reg->type, false);
7771 } else {
7772 return false;
7773 }
7774 break;
7775 case BPF_JLE:
7776 if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7777 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7778 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
7779 find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, false);
7780 } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7781 (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7782 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
7783 find_good_pkt_pointers(this_branch, src_reg, src_reg->type, true);
7784 } else {
7785 return false;
7786 }
7787 break;
7788 default:
7789 return false;
7790 }
7791
7792 return true;
7793 }
7794
7795 static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg)
7796 {
7797 struct bpf_func_state *state;
7798 struct bpf_reg_state *reg;
7799 int i, j;
7800
7801 for (i = 0; i <= vstate->curframe; i++) {
7802 state = vstate->frame[i];
7803 for (j = 0; j < MAX_BPF_REG; j++) {
7804 reg = &state->regs[j];
7805 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) {
7806 *reg = *known_reg;
7807 }
7808 }
7809
7810 bpf_for_each_spilled_reg(j, state, reg)
7811 {
7812 if (!reg) {
7813 continue;
7814 }
7815 if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) {
7816 *reg = *known_reg;
7817 }
7818 }
7819 }
7820 }
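/* Illustrative example: after "r2 = r1" both registers share an id, so
 * when a later "if r1 > 10" refines r1's bounds in check_cond_jmp_op(),
 * the refined state is copied to r2 (and to any spilled copies) in both
 * branches.
 */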
7821
7822 static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx)
7823 {
7824 struct bpf_verifier_state *this_branch = env->cur_state;
7825 struct bpf_verifier_state *other_branch;
7826 struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
7827 struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
7828 u8 opcode = BPF_OP(insn->code);
7829 bool is_jmp32;
7830 int pred = -1;
7831 int err;
7832
7833 /* Only conditional jumps are expected to reach here. */
7834 if (opcode == BPF_JA || opcode > BPF_JSLE) {
7835 verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
7836 return -EINVAL;
7837 }
7838
7839 if (BPF_SRC(insn->code) == BPF_X) {
7840 if (insn->imm != 0) {
7841 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7842 return -EINVAL;
7843 }
7844
7845 /* check src1 operand */
7846 err = check_reg_arg(env, insn->src_reg, SRC_OP);
7847 if (err) {
7848 return err;
7849 }
7850
7851 if (is_pointer_value(env, insn->src_reg)) {
7852 verbose(env, "R%d pointer comparison prohibited\n", insn->src_reg);
7853 return -EACCES;
7854 }
7855 src_reg = &regs[insn->src_reg];
7856 } else {
7857 if (insn->src_reg != BPF_REG_0) {
7858 verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7859 return -EINVAL;
7860 }
7861 }
7862
7863 /* check src2 operand */
7864 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7865 if (err) {
7866 return err;
7867 }
7868
7869 dst_reg = &regs[insn->dst_reg];
7870 is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
7871
7872 if (BPF_SRC(insn->code) == BPF_K) {
7873 pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
7874 } else if (src_reg->type == SCALAR_VALUE && is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
7875 pred = is_branch_taken(dst_reg, tnum_subreg(src_reg->var_off).value, opcode, is_jmp32);
7876 } else if (src_reg->type == SCALAR_VALUE && !is_jmp32 && tnum_is_const(src_reg->var_off)) {
7877 pred = is_branch_taken(dst_reg, src_reg->var_off.value, opcode, is_jmp32);
7878 }
7879
7880 if (pred >= 0) {
7881 /* If we get here with a dst_reg pointer type it is because
7882 * above is_branch_taken() special cased the 0 comparison.
7883 */
7884 if (!__is_pointer_value(false, dst_reg)) {
7885 err = mark_chain_precision(env, insn->dst_reg);
7886 }
7887 if (BPF_SRC(insn->code) == BPF_X && !err) {
7888 err = mark_chain_precision(env, insn->src_reg);
7889 }
7890 if (err) {
7891 return err;
7892 }
7893 }
7894
7895 if (pred == 1) {
7896 /* Only follow the goto, ignore fall-through. If needed, push
7897 * the fall-through branch for simulation under speculative
7898 * execution.
7899 */
7900 if (!env->bypass_spec_v1 && !sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx)) {
7901 return -EFAULT;
7902 }
7903 *insn_idx += insn->off;
7904 return 0;
7905 } else if (pred == 0) {
7906 /* Only follow the fall-through branch, since that's where the
7907 * program will go. If needed, push the goto branch for
7908 * simulation under speculative execution.
7909 */
7910 if (!env->bypass_spec_v1 && !sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1, *insn_idx)) {
7911 return -EFAULT;
7912 }
7913 return 0;
7914 }
7915
7916 other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
7917 if (!other_branch) {
7918 return -EFAULT;
7919 }
7920 other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
7921
7922 /* detect if we are comparing against a constant value so we can adjust
7923 * our min/max values for our dst register.
7924 * this is only legit if both are scalars (or pointers to the same
7925 * object, I suppose, but we don't support that right now), because
7926 * otherwise the different base pointers mean the offsets aren't
7927 * comparable.
7928 */
7929 if (BPF_SRC(insn->code) == BPF_X) {
7930 struct bpf_reg_state *src_reg_in = &regs[insn->src_reg];
7931
7932 if (dst_reg->type == SCALAR_VALUE && src_reg_in->type == SCALAR_VALUE) {
7933 if (tnum_is_const(src_reg_in->var_off) || (is_jmp32 && tnum_is_const(tnum_subreg(src_reg_in->var_off)))) {
7934 reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, src_reg_in->var_off.value,
7935 tnum_subreg(src_reg_in->var_off).value, opcode, is_jmp32);
7936 } else if (tnum_is_const(dst_reg->var_off) || (is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off)))) {
7937 reg_set_min_max_inv(&other_branch_regs[insn->src_reg], src_reg_in, dst_reg->var_off.value,
7938 tnum_subreg(dst_reg->var_off).value, opcode, is_jmp32);
7939 } else if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE)) {
7940 /* Comparing for equality, we can combine knowledge */
7941 reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], src_reg_in,
7942 dst_reg, opcode);
7943 }
7944 if (src_reg_in->id && !WARN_ON_ONCE(src_reg_in->id != other_branch_regs[insn->src_reg].id)) {
7945 find_equal_scalars(this_branch, src_reg_in);
7946 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
7947 }
7948 }
7949 } else if (dst_reg->type == SCALAR_VALUE) {
7950 reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, insn->imm, (u32)insn->imm, opcode, is_jmp32);
7951 }
7952
7953 if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
7954 !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
7955 find_equal_scalars(this_branch, dst_reg);
7956 find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
7957 }
7958
7959 /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
7960 * NOTE: these optimizations below are related with pointer comparison
7961 * which will never be JMP32.
7962 */
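/* Roughly, for a sequence such as
 *   r0 = bpf_map_lookup_elem(map, &key);
 *   if r0 == 0 goto L;
 * mark_ptr_or_null_regs() turns r0 (and every register sharing its id)
 * into PTR_TO_MAP_VALUE on the fall-through path and into a known-zero
 * scalar on the taken path.
 */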
7963 if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
7964 type_may_be_null(dst_reg->type)) {
7965 /* Mark all identical registers in each branch as either
7966 * safe or unknown, depending on whether the R == 0 or R != 0 branch is taken.
7967 */
7968 mark_ptr_or_null_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
7969 mark_ptr_or_null_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
7970 } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg], this_branch, other_branch) &&
7971 is_pointer_value(env, insn->dst_reg)) {
7972 verbose(env, "R%d pointer comparison prohibited\n", insn->dst_reg);
7973 return -EACCES;
7974 }
7975 if (env->log.level & BPF_LOG_LEVEL) {
7976 print_verifier_state(env, this_branch->frame[this_branch->curframe]);
7977 }
7978 return 0;
7979 }
7980
7981 /* verify BPF_LD_IMM64 instruction */
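/* Sketch of the encoding handled below: BPF_LD_IMM64 occupies two
 * consecutive insns; insn->imm holds the low 32 bits and (insn + 1)->imm
 * the high 32 bits of the 64-bit immediate, while insn->src_reg selects
 * the pseudo variant (0 for a plain constant, otherwise BPF_PSEUDO_MAP_FD,
 * BPF_PSEUDO_MAP_VALUE or BPF_PSEUDO_BTF_ID).
 */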
7982 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
7983 {
7984 struct bpf_insn_aux_data *aux = cur_aux(env);
7985 struct bpf_reg_state *regs = cur_regs(env);
7986 struct bpf_reg_state *dst_reg;
7987 struct bpf_map *map;
7988 int err;
7989
7990 if (BPF_SIZE(insn->code) != BPF_DW) {
7991 verbose(env, "invalid BPF_LD_IMM insn\n");
7992 return -EINVAL;
7993 }
7994 if (insn->off != 0) {
7995 verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
7996 return -EINVAL;
7997 }
7998
7999 err = check_reg_arg(env, insn->dst_reg, DST_OP);
8000 if (err) {
8001 return err;
8002 }
8003
8004 dst_reg = &regs[insn->dst_reg];
8005 if (insn->src_reg == 0) {
8006 u64 imm = ((u64)(insn + 1)->imm << VERIFIER_THIRTYTWO) | (u32)insn->imm;
8007
8008 dst_reg->type = SCALAR_VALUE;
8009 verifier_mark_reg_known(&regs[insn->dst_reg], imm);
8010 return 0;
8011 }
8012
8013 /* All special src_reg cases are listed below. From this point onwards
8014 * we either succeed and assign a corresponding dst_reg->type after
8015 * zeroing the offset, or fail and reject the program.
8016 */
8017 mark_reg_known_zero(env, regs, insn->dst_reg);
8018
8019 if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
8020 dst_reg->type = aux->btf_var.reg_type;
8021 switch (base_type(dst_reg->type)) {
8022 case PTR_TO_MEM:
8023 dst_reg->mem_size = aux->btf_var.mem_size;
8024 break;
8025 case PTR_TO_BTF_ID:
8026 case PTR_TO_PERCPU_BTF_ID:
8027 dst_reg->btf_id = aux->btf_var.btf_id;
8028 break;
8029 default:
8030 verbose(env, "bpf verifier is misconfigured\n");
8031 return -EFAULT;
8032 }
8033 return 0;
8034 }
8035
8036 map = env->used_maps[aux->map_index];
8037 dst_reg->map_ptr = map;
8038
8039 if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
8040 dst_reg->type = PTR_TO_MAP_VALUE;
8041 dst_reg->off = aux->map_off;
8042 if (map_value_has_spin_lock(map)) {
8043 dst_reg->id = ++env->id_gen;
8044 }
8045 } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8046 dst_reg->type = CONST_PTR_TO_MAP;
8047 } else {
8048 verbose(env, "bpf verifier is misconfigured\n");
8049 return -EINVAL;
8050 }
8051
8052 return 0;
8053 }
8054
8055 static bool may_access_skb(enum bpf_prog_type type)
8056 {
8057 switch (type) {
8058 case BPF_PROG_TYPE_SOCKET_FILTER:
8059 case BPF_PROG_TYPE_SCHED_CLS:
8060 case BPF_PROG_TYPE_SCHED_ACT:
8061 return true;
8062 default:
8063 return false;
8064 }
8065 }
8066
8067 /* verify safety of LD_ABS|LD_IND instructions:
8068 * - they can only appear in the programs where ctx == skb
8069 * - since they are wrappers of function calls, they scratch R1-R5 registers,
8070 * preserve R6-R9, and store return value into R0
8071 *
8072 * Implicit input:
8073 * ctx == skb == R6 == CTX
8074 *
8075 * Explicit input:
8076 * SRC == any register
8077 * IMM == 32-bit immediate
8078 *
8079 * Output:
8080 * R0 - 8/16/32-bit skb data converted to cpu endianness
8081 */
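/* Illustrative use (sketch): a socket filter may issue
 *   BPF_LD_ABS(BPF_H, 12)
 * to fetch the 16-bit half-word at packet offset 12 into R0, with R6
 * implicitly holding the skb context as described above.
 */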
8082 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
8083 {
8084 struct bpf_reg_state *regs = cur_regs(env);
8085 static const int ctx_reg = BPF_REG_6;
8086 u8 mode = BPF_MODE(insn->code);
8087 int i, err;
8088
8089 if (!may_access_skb(resolve_prog_type(env->prog))) {
8090 verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
8091 return -EINVAL;
8092 }
8093
8094 if (!env->ops->gen_ld_abs) {
8095 verbose(env, "bpf verifier is misconfigured\n");
8096 return -EINVAL;
8097 }
8098
8099 if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW ||
8100 (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
8101 verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
8102 return -EINVAL;
8103 }
8104
8105 /* check whether implicit source operand (register R6) is readable */
8106 err = check_reg_arg(env, ctx_reg, SRC_OP);
8107 if (err) {
8108 return err;
8109 }
8110
8111 /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
8112 * gen_ld_abs() may terminate the program at runtime, leading to
8113 * reference leak.
8114 */
8115 err = check_reference_leak(env);
8116 if (err) {
8117 verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
8118 return err;
8119 }
8120
8121 if (env->cur_state->active_spin_lock) {
8122 verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
8123 return -EINVAL;
8124 }
8125
8126 if (regs[ctx_reg].type != PTR_TO_CTX) {
8127 verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
8128 return -EINVAL;
8129 }
8130
8131 if (mode == BPF_IND) {
8132 /* check explicit source operand */
8133 err = check_reg_arg(env, insn->src_reg, SRC_OP);
8134 if (err) {
8135 return err;
8136 }
8137 }
8138
8139 err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
8140 if (err < 0) {
8141 return err;
8142 }
8143
8144 /* reset caller saved regs to unreadable */
8145 for (i = 0; i < CALLER_SAVED_REGS; i++) {
8146 mark_reg_not_init(env, regs, caller_saved[i]);
8147 check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8148 }
8149
8150 /* mark destination R0 register as readable, since it contains
8151 * the value fetched from the packet.
8152 * Already marked as written above.
8153 */
8154 mark_reg_unknown(env, regs, BPF_REG_0);
8155 /* ld_abs load up to 32-bit skb data. */
8156 regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
8157 return 0;
8158 }
8159
8160 static int check_return_code(struct bpf_verifier_env *env)
8161 {
8162 struct tnum enforce_attach_type_range = tnum_unknown;
8163 const struct bpf_prog *prog = env->prog;
8164 struct bpf_reg_state *reg;
8165 struct tnum range = tnum_range(0, 1);
8166 enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8167 int err;
8168 const bool is_subprog = env->cur_state->frame[0]->subprogno;
8169
8170 /* LSM and struct_ops func-ptr's return type could be "void" */
8171 if (!is_subprog && (prog_type == BPF_PROG_TYPE_STRUCT_OPS || prog_type == BPF_PROG_TYPE_LSM) &&
8172 !prog->aux->attach_func_proto->type) {
8173 return 0;
8174 }
8175
8176 /* eBPF calling convention is such that R0 is used
8177 * to return the value from eBPF program.
8178 * Make sure that it's readable at this time
8179 * of bpf_exit, which means that program wrote
8180 * something into it earlier
8181 */
8182 err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8183 if (err) {
8184 return err;
8185 }
8186
8187 if (is_pointer_value(env, BPF_REG_0)) {
8188 verbose(env, "R0 leaks addr as return value\n");
8189 return -EACCES;
8190 }
8191
8192 reg = cur_regs(env) + BPF_REG_0;
8193 if (is_subprog) {
8194 if (reg->type != SCALAR_VALUE) {
8195 verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
8196 reg_type_str(env, reg->type));
8197 return -EINVAL;
8198 }
8199 return 0;
8200 }
8201
8202 switch (prog_type) {
8203 case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
8204 if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
8205 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
8206 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
8207 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
8208 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
8209 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) {
8210 range = tnum_range(1, 1);
8211 }
8212 break;
8213 case BPF_PROG_TYPE_CGROUP_SKB:
8214 if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
8215 range = tnum_range(0, 3);
8216 enforce_attach_type_range = tnum_range(2, 3);
8217 }
8218 break;
8219 case BPF_PROG_TYPE_CGROUP_SOCK:
8220 case BPF_PROG_TYPE_SOCK_OPS:
8221 case BPF_PROG_TYPE_CGROUP_DEVICE:
8222 case BPF_PROG_TYPE_CGROUP_SYSCTL:
8223 case BPF_PROG_TYPE_CGROUP_SOCKOPT:
8224 break;
8225 case BPF_PROG_TYPE_RAW_TRACEPOINT:
8226 if (!env->prog->aux->attach_btf_id) {
8227 return 0;
8228 }
8229 range = tnum_const(0);
8230 break;
8231 case BPF_PROG_TYPE_TRACING:
8232 switch (env->prog->expected_attach_type) {
8233 case BPF_TRACE_FENTRY:
8234 case BPF_TRACE_FEXIT:
8235 range = tnum_const(0);
8236 break;
8237 case BPF_TRACE_RAW_TP:
8238 case BPF_MODIFY_RETURN:
8239 return 0;
8240 case BPF_TRACE_ITER:
8241 break;
8242 default:
8243 return -ENOTSUPP;
8244 }
8245 break;
8246 case BPF_PROG_TYPE_SK_LOOKUP:
8247 range = tnum_range(SK_DROP, SK_PASS);
8248 break;
8249 case BPF_PROG_TYPE_EXT:
8250 /* freplace program can return anything as its return value
8251 * depends on the to-be-replaced kernel func or bpf program.
8252 */
8253 default:
8254 return 0;
8255 }
8256
8257 if (reg->type != SCALAR_VALUE) {
8258 verbose(env, "At program exit the register R0 is not a known value (%s)\n", reg_type_str(env, reg->type));
8259 return -EINVAL;
8260 }
8261
8262 if (!tnum_in(range, reg->var_off)) {
8263 char tn_buf[48];
8264
8265 verbose(env, "At program exit the register R0 ");
8266 if (!tnum_is_unknown(reg->var_off)) {
8267 tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8268 verbose(env, "has value %s", tn_buf);
8269 } else {
8270 verbose(env, "has unknown scalar value");
8271 }
8272 tnum_strn(tn_buf, sizeof(tn_buf), range);
8273 verbose(env, " should have been in %s\n", tn_buf);
8274 return -EINVAL;
8275 }
8276
8277 if (!tnum_is_unknown(enforce_attach_type_range) && tnum_in(enforce_attach_type_range, reg->var_off)) {
8278 env->prog->enforce_expected_attach_type = 1;
8279 }
8280 return 0;
8281 }
8282
8283 /* non-recursive DFS pseudo code
8284 * 1 procedure DFS-iterative(G,v):
8285 * 2 label v as discovered
8286 * 3 let S be a stack
8287 * 4 S.push(v)
8288 * 5 while S is not empty
8289 * 6 t <- S.pop()
8290 * 7 if t is what we're looking for:
8291 * 8 return t
8292 * 9 for all edges e in G.adjacentEdges(t) do
8293 * 10 if edge e is already labelled
8294 * 11 continue with the next edge
8295 * 12 w <- G.adjacentVertex(t,e)
8296 * 13 if vertex w is not discovered and not explored
8297 * 14 label e as tree-edge
8298 * 15 label w as discovered
8299 * 16 S.push(w)
8300 * 17 continue at 5
8301 * 18 else if vertex w is discovered
8302 * 19 label e as back-edge
8303 * 20 else
8304 * 21 // vertex w is explored
8305 * 22 label e as forward- or cross-edge
8306 * 23 label t as explored
8307 * 24 S.pop()
8308 *
8309 * convention:
8310 * 0x10 - discovered
8311 * 0x11 - discovered and fall-through edge labelled
8312 * 0x12 - discovered and fall-through and branch edges labelled
8313 * 0x20 - explored
8314 */
8315
8316 enum {
8317 DISCOVERED = 0x10,
8318 EXPLORED = 0x20,
8319 FALLTHROUGH = 1,
8320 BRANCH = 2,
8321 };
8322
8323 static u32 state_htab_size(struct bpf_verifier_env *env)
8324 {
8325 return env->prog->len;
8326 }
8327
8328 static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
8329 {
8330 struct bpf_verifier_state *cur = env->cur_state;
8331 struct bpf_func_state *state = cur->frame[cur->curframe];
8332
8333 return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
8334 }
8335
8336 static void init_explored_state(struct bpf_verifier_env *env, int idx)
8337 {
8338 env->insn_aux_data[idx].prune_point = true;
8339 }
8340
8341 /* t, w, e - match pseudo-code above:
8342 * t - index of current instruction
8343 * w - next instruction
8344 * e - edge
8345 */
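/* Walk-through (illustrative): for a conditional jump at insn t,
 * push_insn() is called first with w = t + 1 (FALLTHROUGH edge) and then
 * with w = t + off + 1 (BRANCH edge). The first call leaves
 * insn_state[t] == DISCOVERED | FALLTHROUGH (0x11), the second
 * DISCOVERED | BRANCH (0x12); once both successors are explored,
 * check_cfg() pops t and marks it EXPLORED (0x20).
 */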
8346 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, bool loop_ok)
8347 {
8348 int *insn_stack = env->cfg.insn_stack;
8349 int *insn_state = env->cfg.insn_state;
8350
8351 if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) {
8352 return 0;
8353 }
8354
8355 if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) {
8356 return 0;
8357 }
8358
8359 if (w < 0 || w >= env->prog->len) {
8360 verbose_linfo(env, t, "%d: ", t);
8361 verbose(env, "jump out of range from insn %d to %d\n", t, w);
8362 return -EINVAL;
8363 }
8364
8365 if (e == BRANCH) {
8366 /* mark branch target for state pruning */
8367 init_explored_state(env, w);
8368 }
8369
8370 if (insn_state[w] == 0) {
8371 /* tree-edge */
8372 insn_state[t] = DISCOVERED | e;
8373 insn_state[w] = DISCOVERED;
8374 if (env->cfg.cur_stack >= env->prog->len) {
8375 return -E2BIG;
8376 }
8377 insn_stack[env->cfg.cur_stack++] = w;
8378 return 1;
8379 } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
8380 if (loop_ok && env->bpf_capable) {
8381 return 0;
8382 }
8383 verbose_linfo(env, t, "%d: ", t);
8384 verbose_linfo(env, w, "%d: ", w);
8385 verbose(env, "back-edge from insn %d to %d\n", t, w);
8386 return -EINVAL;
8387 } else if (insn_state[w] == EXPLORED) {
8388 /* forward- or cross-edge */
8389 insn_state[t] = DISCOVERED | e;
8390 } else {
8391 verbose(env, "insn state internal bug\n");
8392 return -EFAULT;
8393 }
8394 return 0;
8395 }
8396
8397 /* non-recursive depth-first-search to detect loops in BPF program
8398 * loop == back-edge in directed graph
8399 */
8400 static int check_cfg(struct bpf_verifier_env *env)
8401 {
8402 struct bpf_insn *insns = env->prog->insnsi;
8403 int insn_cnt = env->prog->len;
8404 int *insn_stack, *insn_state;
8405 int ret = 0;
8406 int i, t;
8407
8408 insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8409 if (!insn_state) {
8410 return -ENOMEM;
8411 }
8412
8413 insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8414 if (!insn_stack) {
8415 kvfree(insn_state);
8416 return -ENOMEM;
8417 }
8418
8419 insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
8420 insn_stack[0] = 0; /* 0 is the first instruction */
8421 env->cfg.cur_stack = 1;
8422
8423 while (1) {
8424 if (env->cfg.cur_stack == 0) {
8425 goto check_state;
8426 }
8427 t = insn_stack[env->cfg.cur_stack - 1];
8428
8429 if (BPF_CLASS(insns[t].code) == BPF_JMP || BPF_CLASS(insns[t].code) == BPF_JMP32) {
8430 u8 opcode = BPF_OP(insns[t].code);
8431 if (opcode == BPF_EXIT) {
8432 goto mark_explored;
8433 } else if (opcode == BPF_CALL) {
8434 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8435 if (ret == 1) {
8436 continue;
8437 } else if (ret < 0) {
8438 goto err_free;
8439 }
8440 if (t + 1 < insn_cnt) {
8441 init_explored_state(env, t + 1);
8442 }
8443 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
8444 init_explored_state(env, t);
8445 ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, false);
8446 if (ret == 1) {
8447 continue;
8448 } else if (ret < 0) {
8449 goto err_free;
8450 }
8451 }
8452 } else if (opcode == BPF_JA) {
8453 if (BPF_SRC(insns[t].code) != BPF_K) {
8454 ret = -EINVAL;
8455 goto err_free;
8456 }
8457 /* unconditional jump with single edge */
8458 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env, true);
8459 if (ret == 1) {
8460 continue;
8461 } else if (ret < 0) {
8462 goto err_free;
8463 }
8464 /* unconditional jmp is not a good pruning point,
8465 * but it's marked, since backtracking needs
8466 * to record jmp history in is_state_visited().
8467 */
8468 init_explored_state(env, t + insns[t].off + 1);
8469 /* tell verifier to check for equivalent states
8470 * after every call and jump
8471 */
8472 if (t + 1 < insn_cnt) {
8473 init_explored_state(env, t + 1);
8474 }
8475 } else {
8476 /* conditional jump with two edges */
8477 init_explored_state(env, t);
8478 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
8479 if (ret == 1) {
8480 continue;
8481 } else if (ret < 0) {
8482 goto err_free;
8483 }
8484
8485 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
8486 if (ret == 1) {
8487 continue;
8488 } else if (ret < 0) {
8489 goto err_free;
8490 }
8491 }
8492 } else {
8493 /* all other non-branch instructions with single
8494 * fall-through edge
8495 */
8496 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8497 if (ret == 1) {
8498 continue;
8499 } else if (ret < 0) {
8500 goto err_free;
8501 }
8502 }
8503
8504 mark_explored:
8505 insn_state[t] = EXPLORED;
8506 if (env->cfg.cur_stack-- <= 0) {
8507 verbose(env, "pop stack internal bug\n");
8508 ret = -EFAULT;
8509 goto err_free;
8510 }
8511 continue;
8512 }
8513
8514 check_state:
8515 for (i = 0; i < insn_cnt; i++) {
8516 if (insn_state[i] != EXPLORED) {
8517 verbose(env, "unreachable insn %d\n", i);
8518 ret = -EINVAL;
8519 goto err_free;
8520 }
8521 }
8522 ret = 0; /* cfg looks good */
8523
8524 err_free:
8525 kvfree(insn_state);
8526 kvfree(insn_stack);
8527 env->cfg.insn_state = env->cfg.insn_stack = NULL;
8528 return ret;
8529 }
8530
8531 static int check_abnormal_return(struct bpf_verifier_env *env)
8532 {
8533 int i;
8534
8535 for (i = 1; i < env->subprog_cnt; i++) {
8536 if (env->subprog_info[i].has_ld_abs) {
8537 verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
8538 return -EINVAL;
8539 }
8540 if (env->subprog_info[i].has_tail_call) {
8541 verbose(env, "tail_call is not allowed in subprogs without BTF\n");
8542 return -EINVAL;
8543 }
8544 }
8545 return 0;
8546 }
8547
8548 /* The minimum supported BTF func info size */
8549 #define MIN_BPF_FUNCINFO_SIZE 8
8550 #define MAX_FUNCINFO_REC_SIZE 252
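/* Record sizes are negotiated for forward/backward compatibility
 * (sketch): if userspace passes a func_info record larger than
 * sizeof(struct bpf_func_info), bpf_check_uarg_tail_zero() below insists
 * that the extra tail bytes are zero; if it passes a smaller (but still
 * >= MIN_BPF_FUNCINFO_SIZE) record, only min_size bytes are copied and
 * the rest of the kernel record stays zeroed from kvcalloc().
 */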
8551
8552 static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8553 {
8554 const struct btf_type *type, *func_proto, *ret_type;
8555 u32 i, nfuncs, urec_size, min_size;
8556 u32 krec_size = sizeof(struct bpf_func_info);
8557 struct bpf_func_info *krecord;
8558 struct bpf_func_info_aux *info_aux = NULL;
8559 struct bpf_prog *prog;
8560 const struct btf *btf;
8561 void __user *urecord;
8562 u32 prev_offset = 0;
8563 bool scalar_return;
8564 int ret = -ENOMEM;
8565
8566 nfuncs = attr->func_info_cnt;
8567 if (!nfuncs) {
8568 if (check_abnormal_return(env)) {
8569 return -EINVAL;
8570 }
8571 return 0;
8572 }
8573
8574 if (nfuncs != env->subprog_cnt) {
8575 verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
8576 return -EINVAL;
8577 }
8578
8579 urec_size = attr->func_info_rec_size;
8580 if (urec_size < MIN_BPF_FUNCINFO_SIZE || urec_size > MAX_FUNCINFO_REC_SIZE || urec_size % sizeof(u32)) {
8581 verbose(env, "invalid func info rec size %u\n", urec_size);
8582 return -EINVAL;
8583 }
8584
8585 prog = env->prog;
8586 btf = prog->aux->btf;
8587
8588 urecord = u64_to_user_ptr(attr->func_info);
8589 min_size = min_t(u32, krec_size, urec_size);
8590
8591 krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
8592 if (!krecord) {
8593 return -ENOMEM;
8594 }
8595 info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
8596 if (!info_aux) {
8597 goto err_free;
8598 }
8599
8600 for (i = 0; i < nfuncs; i++) {
8601 ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
8602 if (ret) {
8603 if (ret == -E2BIG) {
8604 verbose(env, "nonzero tailing record in func info");
8605 /* set the size the kernel expects so the loader can zero
8606 * out the rest of the record.
8607 */
8608 if (put_user(min_size, &uattr->func_info_rec_size)) {
8609 ret = -EFAULT;
8610 }
8611 }
8612 goto err_free;
8613 }
8614
8615 if (copy_from_user(&krecord[i], urecord, min_size)) {
8616 ret = -EFAULT;
8617 goto err_free;
8618 }
8619
8620 /* check insn_off */
8621 ret = -EINVAL;
8622 if (i == 0) {
8623 if (krecord[i].insn_off) {
8624 verbose(env, "nonzero insn_off %u for the first func info record", krecord[i].insn_off);
8625 goto err_free;
8626 }
8627 } else if (krecord[i].insn_off <= prev_offset) {
8628 verbose(env, "same or smaller insn offset (%u) than previous func info record (%u)", krecord[i].insn_off,
8629 prev_offset);
8630 goto err_free;
8631 }
8632
8633 if (env->subprog_info[i].start != krecord[i].insn_off) {
8634 verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
8635 goto err_free;
8636 }
8637
8638 /* check type_id */
8639 type = btf_type_by_id(btf, krecord[i].type_id);
8640 if (!type || !btf_type_is_func(type)) {
8641 verbose(env, "invalid type id %d in func info", krecord[i].type_id);
8642 goto err_free;
8643 }
8644 info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8645
8646 func_proto = btf_type_by_id(btf, type->type);
8647 if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) {
8648 /* btf_func_check() already verified it during BTF load */
8649 goto err_free;
8650 }
8651 ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
8652 scalar_return = btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
8653 if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
8654 verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
8655 goto err_free;
8656 }
8657 if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
8658 verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
8659 goto err_free;
8660 }
8661
8662 prev_offset = krecord[i].insn_off;
8663 urecord += urec_size;
8664 }
8665
8666 prog->aux->func_info = krecord;
8667 prog->aux->func_info_cnt = nfuncs;
8668 prog->aux->func_info_aux = info_aux;
8669 return 0;
8670
8671 err_free:
8672 kvfree(krecord);
8673 kfree(info_aux);
8674 return ret;
8675 }
8676
8677 static void adjust_btf_func(struct bpf_verifier_env *env)
8678 {
8679 struct bpf_prog_aux *aux = env->prog->aux;
8680 int i;
8681
8682 if (!aux->func_info) {
8683 return;
8684 }
8685
8686 for (i = 0; i < env->subprog_cnt; i++) {
8687 aux->func_info[i].insn_off = env->subprog_info[i].start;
8688 }
8689 }
8690
8691 #define MIN_BPF_LINEINFO_SIZE \
8692 (offsetof(struct bpf_line_info, line_col) + sizeof(((struct bpf_line_info *)(0))->line_col))
8693 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
8694
8695 static int check_btf_line(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8696 {
8697 u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
8698 struct bpf_subprog_info *sub;
8699 struct bpf_line_info *linfo;
8700 struct bpf_prog *prog;
8701 const struct btf *btf;
8702 void __user *ulinfo;
8703 int err;
8704
8705 nr_linfo = attr->line_info_cnt;
8706 if (!nr_linfo) {
8707 return 0;
8708 }
8709 if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) {
8710 return -EINVAL;
8711 }
8712
8713 rec_size = attr->line_info_rec_size;
8714 if (rec_size < MIN_BPF_LINEINFO_SIZE || rec_size > MAX_LINEINFO_REC_SIZE || rec_size & (sizeof(u32) - 1)) {
8715 return -EINVAL;
8716 }
8717
8718 /* Need to zero it in case userspace passes in a smaller
8719 * bpf_line_info object.
8720 */
8721 linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), GFP_KERNEL | __GFP_NOWARN);
8722 if (!linfo) {
8723 return -ENOMEM;
8724 }
8725
8726 prog = env->prog;
8727 btf = prog->aux->btf;
8728
8729 s = 0;
8730 sub = env->subprog_info;
8731 ulinfo = u64_to_user_ptr(attr->line_info);
8732 expected_size = sizeof(struct bpf_line_info);
8733 ncopy = min_t(u32, expected_size, rec_size);
8734 for (i = 0; i < nr_linfo; i++) {
8735 err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
8736 if (err) {
8737 if (err == -E2BIG) {
8738 verbose(env, "nonzero tailing record in line_info");
8739 if (put_user(expected_size, &uattr->line_info_rec_size)) {
8740 err = -EFAULT;
8741 }
8742 }
8743 goto err_free;
8744 }
8745
8746 if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
8747 err = -EFAULT;
8748 goto err_free;
8749 }
8750
8751 /*
8752 * Check insn_off to ensure
8753 * 1) strictly increasing AND
8754 * 2) bounded by prog->len
8755 *
8756 * The linfo[0].insn_off == 0 check logically falls into
8757 * the later "missing bpf_line_info for func..." case
8758 * because the first linfo[0].insn_off must be the
8759 * first sub also and the first sub must have
8760 * subprog_info[0].start == 0.
8761 */
8762 if ((i && linfo[i].insn_off <= prev_offset) || linfo[i].insn_off >= prog->len) {
8763 verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", i, linfo[i].insn_off,
8764 prev_offset, prog->len);
8765 err = -EINVAL;
8766 goto err_free;
8767 }
8768
8769 if (!prog->insnsi[linfo[i].insn_off].code) {
8770 verbose(env, "Invalid insn code at line_info[%u].insn_off\n", i);
8771 err = -EINVAL;
8772 goto err_free;
8773 }
8774
8775 if (!btf_name_by_offset(btf, linfo[i].line_off) || !btf_name_by_offset(btf, linfo[i].file_name_off)) {
8776 verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
8777 err = -EINVAL;
8778 goto err_free;
8779 }
8780
8781 if (s != env->subprog_cnt) {
8782 if (linfo[i].insn_off == sub[s].start) {
8783 sub[s].linfo_idx = i;
8784 s++;
8785 } else if (sub[s].start < linfo[i].insn_off) {
8786 verbose(env, "missing bpf_line_info for func#%u\n", s);
8787 err = -EINVAL;
8788 goto err_free;
8789 }
8790 }
8791
8792 prev_offset = linfo[i].insn_off;
8793 ulinfo += rec_size;
8794 }
8795
8796 if (s != env->subprog_cnt) {
8797 verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", env->subprog_cnt - s, s);
8798 err = -EINVAL;
8799 goto err_free;
8800 }
8801
8802 prog->aux->linfo = linfo;
8803 prog->aux->nr_linfo = nr_linfo;
8804
8805 return 0;
8806
8807 err_free:
8808 kvfree(linfo);
8809 return err;
8810 }
8811
8812 static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8813 {
8814 struct btf *btf;
8815 int err;
8816
8817 if (!attr->func_info_cnt && !attr->line_info_cnt) {
8818 if (check_abnormal_return(env)) {
8819 return -EINVAL;
8820 }
8821 return 0;
8822 }
8823
8824 btf = btf_get_by_fd(attr->prog_btf_fd);
8825 if (IS_ERR(btf)) {
8826 return PTR_ERR(btf);
8827 }
8828 env->prog->aux->btf = btf;
8829
8830 err = check_btf_func(env, attr, uattr);
8831 if (err) {
8832 return err;
8833 }
8834
8835 err = check_btf_line(env, attr, uattr);
8836 if (err) {
8837 return err;
8838 }
8839
8840 return 0;
8841 }
8842
8843 /* check %cur's range satisfies %old's */
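/* e.g. (illustrative numbers): an old range of [umin, umax] = [0, 10]
 * accepts a cur of [2, 5] but rejects [2, 20]; the same containment is
 * required for the signed and the 32-bit sub-register bounds.
 */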
8844 static bool range_within(struct bpf_reg_state *old, struct bpf_reg_state *cur)
8845 {
8846 return old->umin_value <= cur->umin_value && old->umax_value >= cur->umax_value &&
8847 old->smin_value <= cur->smin_value && old->smax_value >= cur->smax_value &&
8848 old->u32_min_value <= cur->u32_min_value && old->u32_max_value >= cur->u32_max_value &&
8849 old->s32_min_value <= cur->s32_min_value && old->s32_max_value >= cur->s32_max_value;
8850 }
8851
8852 /* If in the old state two registers had the same id, then they need to have
8853 * the same id in the new state as well. But that id could be different from
8854 * the old state, so we need to track the mapping from old to new ids.
8855 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
8856 * regs with old id 5 must also have new id 9 for the new state to be safe. But
8857 * regs with a different old id could still have new id 9, we don't care about
8858 * that.
8859 * So we look through our idmap to see if this old id has been seen before. If
8860 * so, we require the new id to match; otherwise, we add the id pair to the map.
8861 */
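/* Illustrative run (sketch): the old state had r1.id == r2.id == 5. If
 * the current state has r1.id == 9, the pair (5, 9) is recorded; a later
 * r2.id == 9 matches, while r2.id == 7 makes check_ids() fail and the
 * states are not considered equivalent.
 */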
8862 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
8863 {
8864 unsigned int i;
8865
8866 for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
8867 if (!idmap[i].old) {
8868 /* Reached an empty slot; haven't seen this id before */
8869 idmap[i].old = old_id;
8870 idmap[i].cur = cur_id;
8871 return true;
8872 }
8873 if (idmap[i].old == old_id) {
8874 return idmap[i].cur == cur_id;
8875 }
8876 }
8877 /* We ran out of idmap slots, which should be impossible */
8878 WARN_ON_ONCE(1);
8879 return false;
8880 }
8881
8882 static void clean_func_state(struct bpf_verifier_env *env, struct bpf_func_state *st)
8883 {
8884 enum bpf_reg_liveness live;
8885 int i, j;
8886
8887 for (i = 0; i < BPF_REG_FP; i++) {
8888 live = st->regs[i].live;
8889 /* liveness must not touch this register anymore */
8890 st->regs[i].live |= REG_LIVE_DONE;
8891 if (!(live & REG_LIVE_READ)) {
8892 /* since the register is unused, clear its state
8893 * to make further comparison simpler
8894 */
8895 verifier_mark_reg_not_init(env, &st->regs[i]);
8896 }
8897 }
8898
8899 for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
8900 live = st->stack[i].spilled_ptr.live;
8901 /* liveness must not touch this stack slot anymore */
8902 st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
8903 if (!(live & REG_LIVE_READ)) {
8904 verifier_mark_reg_not_init(env, &st->stack[i].spilled_ptr);
8905 for (j = 0; j < BPF_REG_SIZE; j++) {
8906 st->stack[i].slot_type[j] = STACK_INVALID;
8907 }
8908 }
8909 }
8910 }
8911
8912 static void clean_verifier_state(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
8913 {
8914 int i;
8915
8916 if (st->frame[0]->regs[0].live & REG_LIVE_DONE) {
8917 /* all regs in this state in all frames were already marked */
8918 return;
8919 }
8920
8921 for (i = 0; i <= st->curframe; i++) {
8922 clean_func_state(env, st->frame[i]);
8923 }
8924 }
8925
8926 /* the parentage chains form a tree.
8927 * the verifier states are added to state lists at given insn and
8928 * pushed into state stack for future exploration.
8929 * when the verifier reaches bpf_exit insn some of the verifier states
8930 * stored in the state lists have their final liveness state already,
8931 * but a lot of states will get revised from liveness point of view when
8932 * the verifier explores other branches.
8933 * 1: r0 = 1
8934 * 2: if r1 == 100 goto pc+1
8935 * 3: r0 = 2
8936 * 4: exit
8937 * when the verifier reaches exit insn the register r0 in the state list of
8938 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
8939 * of insn 2 and goes exploring further. At the insn 4 it will walk the
8940 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
8941 *
8942 * Since the verifier pushes the branch states as it sees them while exploring
8943 * the program, the condition of walking the branch instruction for the second
8944 * time means that all states below this branch were already explored and
8945 * their final liveness marks are already propagated.
8946 * Hence when the verifier completes the search of state list in is_state_visited()
8947 * we can call this clean_live_states() function to mark all liveness states
8948 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
8949 * will not be used.
8950 * This function also clears the registers and stack for states that !READ
8951 * to simplify state merging.
8952 *
8953 * Important note here that walking the same branch instruction in the callee
8954 * doesn't mean that the states are DONE. The verifier has to compare
8955 * the callsites as well.
8956 */
8957 static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur)
8958 {
8959 struct bpf_verifier_state_list *sl;
8960 int i;
8961
8962 sl = *explored_state(env, insn);
8963 while (sl) {
8964 if (sl->state.branches) {
8965 goto next;
8966 }
8967 if (sl->state.insn_idx != insn || sl->state.curframe != cur->curframe) {
8968 goto next;
8969 }
8970 for (i = 0; i <= cur->curframe; i++) {
8971 if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) {
8972 goto next;
8973 }
8974 }
8975 clean_verifier_state(env, &sl->state);
8976 next:
8977 sl = sl->next;
8978 }
8979 }
8980
8981 /* Returns true if (rold safe implies rcur safe) */
8982 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
8983 struct bpf_id_pair *idmap)
8984 {
8985 bool equal;
8986
8987 if (!(rold->live & REG_LIVE_READ)) {
8988 /* explored state didn't use this */
8989 return true;
8990 }
8991
8992 equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
8993
8994 if (rold->type == PTR_TO_STACK) {
8995 /* two stack pointers are equal only if they're pointing to
8996 * the same stack frame, since fp-8 in foo != fp-8 in bar
8997 */
8998 return equal && rold->frameno == rcur->frameno;
8999 }
9000
9001 if (equal) {
9002 return true;
9003 }
9004
9005 if (rold->type == NOT_INIT) {
9006 /* explored state can't have used this */
9007 return true;
9008 }
9009 if (rcur->type == NOT_INIT) {
9010 return false;
9011 }
9012 switch (base_type(rold->type)) {
9013 case SCALAR_VALUE:
9014 if (env->explore_alu_limits) {
9015 return false;
9016 }
9017 if (rcur->type == SCALAR_VALUE) {
9018 if (!rold->precise && !rcur->precise) {
9019 return true;
9020 }
9021 /* new val must satisfy old val knowledge */
9022 return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
9023 } else {
9024 /* We're trying to use a pointer in place of a scalar.
9025 * Even if the scalar was unbounded, this could lead to
9026 * pointer leaks because scalars are allowed to leak
9027 * while pointers are not. We could make this safe in
9028 * special cases if root is calling us, but it's
9029 * probably not worth the hassle.
9030 */
9031 return false;
9032 }
9033 case PTR_TO_MAP_VALUE:
9034 /* a PTR_TO_MAP_VALUE could be safe to use as a
9035 * PTR_TO_MAP_VALUE_OR_NULL into the same map.
9036 * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
9037 * checked, doing so could have affected others with the same
9038 * id, and we can't check for that because we lost the id when
9039 * we converted to a PTR_TO_MAP_VALUE.
9040 */
9041 if (type_may_be_null(rold->type)) {
9042 if (!type_may_be_null(rcur->type)) {
9043 return false;
9044 }
9045 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) {
9046 return false;
9047 }
9048 /* Check our ids match any regs they're supposed to */
9049 return check_ids(rold->id, rcur->id, idmap);
9050 }
9051
9052 /* If the new min/max/var_off satisfy the old ones and
9053 * everything else matches, we are OK.
9054 * 'id' is not compared, since it's only used for maps with
9055 * bpf_spin_lock inside map element and in such cases if
9056 * the rest of the prog is valid for one map element then
9057 * it's valid for all map elements regardless of the key
9058 * used in bpf_map_lookup()
9059 */
9060 return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) &&
9061 tnum_in(rold->var_off, rcur->var_off);
9062 case PTR_TO_PACKET_META:
9063 case PTR_TO_PACKET:
9064 if (rcur->type != rold->type) {
9065 return false;
9066 }
9067 /* We must have at least as much range as the old ptr
9068 * did, so that any accesses which were safe before are
9069 * still safe. This is true even if old range < old off,
9070 * since someone could have accessed through (ptr - k), or
9071 * even done ptr -= k in a register, to get a safe access.
9072 */
9073 if (rold->range > rcur->range) {
9074 return false;
9075 }
9076 /* If the offsets don't match, we can't trust our alignment;
9077 * nor can we be sure that we won't fall out of range.
9078 */
9079 if (rold->off != rcur->off) {
9080 return false;
9081 }
9082 /* id relations must be preserved */
9083 if (rold->id && !check_ids(rold->id, rcur->id, idmap)) {
9084 return false;
9085 }
9086 /* new val must satisfy old val knowledge */
9087 return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
9088 case PTR_TO_CTX:
9089 case CONST_PTR_TO_MAP:
9090 case PTR_TO_PACKET_END:
9091 case PTR_TO_FLOW_KEYS:
9092 case PTR_TO_SOCKET:
9093 case PTR_TO_SOCK_COMMON:
9094 case PTR_TO_TCP_SOCK:
9095 case PTR_TO_XDP_SOCK:
9096 /* Only valid matches are exact, which memcmp() above
9097 * would have accepted
9098 */
9099 default:
9100 /* Don't know what's going on, just say it's not safe */
9101 return false;
9102 }
9103
9104 /* Shouldn't get here; if we do, say it's not safe */
9105 WARN_ON_ONCE(1);
9106 return false;
9107 }
9108
9109 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur,
9110 struct bpf_id_pair *idmap)
9111 {
9112 int i, spi;
9113
9114 /* walk slots of the explored stack and ignore any additional
9115 * slots in the current stack, since explored(safe) state
9116 * didn't use them
9117 */
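/* e.g. (sketch): if the old state only ever read a spill at fp-8, extra
 * slots in the current stack (say fp-16) are ignored; conversely, if the
 * old state used fp-16 but the current stack is only 8 bytes deep, the
 * states cannot be considered equivalent.
 */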
9118 for (i = 0; i < old->allocated_stack; i++) {
9119 spi = i / BPF_REG_SIZE;
9120
9121 if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
9122 i += BPF_REG_SIZE - 1;
9123 /* explored state didn't use this */
9124 continue;
9125 }
9126
9127 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) {
9128 continue;
9129 }
9130
9131 /* explored stack has more populated slots than current stack
9132 * and these slots were used
9133 */
9134 if (i >= cur->allocated_stack) {
9135 return false;
9136 }
9137
9138 /* if old state was safe with misc data in the stack
9139 * it will be safe with zero-initialized stack.
9140 * The opposite is not true
9141 */
9142 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
9143 cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) {
9144 continue;
9145 }
9146 if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) {
9147 /* Ex: old explored (safe) state has STACK_SPILL in
9148 * this stack slot, but current has STACK_MISC ->
9149 * these verifier states are not equivalent;
9150 * return false to continue verification of this path
9151 */
9152 return false;
9153 }
9154 if (i % BPF_REG_SIZE) {
9155 continue;
9156 }
9157 if (old->stack[spi].slot_type[0] != STACK_SPILL) {
9158 continue;
9159 }
9160 if (!regsafe(env, &old->stack[spi].spilled_ptr, &cur->stack[spi].spilled_ptr, idmap)) {
9161 /* when explored and current stack slot are both storing
9162 * spilled registers, check that stored pointer types
9163 * are the same as well.
9164 * Ex: explored safe path could have stored
9165 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
9166 * but current path has stored:
9167 * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
9168 * such verifier states are not equivalent.
9169 * return false to continue verification of this path
9170 */
9171 return false;
9172 }
9173 }
9174 return true;
9175 }
9176
9177 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
9178 {
9179 if (old->acquired_refs != cur->acquired_refs) {
9180 return false;
9181 }
9182 return !memcmp(old->refs, cur->refs, sizeof(*old->refs) * old->acquired_refs);
9183 }
9184
9185 /* compare two verifier states
9186 *
9187 * all states stored in state_list are known to be valid, since
9188 * verifier reached 'bpf_exit' instruction through them
9189 *
9190 * this function is called when the verifier explores different branches of
9191 * execution popped from the state stack. If it sees an old state that has
9192 * a more strict register state and a more strict stack state, then this execution
9193 * branch doesn't need to be explored further, since the verifier already
9194 * concluded that the more strict state leads to a valid finish.
9195 *
9196 * Therefore two states are equivalent if register state is more conservative
9197 * and explored stack state is more conservative than the current one.
9198 * Example:
9199 * explored current
9200 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
9201 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
9202 *
9203 * In other words if current stack state (one being explored) has more
9204 * valid slots than old one that already passed validation, it means
9205 * the verifier can stop exploring and conclude that current state is valid too
9206 *
9207 * Similarly with registers. If explored state has register type as invalid
9208 * whereas register type in current state is meaningful, it means that
9209 * the current state will reach 'bpf_exit' instruction safely
9210 */
9211 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur)
9212 {
9213 int i;
9214
9215 memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
9216 for (i = 0; i < MAX_BPF_REG; i++) {
9217 if (!regsafe(env, &old->regs[i], &cur->regs[i], env->idmap_scratch)) {
9218 return false;
9219 }
9220 }
9221
9222 if (!stacksafe(env, old, cur, env->idmap_scratch)) {
9223 return false;
9224 }
9225
9226 if (!refsafe(old, cur)) {
9227 return false;
9228 }
9229
9230 return true;
9231 }
9232
9233 static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
9234 {
9235 int i;
9236
9237 if (old->curframe != cur->curframe) {
9238 return false;
9239 }
9240
9241 /* Verification state from speculative execution simulation
9242 * must never prune a non-speculative execution one.
9243 */
9244 if (old->speculative && !cur->speculative) {
9245 return false;
9246 }
9247
9248 if (old->active_spin_lock != cur->active_spin_lock) {
9249 return false;
9250 }
9251
9252 /* for states to be equal callsites have to be the same
9253 * and all frame states need to be equivalent
9254 */
9255 for (i = 0; i <= old->curframe; i++) {
9256 if (old->frame[i]->callsite != cur->frame[i]->callsite) {
9257 return false;
9258 }
9259 if (!func_states_equal(env, old->frame[i], cur->frame[i])) {
9260 return false;
9261 }
9262 }
9263 return true;
9264 }
9265
9266 /* Return 0 if no propagation happened. Return negative error code if error
9267 * happened. Otherwise, return the propagated bit.
9268 */
9269 static int propagate_liveness_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
9270 struct bpf_reg_state *parent_reg)
9271 {
9272 u8 parent_flag = parent_reg->live & REG_LIVE_READ;
9273 u8 flag = reg->live & REG_LIVE_READ;
9274 int err;
9275
9276 /* When comes here, read flags of PARENT_REG or REG could be any of
9277 * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
9278 * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
9279 */
9280 if (parent_flag == REG_LIVE_READ64 ||
9281 /* Or if there is no read flag from REG. */
9282 !flag ||
9283 /* Or if the read flag from REG is the same as PARENT_REG. */
9284 parent_flag == flag) {
9285 return 0;
9286 }
9287
9288 err = mark_reg_read(env, reg, parent_reg, flag);
9289 if (err) {
9290 return err;
9291 }
9292
9293 return flag;
9294 }
9295
9296 /* A write screens off any subsequent reads; but write marks come from the
9297 * straight-line code between a state and its parent. When we arrive at an
9298 * equivalent state (jump target or such) we didn't arrive by the straight-line
9299 * code, so read marks in the state must propagate to the parent regardless
9300 * of the state's write marks. That's what 'parent == state->parent' comparison
9301 * in mark_reg_read() is for.
9302 */
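/* Rough illustration: the already-explored state we are pruning against
 * may have read r6 before ever writing it. Even if the current path wrote
 * r6 on its straight-line segment from its parent, that write says nothing
 * about the pruned continuation, so the old state's read mark on r6 is
 * still propagated into the current state and up its parentage chain.
 */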
9303 static int propagate_liveness(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
9304 struct bpf_verifier_state *vparent)
9305 {
9306 struct bpf_reg_state *state_reg, *parent_reg;
9307 struct bpf_func_state *state, *parent;
9308 int i, frame, err = 0;
9309
9310 if (vparent->curframe != vstate->curframe) {
9311 WARN(1, "propagate_live: parent frame %d current frame %d\n", vparent->curframe, vstate->curframe);
9312 return -EFAULT;
9313 }
9314 /* Propagate read liveness of registers... */
9315 BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
9316 for (frame = 0; frame <= vstate->curframe; frame++) {
9317 parent = vparent->frame[frame];
9318 state = vstate->frame[frame];
9319 parent_reg = parent->regs;
9320 state_reg = state->regs;
9321 /* We don't need to worry about FP liveness, it's read-only */
9322 for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9323 err = propagate_liveness_reg(env, &state_reg[i], &parent_reg[i]);
9324 if (err < 0) {
9325 return err;
9326 }
9327 if (err == REG_LIVE_READ64) {
9328 mark_insn_zext(env, &parent_reg[i]);
9329 }
9330 }
9331
9332 /* Propagate stack slots. */
9333 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && i < parent->allocated_stack / BPF_REG_SIZE; i++) {
9334 parent_reg = &parent->stack[i].spilled_ptr;
9335 state_reg = &state->stack[i].spilled_ptr;
9336 err = propagate_liveness_reg(env, state_reg, parent_reg);
9337 if (err < 0) {
9338 return err;
9339 }
9340 }
9341 }
9342 return 0;
9343 }
9344
9345 /* find precise scalars in the previous equivalent state and
9346 * propagate them into the current state
9347 */
9348 static int propagate_precision(struct bpf_verifier_env *env, const struct bpf_verifier_state *old)
9349 {
9350 struct bpf_reg_state *state_reg;
9351 struct bpf_func_state *state;
9352 int i, err = 0;
9353
9354 state = old->frame[old->curframe];
9355 state_reg = state->regs;
9356 for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
9357 if (state_reg->type != SCALAR_VALUE || !state_reg->precise) {
9358 continue;
9359 }
9360 if (env->log.level & BPF_LOG_LEVEL2) {
9361 verbose(env, "propagating r%d\n", i);
9362 }
9363 err = mark_chain_precision(env, i);
9364 if (err < 0) {
9365 return err;
9366 }
9367 }
9368
9369 for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
9370 if (state->stack[i].slot_type[0] != STACK_SPILL) {
9371 continue;
9372 }
9373 state_reg = &state->stack[i].spilled_ptr;
9374 if (state_reg->type != SCALAR_VALUE || !state_reg->precise) {
9375 continue;
9376 }
9377 if (env->log.level & BPF_LOG_LEVEL2) {
9378 verbose(env, "propagating fp%d\n", (-i - 1) * BPF_REG_SIZE);
9379 }
9380 err = mark_chain_precision_stack(env, i);
9381 if (err < 0) {
9382 return err;
9383 }
9384 }
9385 return 0;
9386 }
9387
9388 static bool states_maybe_looping(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
9389 {
9390 struct bpf_func_state *fold, *fcur;
9391 int i, fr = cur->curframe;
9392
9393 if (old->curframe != fr) {
9394 return false;
9395 }
9396
9397 fold = old->frame[fr];
9398 fcur = cur->frame[fr];
9399 for (i = 0; i < MAX_BPF_REG; i++) {
9400 if (memcmp(&fold->regs[i], &fcur->regs[i], offsetof(struct bpf_reg_state, parent))) {
9401 return false;
9402 }
9403 }
9404 return true;
9405 }
9406
9407 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
9408 {
9409 struct bpf_verifier_state_list *new_sl;
9410 struct bpf_verifier_state_list *sl, **pprev;
9411 struct bpf_verifier_state *cur = env->cur_state, *new;
9412 int i, j, err, states_cnt = 0;
9413 bool add_new_state = env->test_state_freq ? true : false;
9414
9415 cur->last_insn_idx = env->prev_insn_idx;
9416 if (!env->insn_aux_data[insn_idx].prune_point) {
9417 /* this 'insn_idx' instruction wasn't marked, so we will not
9418 * be doing state search here
9419 */
9420 return 0;
9421 }
9422
9423 /* bpf progs typically have a pruning point every 4 instructions
9424 * http://vger.kernel.org/bpfconf2019.html#session-1
9425 * Do not add new state for future pruning if the verifier hasn't seen
9426 * at least 2 jumps and at least 8 instructions.
9427 * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
9428 * In tests that amounts to up to 50% reduction in total verifier
9429 * memory consumption and 20% verifier time speedup.
9430 */
9431 if (env->jmps_processed - env->prev_jmps_processed >= 2 && env->insn_processed - env->prev_insn_processed >= 8) {
9432 add_new_state = true;
9433 }
9434
9435 pprev = explored_state(env, insn_idx);
9436 sl = *pprev;
9437
9438 clean_live_states(env, insn_idx, cur);
9439
9440 while (sl) {
9441 states_cnt++;
9442 if (sl->state.insn_idx != insn_idx) {
9443 goto next;
9444 }
9445 if (sl->state.branches) {
9446 if (states_maybe_looping(&sl->state, cur) && states_equal(env, &sl->state, cur)) {
9447 verbose_linfo(env, insn_idx, "; ");
9448 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
9449 return -EINVAL;
9450 }
9451 /* if the verifier is processing a loop, avoid adding new state
9452 * too often, since different loop iterations have distinct
9453 * states and may not help future pruning.
9454 * This threshold shouldn't be too low to make sure that
9455 * a loop with large bound will be rejected quickly.
9456 * The most abusive loop will be:
9457 * r1 += 1
9458 * if r1 < 1000000 goto pc-2
9459 * 1M insn_processed limit / 100 == 10k peak states.
9460 * This threshold shouldn't be too high either, since states
9461 * at the end of the loop are likely to be useful in pruning.
9462 */
9463 if (env->jmps_processed - env->prev_jmps_processed < 20 &&
9464 env->insn_processed - env->prev_insn_processed < 100) {
9465 add_new_state = false;
9466 }
9467 goto miss;
9468 }
9469 if (states_equal(env, &sl->state, cur)) {
9470 sl->hit_cnt++;
9471 /* reached equivalent register/stack state,
9472 * prune the search.
9473 * Registers read by the continuation are read by us.
9474 * If we have any write marks in env->cur_state, they
9475 * will prevent corresponding reads in the continuation
9476 * from reaching our parent (an explored_state). Our
9477 * own state will get the read marks recorded, but
9478 * they'll be immediately forgotten as we're pruning
9479 * this state and will pop a new one.
9480 */
9481 err = propagate_liveness(env, &sl->state, cur);
9482
9483 /* if previous state reached the exit with precision and
9484 * current state is equivalent to it (except precision marks)
9485 * the precision needs to be propagated back in
9486 * the current state.
9487 */
9488 err = err ?: push_jmp_history(env, cur);
9489 err = err ?: propagate_precision(env, &sl->state);
9490 if (err) {
9491 return err;
9492 }
9493 return 1;
9494 }
9495 miss:
9496 /* when a new state is not going to be added, do not increase the miss count.
9497 * Otherwise several loop iterations will remove the state
9498 * recorded earlier. The goal of these heuristics is to have
9499 * states from some iterations of the loop (some in the beginning
9500 * and some at the end) to help pruning.
9501 */
9502 if (add_new_state) {
9503 sl->miss_cnt++;
9504 }
9505 /* heuristic to determine whether this state is beneficial
9506 * to keep checking from state equivalence point of view.
9507 * Higher numbers increase max_states_per_insn and verification time,
9508 * but do not meaningfully decrease insn_processed.
9509 */
9510 if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
9511 /* the state is unlikely to be useful. Remove it to
9512 * speed up verification
9513 */
9514 *pprev = sl->next;
9515 if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
9516 u32 br = sl->state.branches;
9517
9518 WARN_ONCE(br, "BUG live_done but branches_to_explore %d\n", br);
9519 free_verifier_state(&sl->state, false);
9520 kfree(sl);
9521 env->peak_states--;
9522 } else {
9523 /* cannot free this state, since parentage chain may
9524 * walk it later. Add it for free_list instead to
9525 * be freed at the end of verification
9526 */
9527 sl->next = env->free_list;
9528 env->free_list = sl;
9529 }
9530 sl = *pprev;
9531 continue;
9532 }
9533 next:
9534 pprev = &sl->next;
9535 sl = *pprev;
9536 }
9537
9538 if (env->max_states_per_insn < states_cnt) {
9539 env->max_states_per_insn = states_cnt;
9540 }
9541
9542 if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) {
9543 return push_jmp_history(env, cur);
9544 }
9545
9546 if (!add_new_state) {
9547 return push_jmp_history(env, cur);
9548 }
9549
9550 /* There were no equivalent states, remember the current one.
9551 * Technically the current state is not proven to be safe yet,
9552 * but it will either reach outer most bpf_exit (which means it's safe)
9553 * or it will be rejected. When there are no loops the verifier won't be
9554 * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
9555 * again on the way to bpf_exit.
9556 * When looping the sl->state.branches will be > 0 and this state
9557 * will not be considered for equivalence until branches == 0.
9558 */
9559 new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
9560 if (!new_sl) {
9561 return -ENOMEM;
9562 }
9563 env->total_states++;
9564 env->peak_states++;
9565 env->prev_jmps_processed = env->jmps_processed;
9566 env->prev_insn_processed = env->insn_processed;
9567
9568 /* add new state to the head of linked list */
9569 new = &new_sl->state;
9570 err = copy_verifier_state(new, cur);
9571 if (err) {
9572 free_verifier_state(new, false);
9573 kfree(new_sl);
9574 return err;
9575 }
9576 new->insn_idx = insn_idx;
9577 WARN_ONCE(new->branches != 1, "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
9578
9579 cur->parent = new;
9580 cur->first_insn_idx = insn_idx;
9581 clear_jmp_history(cur);
9582 new_sl->next = *explored_state(env, insn_idx);
9583 *explored_state(env, insn_idx) = new_sl;
9584 /* connect new state to parentage chain. Current frame needs all
9585 * registers connected. Only r6 - r9 of the callers are alive (pushed
9586 * to the stack implicitly by JITs) so in callers' frames connect just
9587 * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
9588 * the state of the call instruction (with WRITTEN set), and r0 comes
9589 * from callee with its full parentage chain, anyway.
9590 */
9591 /* clear write marks in current state: the writes we did are not writes
9592 * our child did, so they don't screen off its reads from us.
9593 * (There are no read marks in current state, because reads always mark
9594 * their parent and current state never has children yet. Only
9595 * explored_states can get read marks.)
9596 */
9597 for (j = 0; j <= cur->curframe; j++) {
9598 for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9599 cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
9600 }
9601 for (i = 0; i < BPF_REG_FP; i++) {
9602 cur->frame[j]->regs[i].live = REG_LIVE_NONE;
9603 }
9604 }
9605
9606 /* all stack frames are accessible from callee, clear them all */
9607 for (j = 0; j <= cur->curframe; j++) {
9608 struct bpf_func_state *frame = cur->frame[j];
9609 struct bpf_func_state *newframe = new->frame[j];
9610
9611 for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
9612 frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
9613 frame->stack[i].spilled_ptr.parent = &newframe->stack[i].spilled_ptr;
9614 }
9615 }
9616 return 0;
9617 }
9618
9619 /* Return true if it's OK to have the same insn return a different type. */
9620 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
9621 {
9622 switch (base_type(type)) {
9623 case PTR_TO_CTX:
9624 case PTR_TO_SOCKET:
9625 case PTR_TO_SOCK_COMMON:
9626 case PTR_TO_TCP_SOCK:
9627 case PTR_TO_XDP_SOCK:
9628 case PTR_TO_BTF_ID:
9629 return false;
9630 default:
9631 return true;
9632 }
9633 }
9634
9635 /* If an instruction was previously used with particular pointer types, then we
9636 * need to be careful to avoid cases such as the below, where it may be ok
9637 * for one branch accessing the pointer, but not ok for the other branch:
9638 *
9639 * R1 = sock_ptr
9640 * goto X;
9641 * ...
9642 * R1 = some_other_valid_ptr;
9643 * goto X;
9644 * ...
9645 * R2 = *(u32 *)(R1 + 0);
9646 */
9647 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
9648 {
9649 return src != prev && (!reg_type_mismatch_ok(src) || !reg_type_mismatch_ok(prev));
9650 }
9651
9652 static int do_check(struct bpf_verifier_env *env)
9653 {
9654 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
9655 struct bpf_verifier_state *state = env->cur_state;
9656 struct bpf_insn *insns = env->prog->insnsi;
9657 struct bpf_reg_state *regs;
9658 int insn_cnt = env->prog->len;
9659 bool do_print_state = false;
9660 int prev_insn_idx = -1;
9661
9662 for (;;) {
9663 struct bpf_insn *insn;
9664 u8 class;
9665 int err;
9666
9667 env->prev_insn_idx = prev_insn_idx;
9668 if (env->insn_idx >= insn_cnt) {
9669 verbose(env, "invalid insn idx %d insn_cnt %d\n", env->insn_idx, insn_cnt);
9670 return -EFAULT;
9671 }
9672
9673 insn = &insns[env->insn_idx];
9674 class = BPF_CLASS(insn->code);
9675
9676 if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
9677 verbose(env, "BPF program is too large. Processed %d insn\n", env->insn_processed);
9678 return -E2BIG;
9679 }
9680
9681 err = is_state_visited(env, env->insn_idx);
9682 if (err < 0) {
9683 return err;
9684 }
9685 if (err == 1) {
9686 /* found equivalent state, can prune the search */
9687 if (env->log.level & BPF_LOG_LEVEL) {
9688 if (do_print_state) {
9689 verbose(env, "\nfrom %d to %d%s: safe\n", env->prev_insn_idx, env->insn_idx,
9690 env->cur_state->speculative ? " (speculative execution)" : "");
9691 } else {
9692 verbose(env, "%d: safe\n", env->insn_idx);
9693 }
9694 }
9695 goto process_bpf_exit;
9696 }
9697
9698 if (signal_pending(current)) {
9699 return -EAGAIN;
9700 }
9701
9702 if (need_resched()) {
9703 cond_resched();
9704 }
9705
9706 if ((env->log.level & BPF_LOG_LEVEL2) || ((env->log.level & BPF_LOG_LEVEL) && do_print_state)) {
9707 if (env->log.level & BPF_LOG_LEVEL2) {
9708 verbose(env, "%d:", env->insn_idx);
9709 } else {
9710 verbose(env, "\nfrom %d to %d%s:", env->prev_insn_idx, env->insn_idx,
9711 env->cur_state->speculative ? " (speculative execution)" : "");
9712 }
9713 print_verifier_state(env, state->frame[state->curframe]);
9714 do_print_state = false;
9715 }
9716
9717 if (env->log.level & BPF_LOG_LEVEL) {
9718 const struct bpf_insn_cbs cbs = {
9719 .cb_print = verbose,
9720 .private_data = env,
9721 };
9722
9723 verbose_linfo(env, env->insn_idx, "; ");
9724 verbose(env, "%d: ", env->insn_idx);
9725 print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
9726 }
9727
9728 if (bpf_prog_is_dev_bound(env->prog->aux)) {
9729 err = bpf_prog_offload_verify_insn(env, env->insn_idx, env->prev_insn_idx);
9730 if (err) {
9731 return err;
9732 }
9733 }
9734
9735 regs = cur_regs(env);
9736 sanitize_mark_insn_seen(env);
9737 prev_insn_idx = env->insn_idx;
9738
9739 if (class == BPF_ALU || class == BPF_ALU64) {
9740 err = check_alu_op(env, insn);
9741 if (err) {
9742 return err;
9743 }
9744 } else if (class == BPF_LDX) {
9745 enum bpf_reg_type *prev_src_type, src_reg_type;
9746
9747 /* check for reserved fields is already done */
9748
9749 /* check src operand */
9750 err = check_reg_arg(env, insn->src_reg, SRC_OP);
9751 if (err) {
9752 return err;
9753 }
9754
9755 err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9756 if (err) {
9757 return err;
9758 }
9759
9760 src_reg_type = regs[insn->src_reg].type;
9761
9762 /* check that memory (src_reg + off) is readable,
9763 * the state of dst_reg will be updated by this func
9764 */
9765 err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ,
9766 insn->dst_reg, false);
9767 if (err) {
9768 return err;
9769 }
9770
9771 prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9772
9773 if (*prev_src_type == NOT_INIT) {
9774 /* saw a valid insn
9775 * dst_reg = *(u32 *)(src_reg + off)
9776 * save type to validate intersecting paths
9777 */
9778 *prev_src_type = src_reg_type;
9779 } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9780 /* A buggy or malicious program is trying to use the same insn
9781 * dst_reg = *(u32*) (src_reg + off)
9782 * with different pointer types:
9783 * src_reg == ctx in one branch and
9784 * src_reg == stack|map in some other branch.
9785 * Reject it.
9786 */
9787 verbose(env, "same insn cannot be used with different pointers\n");
9788 return -EINVAL;
9789 }
9790 } else if (class == BPF_STX) {
9791 enum bpf_reg_type *prev_dst_type, dst_reg_type;
9792 if (BPF_MODE(insn->code) == BPF_XADD) {
9793 err = check_xadd(env, env->insn_idx, insn);
9794 if (err) {
9795 return err;
9796 }
9797 env->insn_idx++;
9798 continue;
9799 }
9800
9801 /* check src1 operand */
9802 err = check_reg_arg(env, insn->src_reg, SRC_OP);
9803 if (err) {
9804 return err;
9805 }
9806 /* check src2 operand */
9807 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9808 if (err) {
9809 return err;
9810 }
9811
9812 dst_reg_type = regs[insn->dst_reg].type;
9813
9814 /* check that memory (dst_reg + off) is writeable */
9815 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE,
9816 insn->src_reg, false);
9817 if (err) {
9818 return err;
9819 }
9820
9821 prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9822
9823 if (*prev_dst_type == NOT_INIT) {
9824 *prev_dst_type = dst_reg_type;
9825 } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
9826 verbose(env, "same insn cannot be used with different pointers\n");
9827 return -EINVAL;
9828 }
9829 } else if (class == BPF_ST) {
9830 if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
9831 verbose(env, "BPF_ST uses reserved fields\n");
9832 return -EINVAL;
9833 }
9834 /* check src operand */
9835 err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9836 if (err) {
9837 return err;
9838 }
9839 if (is_ctx_reg(env, insn->dst_reg)) {
9840 verbose(env, "BPF_ST stores into R%d %s is not allowed\n", insn->dst_reg,
9841 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
9842 return -EACCES;
9843 }
9844
9845 /* check that memory (dst_reg + off) is writeable */
9846 err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1,
9847 false);
9848 if (err) {
9849 return err;
9850 }
9851 } else if (class == BPF_JMP || class == BPF_JMP32) {
9852 u8 opcode = BPF_OP(insn->code);
9853 env->jmps_processed++;
9854 if (opcode == BPF_CALL) {
9855 if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 ||
9856 (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL) || insn->dst_reg != BPF_REG_0 ||
9857 class == BPF_JMP32) {
9858 verbose(env, "BPF_CALL uses reserved fields\n");
9859 return -EINVAL;
9860 }
9861
9862 if (env->cur_state->active_spin_lock &&
9863 (insn->src_reg == BPF_PSEUDO_CALL || insn->imm != BPF_FUNC_spin_unlock)) {
9864 verbose(env, "function calls are not allowed while holding a lock\n");
9865 return -EINVAL;
9866 }
9867 if (insn->src_reg == BPF_PSEUDO_CALL) {
9868 err = check_func_call(env, insn, &env->insn_idx);
9869 } else {
9870 err = check_helper_call(env, insn->imm, env->insn_idx);
9871 }
9872 if (err) {
9873 return err;
9874 }
9875 } else if (opcode == BPF_JA) {
9876 if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 ||
9877 insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
9878 verbose(env, "BPF_JA uses reserved fields\n");
9879 return -EINVAL;
9880 }
9881 env->insn_idx += insn->off + 1;
9882 continue;
9883 } else if (opcode == BPF_EXIT) {
9884 if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 ||
9885 insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
9886 verbose(env, "BPF_EXIT uses reserved fields\n");
9887 return -EINVAL;
9888 }
9889 if (env->cur_state->active_spin_lock) {
9890 verbose(env, "bpf_spin_unlock is missing\n");
9891 return -EINVAL;
9892 }
9893 if (state->curframe) {
9894 /* exit from nested function */
9895 err = prepare_func_exit(env, &env->insn_idx);
9896 if (err) {
9897 return err;
9898 }
9899 do_print_state = true;
9900 continue;
9901 }
9902
9903 err = check_reference_leak(env);
9904 if (err) {
9905 return err;
9906 }
9907
9908 err = check_return_code(env);
9909 if (err) {
9910 return err;
9911 }
9912 process_bpf_exit:
9913 update_branch_counts(env, env->cur_state);
9914 err = pop_stack(env, &prev_insn_idx, &env->insn_idx, pop_log);
9915 if (err < 0) {
9916 if (err != -ENOENT) {
9917 return err;
9918 }
9919 break;
9920 } else {
9921 do_print_state = true;
9922 continue;
9923 }
9924 } else {
9925 err = check_cond_jmp_op(env, insn, &env->insn_idx);
9926 if (err) {
9927 return err;
9928 }
9929 }
9930 } else if (class == BPF_LD) {
9931 u8 mode = BPF_MODE(insn->code);
9932 if (mode == BPF_ABS || mode == BPF_IND) {
9933 err = check_ld_abs(env, insn);
9934 if (err) {
9935 return err;
9936 }
9937 } else if (mode == BPF_IMM) {
9938 err = check_ld_imm(env, insn);
9939 if (err) {
9940 return err;
9941 }
9942 env->insn_idx++;
9943 sanitize_mark_insn_seen(env);
9944 } else {
9945 verbose(env, "invalid BPF_LD mode\n");
9946 return -EINVAL;
9947 }
9948 } else {
9949 verbose(env, "unknown insn class %d\n", class);
9950 return -EINVAL;
9951 }
9952 env->insn_idx++;
9953 }
9954
9955 return 0;
9956 }
9957
9958 /* replace pseudo btf_id with kernel symbol address */
9959 static int check_pseudo_btf_id(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn_aux_data *aux)
9960 {
9961 const struct btf_var_secinfo *vsi;
9962 const struct btf_type *datasec;
9963 const struct btf_type *t;
9964 const char *sym_name;
9965 bool percpu = false;
9966 u32 type, id = insn->imm;
9967 s32 datasec_id;
9968 u64 addr;
9969 int i;
9970
9971 if (!btf_vmlinux) {
9972 verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
9973 return -EINVAL;
9974 }
9975
9976 if (insn[1].imm != 0) {
9977 verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
9978 return -EINVAL;
9979 }
9980
9981 t = btf_type_by_id(btf_vmlinux, id);
9982 if (!t) {
9983 verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
9984 return -ENOENT;
9985 }
9986
9987 if (!btf_type_is_var(t)) {
9988 verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
9989 return -EINVAL;
9990 }
9991
9992 sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
9993 addr = kallsyms_lookup_name(sym_name);
9994 if (!addr) {
9995 verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", sym_name);
9996 return -ENOENT;
9997 }
9998
9999 datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", BTF_KIND_DATASEC);
10000 if (datasec_id > 0) {
10001 datasec = btf_type_by_id(btf_vmlinux, datasec_id);
10002 for_each_vsi(i, datasec, vsi)
10003 {
10004 if (vsi->type == id) {
10005 percpu = true;
10006 break;
10007 }
10008 }
10009 }
10010
10011 insn[0].imm = (u32)addr;
10012 insn[1].imm = addr >> VERIFIER_THIRTYTWO;
10013
10014 type = t->type;
10015 t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
10016 if (percpu) {
10017 aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
10018 aux->btf_var.btf_id = type;
10019 } else if (!btf_type_is_struct(t)) {
10020 const struct btf_type *ret;
10021 const char *tname;
10022 u32 tsize;
10023
10024 /* resolve the type size of ksym. */
10025 ret = btf_resolve_size(btf_vmlinux, t, &tsize);
10026 if (IS_ERR(ret)) {
10027 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
10028 verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret));
10029 return -EINVAL;
10030 }
10031 aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
10032 aux->btf_var.mem_size = tsize;
10033 } else {
10034 aux->btf_var.reg_type = PTR_TO_BTF_ID;
10035 aux->btf_var.btf_id = type;
10036 }
10037 return 0;
10038 }
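/* Example (illustrative; the symbol name is only an assumption): for a ksym
 * load such as
 *
 *   r1 = <pseudo btf_id of kernel VAR 'bpf_prog_active'>   (BPF_PSEUDO_BTF_ID)
 *
 * check_pseudo_btf_id() resolves the symbol via kallsyms, stores the low
 * 32 bits of its address in insn[0].imm and the high bits in insn[1].imm,
 * and records PTR_TO_PERCPU_BTF_ID, read-only PTR_TO_MEM or PTR_TO_BTF_ID
 * in aux->btf_var depending on the variable's type.
 */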
10039
10040 static int check_map_prealloc(struct bpf_map *map)
10041 {
10042 return (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10043 map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
10044 !(map->map_flags & BPF_F_NO_PREALLOC);
10045 }
10046
10047 static bool is_tracing_prog_type(enum bpf_prog_type type)
10048 {
10049 switch (type) {
10050 case BPF_PROG_TYPE_KPROBE:
10051 case BPF_PROG_TYPE_TRACEPOINT:
10052 case BPF_PROG_TYPE_PERF_EVENT:
10053 case BPF_PROG_TYPE_RAW_TRACEPOINT:
10054 return true;
10055 default:
10056 return false;
10057 }
10058 }
10059
10060 static bool is_preallocated_map(struct bpf_map *map)
10061 {
10062 if (!check_map_prealloc(map)) {
10063 return false;
10064 }
10065 if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta)) {
10066 return false;
10067 }
10068 return true;
10069 }
10070
10071 static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog)
10073 {
10074 enum bpf_prog_type prog_type = resolve_prog_type(prog);
10075 /*
10076 * Validate that trace type programs use preallocated hash maps.
10077 *
10078 * For programs attached to PERF events this is mandatory as the
10079 * perf NMI can hit any arbitrary code sequence.
10080 *
10081 * All other trace types using run-time allocated (non-preallocated)
10082 * hash maps are unsafe as well, because tracepoints or kprobes can fire
10083 * inside locked regions of the memory allocator, or at a place where a
10084 * recursion into the memory allocator would see inconsistent state.
10085 *
10086 * On RT enabled kernels run-time allocation of all trace type
10087 * programs is strictly prohibited due to lock type constraints. On
10088 * !RT kernels it is allowed for backwards compatibility reasons for
10089 * now, but warnings are emitted so developers are made aware of
10090 * the unsafety and can fix their programs before this is enforced.
10091 */
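/* For illustration (an assumption about typical usage, not part of the
 * original comment): a hash map created with map_flags == 0 is
 * preallocated and passes is_preallocated_map(), while one created with
 * BPF_F_NO_PREALLOC set hits the warning/rejection paths below when used
 * by a tracing program.
 */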
10092 if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
10093 if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
10094 verbose(env, "perf_event programs can only use preallocated hash map\n");
10095 return -EINVAL;
10096 }
10097 if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
10098 verbose(env, "trace type programs can only use preallocated hash map\n");
10099 return -EINVAL;
10100 }
10101 WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
10102 verbose(env,
10103 "trace type programs with run-time allocated hash maps are unsafe."
10104 " Switch to preallocated hash maps.\n");
10105 }
10106
10107 if ((is_tracing_prog_type(prog_type) || prog_type == BPF_PROG_TYPE_SOCKET_FILTER) && map_value_has_spin_lock(map)) {
10108 verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
10109 return -EINVAL;
10110 }
10111
10112 if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && !bpf_offload_prog_map_match(prog, map)) {
10113 verbose(env, "offload device mismatch between prog and map\n");
10114 return -EINVAL;
10115 }
10116
10117 if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
10118 verbose(env, "bpf_struct_ops map cannot be used in prog\n");
10119 return -EINVAL;
10120 }
10121
10122 if (prog->aux->sleepable) {
10123 switch (map->map_type) {
10124 case BPF_MAP_TYPE_HASH:
10125 case BPF_MAP_TYPE_LRU_HASH:
10126 case BPF_MAP_TYPE_ARRAY:
10127 if (!is_preallocated_map(map)) {
10128 verbose(env, "Sleepable programs can only use preallocated hash maps\n");
10129 return -EINVAL;
10130 }
10131 break;
10132 default:
10133 verbose(env, "Sleepable programs can only use array and hash maps\n");
10134 return -EINVAL;
10135 }
10136 }
10137
10138 return 0;
10139 }
10140
10141 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
10142 {
10143 return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
10144 }
10145
10146 /* find and rewrite pseudo imm in ld_imm64 instructions:
10147 *
10148 * 1. if it accesses map FD, replace it with actual map pointer.
10149 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
10150 *
10151 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
10152 */
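/* Illustrative sketch of the rewrite performed below (hedged): a pseudo map
 * load
 *
 *   insn[0]: BPF_LD | BPF_IMM | BPF_DW, src_reg = BPF_PSEUDO_MAP_FD, imm = map_fd
 *   insn[1]: second half of the ld_imm64, imm = 0
 *
 * ends up with insn[0].imm / insn[1].imm holding the low / high 32 bits of
 * the 'struct bpf_map *' looked up from the fd, and the map recorded in
 * env->used_maps[].
 */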
10153 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
10154 {
10155 struct bpf_insn *insn = env->prog->insnsi;
10156 int insn_cnt = env->prog->len;
10157 int i, j, err;
10158
10159 err = bpf_prog_calc_tag(env->prog);
10160 if (err) {
10161 return err;
10162 }
10163
10164 for (i = 0; i < insn_cnt; i++, insn++) {
10165 if (BPF_CLASS(insn->code) == BPF_LDX && (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
10166 verbose(env, "BPF_LDX uses reserved fields\n");
10167 return -EINVAL;
10168 }
10169
10170 if (BPF_CLASS(insn->code) == BPF_STX &&
10171 ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
10172 verbose(env, "BPF_STX uses reserved fields\n");
10173 return -EINVAL;
10174 }
10175
10176 if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
10177 struct bpf_insn_aux_data *aux;
10178 struct bpf_map *map;
10179 struct fd f;
10180 u64 addr;
10181
10182 if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
10183 insn[1].off != 0) {
10184 verbose(env, "invalid bpf_ld_imm64 insn\n");
10185 return -EINVAL;
10186 }
10187
10188 if (insn[0].src_reg == 0) {
10189 /* valid generic load 64-bit imm */
10190 goto next_insn;
10191 }
10192
10193 if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
10194 aux = &env->insn_aux_data[i];
10195 err = check_pseudo_btf_id(env, insn, aux);
10196 if (err) {
10197 return err;
10198 }
10199 goto next_insn;
10200 }
10201
10202 /* In final convert_pseudo_ld_imm64() step, this is
10203 * converted into regular 64-bit imm load insn.
10204 */
10205 if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
10206 (insn[0].src_reg == BPF_PSEUDO_MAP_FD && insn[1].imm != 0)) {
10207 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
10208 return -EINVAL;
10209 }
10210
10211 f = fdget(insn[0].imm);
10212 map = __bpf_map_get(f);
10213 if (IS_ERR(map)) {
10214 verbose(env, "fd %d is not pointing to valid bpf_map\n", insn[0].imm);
10215 return PTR_ERR(map);
10216 }
10217
10218 err = check_map_prog_compatibility(env, map, env->prog);
10219 if (err) {
10220 fdput(f);
10221 return err;
10222 }
10223
10224 aux = &env->insn_aux_data[i];
10225 if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
10226 addr = (unsigned long)map;
10227 } else {
10228 u32 off = insn[1].imm;
10229
10230 if (off >= BPF_MAX_VAR_OFF) {
10231 verbose(env, "direct value offset of %u is not allowed\n", off);
10232 fdput(f);
10233 return -EINVAL;
10234 }
10235
10236 if (!map->ops->map_direct_value_addr) {
10237 verbose(env, "no direct value access support for this map type\n");
10238 fdput(f);
10239 return -EINVAL;
10240 }
10241
10242 err = map->ops->map_direct_value_addr(map, &addr, off);
10243 if (err) {
10244 verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", map->value_size, off);
10245 fdput(f);
10246 return err;
10247 }
10248
10249 aux->map_off = off;
10250 addr += off;
10251 }
10252
10253 insn[0].imm = (u32)addr;
10254 insn[1].imm = addr >> VERIFIER_THIRTYTWO;
10255
10256 /* check whether we recorded this map already */
10257 for (j = 0; j < env->used_map_cnt; j++) {
10258 if (env->used_maps[j] == map) {
10259 aux->map_index = j;
10260 fdput(f);
10261 goto next_insn;
10262 }
10263 }
10264
10265 if (env->used_map_cnt >= MAX_USED_MAPS) {
10266 fdput(f);
10267 return -E2BIG;
10268 }
10269
10270 /* hold the map. If the program is rejected by verifier,
10271 * the map will be released by release_maps() or it
10272 * will be used by the valid program until it's unloaded
10273 * and all maps are released in free_used_maps()
10274 */
10275 bpf_map_inc(map);
10276
10277 aux->map_index = env->used_map_cnt;
10278 env->used_maps[env->used_map_cnt++] = map;
10279
10280 if (bpf_map_is_cgroup_storage(map) && bpf_cgroup_storage_assign(env->prog->aux, map)) {
10281 verbose(env, "only one cgroup storage of each type is allowed\n");
10282 fdput(f);
10283 return -EBUSY;
10284 }
10285
10286 fdput(f);
10287 next_insn:
10288 insn++;
10289 i++;
10290 continue;
10291 }
10292
10293 /* Basic sanity check before we invest more work here. */
10294 if (!bpf_opcode_in_insntable(insn->code)) {
10295 verbose(env, "unknown opcode %02x\n", insn->code);
10296 return -EINVAL;
10297 }
10298 }
10299
10300 /* now all pseudo BPF_LD_IMM64 instructions load valid
10301 * 'struct bpf_map *' into a register instead of user map_fd.
10302 * These pointers will be used later by verifier to validate map access.
10303 */
10304 return 0;
10305 }
10306
10307 /* drop refcnt of maps used by the rejected program */
10308 static void release_maps(struct bpf_verifier_env *env)
10309 {
10310 __bpf_free_used_maps(env->prog->aux, env->used_maps, env->used_map_cnt);
10311 }
10312
10313 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
10314 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
10315 {
10316 struct bpf_insn *insn = env->prog->insnsi;
10317 int insn_cnt = env->prog->len;
10318 int i;
10319
10320 for (i = 0; i < insn_cnt; i++, insn++) {
10321 if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) {
10322 insn->src_reg = 0;
10323 }
10324 }
10325 }
10326
10327 /* single env->prog->insni[off] instruction was replaced with the range
10328 * insni[off, off + cnt). Adjust corresponding insn_aux_data by copying
10329 * [0, off) and [off, end) to new locations, so the patched range stays zero
10330 */
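/* Worked example (illustrative): with off = 5 and cnt = 3, the code below
 * copies old_data[0..4] to new_data[0..4], copies old_data[5..] starting
 * at new_data[7], and gives new_data[5..6] the 'seen' mark of the original
 * insn at off, so aux data stays aligned with the patched program.
 */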
10331 static void adjust_insn_aux_data(struct bpf_verifier_env *env, struct bpf_insn_aux_data *new_data,
10332 struct bpf_prog *new_prog, u32 off, u32 cnt)
10333 {
10334 struct bpf_insn_aux_data *old_data = env->insn_aux_data;
10335 struct bpf_insn *insn = new_prog->insnsi;
10336 u32 old_seen = old_data[off].seen;
10337 u32 prog_len;
10338 int i;
10339
10340 /* aux info at OFF always needs adjustment, no matter whether the fast
10341 * path (cnt == 1) is taken or not. There is no guarantee that the insn
10342 * at OFF is the original insn of the old prog.
10343 */
10344 old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
10345
10346 if (cnt == 1) {
10347 return;
10348 }
10349 prog_len = new_prog->len;
10350
10351 memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
10352 memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
10353 for (i = off; i < off + cnt - 1; i++) {
10354 /* Expand insni[off]'s seen count to the patched range. */
10355 new_data[i].seen = old_seen;
10356 new_data[i].zext_dst = insn_has_def32(env, insn + i);
10357 }
10358 env->insn_aux_data = new_data;
10359 vfree(old_data);
10360 }
10361
10362 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
10363 {
10364 int i;
10365
10366 if (len == 1) {
10367 return;
10368 }
10369 /* NOTE: fake 'exit' subprog should be updated as well. */
10370 for (i = 0; i <= env->subprog_cnt; i++) {
10371 if (env->subprog_info[i].start <= off) {
10372 continue;
10373 }
10374 env->subprog_info[i].start += len - 1;
10375 }
10376 }
10377
10378 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
10379 {
10380 struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
10381 int i, sz = prog->aux->size_poke_tab;
10382 struct bpf_jit_poke_descriptor *desc;
10383
10384 for (i = 0; i < sz; i++) {
10385 desc = &tab[i];
10386 if (desc->insn_idx <= off) {
10387 continue;
10388 }
10389 desc->insn_idx += len - 1;
10390 }
10391 }
10392
10393 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, const struct bpf_insn *patch,
10394 u32 len)
10395 {
10396 struct bpf_prog *new_prog;
10397 struct bpf_insn_aux_data *new_data = NULL;
10398
10399 if (len > 1) {
10400 new_data = vzalloc(array_size(env->prog->len + len - 1, sizeof(struct bpf_insn_aux_data)));
10401 if (!new_data) {
10402 return NULL;
10403 }
10404 }
10405
10406 new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
10407 if (IS_ERR(new_prog)) {
10408 if (PTR_ERR(new_prog) == -ERANGE) {
10409 verbose(env, "insn %d cannot be patched due to 16-bit range\n", env->insn_aux_data[off].orig_idx);
10410 }
10411 vfree(new_data);
10412 return NULL;
10413 }
10414 adjust_insn_aux_data(env, new_data, new_prog, off, len);
10415 adjust_subprog_starts(env, off, len);
10416 adjust_poke_descs(new_prog, off, len);
10417 return new_prog;
10418 }
10419
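/* Illustrative example (not from the original source): with subprog starts
 * {0, 10, 20} plus the fake 'exit' subprog, removing off = 10, cnt = 10
 * drops the subprog that started at insn 10 (i = 1, j = 2) and then shifts
 * every remaining start at or above the removed range down by cnt.
 */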
10420 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt)
10421 {
10422 int i, j;
10423
10424 /* find first prog starting at or after off (first to remove) */
10425 for (i = 0; i < env->subprog_cnt; i++) {
10426 if (env->subprog_info[i].start >= off) {
10427 break;
10428 }
10429 }
10430 /* find first prog starting at or after off + cnt (first to stay) */
10431 for (j = i; j < env->subprog_cnt; j++) {
10432 if (env->subprog_info[j].start >= off + cnt) {
10433 break;
10434 }
10435 }
10436 /* if subprog j doesn't start exactly at off + cnt, we are just
10437 * removing the front of the previous prog
10438 */
10439 if (env->subprog_info[j].start != off + cnt) {
10440 j--;
10441 }
10442
10443 if (j > i) {
10444 struct bpf_prog_aux *aux = env->prog->aux;
10445 int move;
10446
10447 /* move fake 'exit' subprog as well */
10448 move = env->subprog_cnt + 1 - j;
10449
10450 memmove(env->subprog_info + i, env->subprog_info + j, sizeof(*env->subprog_info) * move);
10451 env->subprog_cnt -= j - i;
10452
10453 /* remove func_info */
10454 if (aux->func_info) {
10455 move = aux->func_info_cnt - j;
10456
10457 memmove(aux->func_info + i, aux->func_info + j, sizeof(*aux->func_info) * move);
10458 aux->func_info_cnt -= j - i;
10459 /* func_info->insn_off is set after all code rewrites,
10460 * in adjust_btf_func() - no need to adjust
10461 */
10462 }
10463 } else {
10464 /* convert i from "first prog to remove" to "first to adjust" */
10465 if (env->subprog_info[i].start == off) {
10466 i++;
10467 }
10468 }
10469
10470 /* update fake 'exit' subprog as well */
10471 for (; i <= env->subprog_cnt; i++) {
10472 env->subprog_info[i].start -= cnt;
10473 }
10474
10475 return 0;
10476 }
10477
10478 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt)
10479 {
10480 struct bpf_prog *prog = env->prog;
10481 u32 i, l_off, l_cnt, nr_linfo;
10482 struct bpf_line_info *linfo;
10483
10484 nr_linfo = prog->aux->nr_linfo;
10485 if (!nr_linfo) {
10486 return 0;
10487 }
10488
10489 linfo = prog->aux->linfo;
10490
10491 /* find first line info to remove, count lines to be removed */
10492 for (i = 0; i < nr_linfo; i++) {
10493 if (linfo[i].insn_off >= off) {
10494 break;
10495 }
10496 }
10497
10498 l_off = i;
10499 l_cnt = 0;
10500 for (; i < nr_linfo; i++) {
10501 if (linfo[i].insn_off < off + cnt) {
10502 l_cnt++;
10503 } else {
10504 break;
10505 }
10506 }
10507
10508 /* First live insn doesn't match first live linfo, it needs to "inherit"
10509 * last removed linfo. prog is already modified, so prog->len == off
10510 * means no live instructions after (tail of the program was removed).
10511 */
10512 if (prog->len != off && l_cnt && (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
10513 l_cnt--;
10514 linfo[--i].insn_off = off + cnt;
10515 }
10516
10517 /* remove the line info which refer to the removed instructions */
10518 if (l_cnt) {
10519 memmove(linfo + l_off, linfo + i, sizeof(*linfo) * (nr_linfo - i));
10520
10521 prog->aux->nr_linfo -= l_cnt;
10522 nr_linfo = prog->aux->nr_linfo;
10523 }
10524
10525 /* pull all linfo[i].insn_off >= off + cnt in by cnt */
10526 for (i = l_off; i < nr_linfo; i++) {
10527 linfo[i].insn_off -= cnt;
10528 }
10529
10530 /* fix up all subprogs (incl. 'exit') which start >= off */
10531 for (i = 0; i <= env->subprog_cnt; i++) {
10532 if (env->subprog_info[i].linfo_idx > l_off) {
10533 /* program may have started in the removed region but
10534 * may not be fully removed
10535 */
10536 if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) {
10537 env->subprog_info[i].linfo_idx -= l_cnt;
10538 } else {
10539 env->subprog_info[i].linfo_idx = l_off;
10540 }
10541 }
10542 }
10543
10544 return 0;
10545 }
10546
10547 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
10548 {
10549 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10550 unsigned int orig_prog_len = env->prog->len;
10551 int err;
10552
10553 if (bpf_prog_is_dev_bound(env->prog->aux)) {
10554 bpf_prog_offload_remove_insns(env, off, cnt);
10555 }
10556
10557 err = bpf_remove_insns(env->prog, off, cnt);
10558 if (err) {
10559 return err;
10560 }
10561
10562 err = adjust_subprog_starts_after_remove(env, off, cnt);
10563 if (err) {
10564 return err;
10565 }
10566
10567 err = bpf_adj_linfo_after_remove(env, off, cnt);
10568 if (err) {
10569 return err;
10570 }
10571
10572 memmove(aux_data + off, aux_data + off + cnt, sizeof(*aux_data) * (orig_prog_len - off - cnt));
10573
10574 return 0;
10575 }
10576
10577 /* The verifier does more data flow analysis than llvm and will not
10578 * explore branches that are dead at run time. Malicious programs can
10579 * have dead code too. Therefore replace all dead at-run-time code
10580 * with 'ja -1'.
10581 *
10582 * Plain NOPs would not be optimal: e.g. if they sat at the end of the
10583 * program and through another bug we managed to jump there, we would
10584 * execute beyond program memory. Returning an exception
10585 * code also wouldn't work since we can have subprogs where the dead
10586 * code could be located.
10587 */
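/* In other words (illustrative): every insn whose insn_aux_data 'seen' mark
 * is still false after verification is overwritten with
 *
 *   BPF_JMP_IMM(BPF_JA, 0, 0, -1)
 *
 * i.e. an unconditional jump to itself, which cannot run past the end of
 * the program even if it were somehow reached.
 */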
10588 static void sanitize_dead_code(struct bpf_verifier_env *env)
10589 {
10590 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10591 struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
10592 struct bpf_insn *insn = env->prog->insnsi;
10593 const int insn_cnt = env->prog->len;
10594 int i;
10595
10596 for (i = 0; i < insn_cnt; i++) {
10597 if (aux_data[i].seen) {
10598 continue;
10599 }
10600 memcpy(insn + i, &trap, sizeof(trap));
10601 aux_data[i].zext_dst = false;
10602 }
10603 }
10604
10605 static bool insn_is_cond_jump(u8 code)
10606 {
10607 u8 op;
10608
10609 if (BPF_CLASS(code) == BPF_JMP32) {
10610 return true;
10611 }
10612
10613 if (BPF_CLASS(code) != BPF_JMP) {
10614 return false;
10615 }
10616
10617 op = BPF_OP(code);
10618 return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
10619 }
10620
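/* Illustrative summary (not part of the original source): for a conditional
 * jump where only one successor was ever seen, the loop below hard-wires
 * the branch:
 *
 *   fall-through insn (i + 1) never seen     -> replace with 'ja insn->off'
 *   branch target (i + 1 + off) never seen   -> replace with 'ja 0'
 *
 * turning the conditional jump into an unconditional jump toward the only
 * live successor.
 */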
10621 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
10622 {
10623 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10624 struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10625 struct bpf_insn *insn = env->prog->insnsi;
10626 const int insn_cnt = env->prog->len;
10627 int i;
10628
10629 for (i = 0; i < insn_cnt; i++, insn++) {
10630 if (!insn_is_cond_jump(insn->code)) {
10631 continue;
10632 }
10633
10634 if (!aux_data[i + 1].seen) {
10635 ja.off = insn->off;
10636 } else if (!aux_data[i + 1 + insn->off].seen) {
10637 ja.off = 0;
10638 } else {
10639 continue;
10640 }
10641
10642 if (bpf_prog_is_dev_bound(env->prog->aux)) {
10643 bpf_prog_offload_replace_insn(env, i, &ja);
10644 }
10645
10646 memcpy(insn, &ja, sizeof(ja));
10647 }
10648 }
10649
10650 static int opt_remove_dead_code(struct bpf_verifier_env *env)
10651 {
10652 struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10653 int insn_cnt = env->prog->len;
10654 int i, err;
10655
10656 for (i = 0; i < insn_cnt; i++) {
10657 int j;
10658
10659 j = 0;
10660 while (i + j < insn_cnt && !aux_data[i + j].seen) {
10661 j++;
10662 }
10663 if (!j) {
10664 continue;
10665 }
10666
10667 err = verifier_remove_insns(env, i, j);
10668 if (err) {
10669 return err;
10670 }
10671 insn_cnt = env->prog->len;
10672 }
10673
10674 return 0;
10675 }
10676
10677 static int opt_remove_nops(struct bpf_verifier_env *env)
10678 {
10679 const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10680 struct bpf_insn *insn = env->prog->insnsi;
10681 int insn_cnt = env->prog->len;
10682 int i, err;
10683
10684 for (i = 0; i < insn_cnt; i++) {
10685 if (memcmp(&insn[i], &ja, sizeof(ja))) {
10686 continue;
10687 }
10688
10689 err = verifier_remove_insns(env, i, 1);
10690 if (err) {
10691 return err;
10692 }
10693 insn_cnt--;
10694 i--;
10695 }
10696
10697 return 0;
10698 }
10699
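/* Illustrative sketch (hedged): when a 32-bit subregister definition needs
 * its upper half cleared and the JIT asks for explicit zero extension, the
 * insn below is followed by BPF_ZEXT_REG(dst). With BPF_F_TEST_RND_HI32
 * set, 32-bit definitions the verifier deemed not to need zero extension
 * instead get their high 32 bits poisoned:
 *
 *   insn
 *   BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, imm_rnd)
 *   BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32)
 *   BPF_ALU64_REG(BPF_OR, dst, BPF_REG_AX)
 */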
10700 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, const union bpf_attr *attr)
10701 {
10702 struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
10703 struct bpf_insn_aux_data *aux = env->insn_aux_data;
10704 int i, patch_len, delta = 0, len = env->prog->len;
10705 struct bpf_insn *insns = env->prog->insnsi;
10706 struct bpf_prog *new_prog;
10707 bool rnd_hi32;
10708
10709 rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
10710 zext_patch[1] = BPF_ZEXT_REG(0);
10711 rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
10712 rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
10713 rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
10714 for (i = 0; i < len; i++) {
10715 int adj_idx = i + delta;
10716 struct bpf_insn insn;
10717
10718 insn = insns[adj_idx];
10719 if (!aux[adj_idx].zext_dst) {
10720 u8 code, class;
10721 u32 imm_rnd;
10722
10723 if (!rnd_hi32) {
10724 continue;
10725 }
10726
10727 code = insn.code;
10728 class = BPF_CLASS(code);
10729 if (insn_no_def(&insn)) {
10730 continue;
10731 }
10732
10733 /* NOTE: arg "reg" (the fourth one) is only used for
10734 * BPF_STX, which has been ruled out by the check
10735 * above, so it is safe to pass NULL here.
10736 */
10737 if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
10738 if (class == BPF_LD && BPF_MODE(code) == BPF_IMM) {
10739 i++;
10740 }
10741 continue;
10742 }
10743
10744 /* ctx load could be transformed into wider load. */
10745 if (class == BPF_LDX && aux[adj_idx].ptr_type == PTR_TO_CTX) {
10746 continue;
10747 }
10748
10749 imm_rnd = get_random_int();
10750 rnd_hi32_patch[0] = insn;
10751 rnd_hi32_patch[1].imm = imm_rnd;
10752 rnd_hi32_patch[3].dst_reg = insn.dst_reg;
10753 patch = rnd_hi32_patch;
10754 patch_len = VERIFIER_FOUR;
10755 goto apply_patch_buffer;
10756 }
10757
10758 if (!bpf_jit_needs_zext()) {
10759 continue;
10760 }
10761
10762 zext_patch[0] = insn;
10763 zext_patch[1].dst_reg = insn.dst_reg;
10764 zext_patch[1].src_reg = insn.dst_reg;
10765 patch = zext_patch;
10766 patch_len = 2;
10767 apply_patch_buffer:
10768 new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
10769 if (!new_prog) {
10770 return -ENOMEM;
10771 }
10772 env->prog = new_prog;
10773 insns = new_prog->insnsi;
10774 aux = env->insn_aux_data;
10775 delta += patch_len - 1;
10776 }
10777
10778 return 0;
10779 }
10780
10781 /* convert load instructions that access fields of a context type into a
10782 * sequence of instructions that access fields of the underlying structure:
10783 * struct __sk_buff -> struct sk_buff
10784 * struct bpf_sock_ops -> struct sock
10785 */
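/* Example (illustrative, assumed field mapping): a program-visible load like
 *
 *   r2 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 *
 * is rewritten by the prog type's convert_ctx_access() callback into a load
 * of the underlying field, e.g. skb->len inside struct sk_buff, at its real
 * kernel offset.
 */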
10786 static int convert_ctx_accesses(struct bpf_verifier_env *env)
10787 {
10788 const struct bpf_verifier_ops *ops = env->ops;
10789 int i, cnt, size, ctx_field_size, delta = 0;
10790 const int insn_cnt = env->prog->len;
10791 struct bpf_insn insn_buf[VERIFIER_SIXTEEN], *insn;
10792 u32 target_size, size_default, off;
10793 struct bpf_prog *new_prog;
10794 enum bpf_access_type type;
10795 bool is_narrower_load;
10796
10797 if (ops->gen_prologue || env->seen_direct_write) {
10798 if (!ops->gen_prologue) {
10799 verbose(env, "bpf verifier is misconfigured\n");
10800 return -EINVAL;
10801 }
10802 cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog);
10803 if (cnt >= ARRAY_SIZE(insn_buf)) {
10804 verbose(env, "bpf verifier is misconfigured\n");
10805 return -EINVAL;
10806 } else if (cnt) {
10807 new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
10808 if (!new_prog) {
10809 return -ENOMEM;
10810 }
10811
10812 env->prog = new_prog;
10813 delta += cnt - 1;
10814 }
10815 }
10816
10817 if (bpf_prog_is_dev_bound(env->prog->aux)) {
10818 return 0;
10819 }
10820
10821 insn = env->prog->insnsi + delta;
10822
10823 for (i = 0; i < insn_cnt; i++, insn++) {
10824 bpf_convert_ctx_access_t convert_ctx_access;
10825 bool ctx_access;
10826
10827 if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
10828 insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
10829 type = BPF_READ;
10830 ctx_access = true;
10831 } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
10832 insn->code == (BPF_STX | BPF_MEM | BPF_W) || insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
10833 insn->code == (BPF_ST | BPF_MEM | BPF_B) || insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
10834 insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
10835 type = BPF_WRITE;
10836 ctx_access = BPF_CLASS(insn->code) == BPF_STX;
10837 } else {
10838 continue;
10839 }
10840
10841 if (type == BPF_WRITE && env->insn_aux_data[i + delta].sanitize_stack_spill) {
10842 struct bpf_insn patch[] = {
10843 *insn,
10844 BPF_ST_NOSPEC(),
10845 };
10846
10847 cnt = ARRAY_SIZE(patch);
10848 new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
10849 if (!new_prog) {
10850 return -ENOMEM;
10851 }
10852
10853 delta += cnt - 1;
10854 env->prog = new_prog;
10855 insn = new_prog->insnsi + i + delta;
10856 continue;
10857 }
10858
10859 if (!ctx_access) {
10860 continue;
10861 }
10862
10863 switch (env->insn_aux_data[i + delta].ptr_type) {
10864 case PTR_TO_CTX:
10865 if (!ops->convert_ctx_access) {
10866 continue;
10867 }
10868 convert_ctx_access = ops->convert_ctx_access;
10869 break;
10870 case PTR_TO_SOCKET:
10871 case PTR_TO_SOCK_COMMON:
10872 convert_ctx_access = bpf_sock_convert_ctx_access;
10873 break;
10874 case PTR_TO_TCP_SOCK:
10875 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
10876 break;
10877 case PTR_TO_XDP_SOCK:
10878 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
10879 break;
10880 case PTR_TO_BTF_ID:
10881 if (type == BPF_READ) {
10882 insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
10883 env->prog->aux->num_exentries++;
10884 } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
10885 verbose(env, "Writes through BTF pointers are not allowed\n");
10886 return -EINVAL;
10887 }
10888 continue;
10889 default:
10890 continue;
10891 }
10892
10893 ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
10894 size = BPF_LDST_BYTES(insn);
10895
10896 /* If the read access is a narrower load of the field,
10897 * convert to a 4/8-byte load, to minimize program type specific
10898 * convert_ctx_access changes. If conversion is successful,
10899 * we will apply proper mask to the result.
10900 */
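/* Illustrative example (not part of the original comment): a 1-byte read of
 * a 4-byte ctx field becomes a BPF_W load at 'off & ~(size_default - 1)';
 * the shift/mask emitted further below then extracts the originally
 * requested byte from the wider result.
 */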
10901 is_narrower_load = size < ctx_field_size;
10902 size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
10903 off = insn->off;
10904 if (is_narrower_load) {
10905 u8 size_code;
10906
10907 if (type == BPF_WRITE) {
10908 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
10909 return -EINVAL;
10910 }
10911
10912 size_code = BPF_H;
10913 if (ctx_field_size == VERIFIER_FOUR) {
10914 size_code = BPF_W;
10915 } else if (ctx_field_size == VERIFIER_EIGHT) {
10916 size_code = BPF_DW;
10917 }
10918
10919 insn->off = off & ~(size_default - 1);
10920 insn->code = BPF_LDX | BPF_MEM | size_code;
10921 }
10922
10923 target_size = 0;
10924 cnt = convert_ctx_access(type, insn, insn_buf, env->prog, &target_size);
10925 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || (ctx_field_size && !target_size)) {
10926 verbose(env, "bpf verifier is misconfigured\n");
10927 return -EINVAL;
10928 }
10929
10930 if (is_narrower_load && size < target_size) {
10931 u8 shift = bpf_ctx_narrow_access_offset(off, size, size_default) * VERIFIER_EIGHT;
10932 if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
10933 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
10934 return -EINVAL;
10935 }
10936 if (ctx_field_size <= VERIFIER_FOUR) {
10937 if (shift) {
10938 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, insn->dst_reg, shift);
10939 }
10940 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, ((1 << size) * VERIFIER_EIGHT) - 1);
10941 } else {
10942 if (shift) {
10943 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, insn->dst_reg, shift);
10944 }
10945 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, ((1ULL << size) * VERIFIER_EIGHT) - 1);
10946 }
10947 }
10948
10949 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
10950 if (!new_prog) {
10951 return -ENOMEM;
10952 }
10953
10954 delta += cnt - 1;
10955
10956 /* keep walking new program and skip insns we just inserted */
10957 env->prog = new_prog;
10958 insn = new_prog->insnsi + i + delta;
10959 }
10960
10961 return 0;
10962 }
10963
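/* High-level sketch of jit_subprogs() (illustrative): bpf-to-bpf call insns
 * first get their imm pointed just past __bpf_call_base and the callee's
 * subprog id stashed in insn->off; each subprog is then JITed separately,
 * the calls are rewritten to the real JITed addresses, and the JIT runs
 * once more so the final images use correct call offsets.
 */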
10964 static int jit_subprogs(struct bpf_verifier_env *env)
10965 {
10966 struct bpf_prog *prog = env->prog, **func, *tmp;
10967 int i, j, subprog_start, subprog_end = 0, len, subprog;
10968 struct bpf_map *map_ptr;
10969 struct bpf_insn *insn;
10970 void *old_bpf_func;
10971 int err, num_exentries;
10972
10973 if (env->subprog_cnt <= 1) {
10974 return 0;
10975 }
10976
10977 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
10978 if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
10979 continue;
10980 }
10981 /* Upon error here we cannot fall back to interpreter but
10982 * need a hard reject of the program. Thus -EFAULT is
10983 * propagated in any case.
10984 */
10985 subprog = find_subprog(env, i + insn->imm + 1);
10986 if (subprog < 0) {
10987 WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", i + insn->imm + 1);
10988 return -EFAULT;
10989 }
10990 /* temporarily remember subprog id inside insn instead of
10991 * aux_data, since next loop will split up all insns into funcs
10992 */
10993 insn->off = subprog;
10994 /* remember original imm in case JIT fails and fallback
10995 * to interpreter will be needed
10996 */
10997 env->insn_aux_data[i].call_imm = insn->imm;
10998 /* point imm to __bpf_call_base+1 from JITs point of view */
10999 insn->imm = 1;
11000 }
11001
11002 err = bpf_prog_alloc_jited_linfo(prog);
11003 if (err) {
11004 goto out_undo_insn;
11005 }
11006
11007 err = -ENOMEM;
11008 func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
11009 if (!func) {
11010 goto out_undo_insn;
11011 }
11012
11013 for (i = 0; i < env->subprog_cnt; i++) {
11014 subprog_start = subprog_end;
11015 subprog_end = env->subprog_info[i + 1].start;
11016
11017 len = subprog_end - subprog_start;
11018 /* BPF_PROG_RUN doesn't call subprogs directly,
11019 * hence main prog stats include the runtime of subprogs.
11020 * subprogs don't have IDs and are not reachable via prog_get_next_id
11021 * func[i]->aux->stats will never be accessed and stays NULL
11022 */
11023 func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
11024 if (!func[i]) {
11025 goto out_free;
11026 }
11027 memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], len * sizeof(struct bpf_insn));
11028 func[i]->type = prog->type;
11029 func[i]->len = len;
11030 if (bpf_prog_calc_tag(func[i])) {
11031 goto out_free;
11032 }
11033 func[i]->is_func = 1;
11034 func[i]->aux->func_idx = i;
11035 /* Below members will be freed only at prog->aux */
11036 func[i]->aux->btf = prog->aux->btf;
11037 func[i]->aux->func_info = prog->aux->func_info;
11038 func[i]->aux->poke_tab = prog->aux->poke_tab;
11039 func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
11040
11041 for (j = 0; j < prog->aux->size_poke_tab; j++) {
11042 struct bpf_jit_poke_descriptor *poke;
11043
11044 poke = &prog->aux->poke_tab[j];
11045 if (poke->insn_idx < subprog_end && poke->insn_idx >= subprog_start) {
11046 poke->aux = func[i]->aux;
11047 }
11048 }
11049
11050 /* Use bpf_prog_F_tag to indicate functions in stack traces.
11051 * Long term, we would need debug info to populate names
11052 */
11053 func[i]->aux->name[0] = 'F';
11054 func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
11055 func[i]->jit_requested = 1;
11056 func[i]->aux->linfo = prog->aux->linfo;
11057 func[i]->aux->nr_linfo = prog->aux->nr_linfo;
11058 func[i]->aux->jited_linfo = prog->aux->jited_linfo;
11059 func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
11060 num_exentries = 0;
11061 insn = func[i]->insnsi;
11062 for (j = 0; j < func[i]->len; j++, insn++) {
11063 if (BPF_CLASS(insn->code) == BPF_LDX && BPF_MODE(insn->code) == BPF_PROBE_MEM) {
11064 num_exentries++;
11065 }
11066 }
11067 func[i]->aux->num_exentries = num_exentries;
11068 func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
11069 func[i] = bpf_int_jit_compile(func[i]);
11070 if (!func[i]->jited) {
11071 err = -ENOTSUPP;
11072 goto out_free;
11073 }
11074 cond_resched();
11075 }
11076
11077 /* at this point all bpf functions were successfully JITed
11078 * now populate all bpf_calls with correct addresses and
11079 * run last pass of JIT
11080 */
11081 for (i = 0; i < env->subprog_cnt; i++) {
11082 insn = func[i]->insnsi;
11083 for (j = 0; j < func[i]->len; j++, insn++) {
11084 if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11085 continue;
11086 }
11087 subprog = insn->off;
11088 insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - __bpf_call_base;
11089 }
11090
11091 /* we use the aux data to keep a list of the start addresses
11092 * of the JITed images for each function in the program
11093 *
11094 * for some architectures, such as powerpc64, the imm field
11095 * might not be large enough to hold the offset of the start
11096 * address of the callee's JITed image from __bpf_call_base
11097 *
11098 * in such cases, we can lookup the start address of a callee
11099 * by using its subprog id, available from the off field of
11100 * the call instruction, as an index for this list
11101 */
11102 func[i]->aux->func = func;
11103 func[i]->aux->func_cnt = env->subprog_cnt;
11104 }
11105 for (i = 0; i < env->subprog_cnt; i++) {
11106 old_bpf_func = func[i]->bpf_func;
11107 tmp = bpf_int_jit_compile(func[i]);
11108 if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
11109 verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
11110 err = -ENOTSUPP;
11111 goto out_free;
11112 }
11113 cond_resched();
11114 }
11115
11116 /* finally lock prog and jit images for all functions and
11117 * populate kallsyms
11118 */
11119 for (i = 0; i < env->subprog_cnt; i++) {
11120 bpf_prog_lock_ro(func[i]);
11121 bpf_prog_kallsyms_add(func[i]);
11122 }
11123
11124 /* Last step: make now unused interpreter insns from main
11125 * prog consistent for later dump requests, so they can
11126 * later look the same as if they were interpreted only.
11127 */
11128 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11129 if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11130 continue;
11131 }
11132 insn->off = env->insn_aux_data[i].call_imm;
11133 subprog = find_subprog(env, i + insn->off + 1);
11134 insn->imm = subprog;
11135 }
11136
11137 prog->jited = 1;
11138 prog->bpf_func = func[0]->bpf_func;
11139 prog->aux->func = func;
11140 prog->aux->func_cnt = env->subprog_cnt;
11141 bpf_prog_free_unused_jited_linfo(prog);
11142 return 0;
11143 out_free:
11144 /* We failed JIT'ing, so at this point we need to unregister poke
11145 * descriptors from subprogs, so that kernel is not attempting to
11146 * patch it anymore as we're freeing the subprog JIT memory.
11147 */
11148 for (i = 0; i < prog->aux->size_poke_tab; i++) {
11149 map_ptr = prog->aux->poke_tab[i].tail_call.map;
11150 map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
11151 }
11152 /* At this point we're guaranteed that poke descriptors are not
11153 * live anymore. We can just unlink its descriptor table as it's
11154 * released with the main prog.
11155 */
11156 for (i = 0; i < env->subprog_cnt; i++) {
11157 if (!func[i]) {
11158 continue;
11159 }
11160 func[i]->aux->poke_tab = NULL;
11161 bpf_jit_free(func[i]);
11162 }
11163 kfree(func);
11164 out_undo_insn:
11165 /* cleanup main prog to be interpreted */
11166 prog->jit_requested = 0;
11167 for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11168 if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11169 continue;
11170 }
11171 insn->off = 0;
11172 insn->imm = env->insn_aux_data[i].call_imm;
11173 }
11174 bpf_prog_free_jited_linfo(prog);
11175 return err;
11176 }
11177
11178 static int fixup_call_args(struct bpf_verifier_env *env)
11179 {
11180 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11181 struct bpf_prog *prog = env->prog;
11182 struct bpf_insn *insn = prog->insnsi;
11183 int i, depth;
11184 #endif
11185 int err = 0;
11186
11187 if (env->prog->jit_requested && !bpf_prog_is_dev_bound(env->prog->aux)) {
11188 err = jit_subprogs(env);
11189 if (err == 0) {
11190 return 0;
11191 }
11192 if (err == -EFAULT) {
11193 return err;
11194 }
11195 }
11196 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11197 if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
11198 /* When JIT fails the progs with bpf2bpf calls and tail_calls
11199 * have to be rejected, since interpreter doesn't support them yet.
11200 */
11201 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
11202 return -EINVAL;
11203 }
11204 for (i = 0; i < prog->len; i++, insn++) {
11205 if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11206 continue;
11207 }
11208 depth = get_callee_stack_depth(env, insn, i);
11209 if (depth < 0) {
11210 return depth;
11211 }
11212 bpf_patch_call_args(insn, depth);
11213 }
11214 err = 0;
11215 #endif
11216 return err;
11217 }
11218
11219 /* fixup insn->imm field of bpf_call instructions
11220 * and inline eligible helpers as explicit sequence of BPF instructions
11221 *
11222 * this function is called after eBPF program passed verification
11223 */
11224 static int fixup_bpf_calls(struct bpf_verifier_env *env)
11225 {
11226 struct bpf_prog *prog = env->prog;
11227 bool expect_blinding = bpf_jit_blinding_enabled(prog);
11228 struct bpf_insn *insn = prog->insnsi;
11229 const struct bpf_func_proto *fn;
11230 const int insn_cnt = prog->len;
11231 const struct bpf_map_ops *ops;
11232 struct bpf_insn_aux_data *aux;
11233 struct bpf_insn insn_buf[VERIFIER_SIXTEEN];
11234 struct bpf_prog *new_prog;
11235 struct bpf_map *map_ptr;
11236 int i, ret, cnt, delta = 0;
11237
11238 for (i = 0; i < insn_cnt; i++, insn++) {
11239 if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
11240 insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
11241 bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
11242 bool isdiv = BPF_OP(insn->code) == BPF_DIV;
11243 struct bpf_insn *patchlet;
11244 struct bpf_insn chk_and_div[] = {
11245 /* [R,W]x div 0 -> 0 */
11246 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JNE | BPF_K, insn->src_reg, 0, 2, 0),
11247 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
11248 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11249 *insn,
11250 };
11251 struct bpf_insn chk_and_mod[] = {
11252 /* [R,W]x mod 0 -> [R,W]x */
11253 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JEQ | BPF_K, insn->src_reg, 0, 1 + (is64 ? 0 : 1), 0),
11254 *insn,
11255 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11256 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
11257 };
11258
11259 patchlet = isdiv ? chk_and_div : chk_and_mod;
11260 cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod) - (is64 ? 0x2 : 0);
11261
11262 new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
11263 if (!new_prog) {
11264 return -ENOMEM;
11265 }
11266
11267 delta += cnt - 1;
11268 env->prog = prog = new_prog;
11269 insn = new_prog->insnsi + i + delta;
11270 continue;
11271 }
11272
11273 if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) {
11274 cnt = env->ops->gen_ld_abs(insn, insn_buf);
11275 if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11276 verbose(env, "bpf verifier is misconfigured\n");
11277 return -EINVAL;
11278 }
11279
11280 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11281 if (!new_prog) {
11282 return -ENOMEM;
11283 }
11284
11285 delta += cnt - 1;
11286 env->prog = prog = new_prog;
11287 insn = new_prog->insnsi + i + delta;
11288 continue;
11289 }
11290
11291 if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
11292 const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
11293 const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
11294 struct bpf_insn insn_buf_in[VERIFIER_SIXTEEN];
11295 struct bpf_insn *patch = &insn_buf_in[0];
11296 bool issrc, isneg, isimm;
11297 u32 off_reg;
11298
11299 aux = &env->insn_aux_data[i + delta];
11300 if (!aux->alu_state || aux->alu_state == BPF_ALU_NON_POINTER) {
11301 continue;
11302 }
11303
11304 isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
11305 issrc = (aux->alu_state & BPF_ALU_SANITIZE) == BPF_ALU_SANITIZE_SRC;
11306 isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
11307
11308 off_reg = issrc ? insn->src_reg : insn->dst_reg;
11309 if (isimm) {
11310 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11311 } else {
11312 if (isneg) {
11313 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11314 }
11315 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11316 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
11317 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
11318 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
11319 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, VERIFIER_SIXTYTHREE);
11320 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
11321 }
11322 if (!issrc) {
11323 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
11324 }
11325 insn->src_reg = BPF_REG_AX;
11326 if (isneg) {
11327 insn->code = insn->code == code_add ? code_sub : code_add;
11328 }
11329 *patch++ = *insn;
11330 if (issrc && isneg && !isimm) {
11331 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11332 }
11333 cnt = patch - insn_buf_in;
11334
11335 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf_in, cnt);
11336 if (!new_prog) {
11337 return -ENOMEM;
11338 }
11339
11340 delta += cnt - 1;
11341 env->prog = prog = new_prog;
11342 insn = new_prog->insnsi + i + delta;
11343 continue;
11344 }
11345
11346 if (insn->code != (BPF_JMP | BPF_CALL)) {
11347 continue;
11348 }
11349 if (insn->src_reg == BPF_PSEUDO_CALL) {
11350 continue;
11351 }
11352
11353 if (insn->imm == BPF_FUNC_get_route_realm) {
11354 prog->dst_needed = 1;
11355 }
11356 if (insn->imm == BPF_FUNC_get_prandom_u32) {
11357 bpf_user_rnd_init_once();
11358 }
11359 if (insn->imm == BPF_FUNC_override_return) {
11360 prog->kprobe_override = 1;
11361 }
11362 if (insn->imm == BPF_FUNC_tail_call) {
11363 /* If we tail call into other programs, we
11364 * cannot make any assumptions since they can
11365 * be replaced dynamically during runtime in
11366 * the program array.
11367 */
11368 prog->cb_access = 1;
11369 if (!allow_tail_call_in_subprogs(env)) {
11370 prog->aux->stack_depth = MAX_BPF_STACK;
11371 }
11372 prog->aux->max_pkt_offset = MAX_PACKET_OFF;
11373
11374 /* mark bpf_tail_call as different opcode to avoid
11375 * conditional branch in the interpreter for every normal
11376 * call and to prevent accidental JITing by JIT compiler
11377 * that doesn't support bpf_tail_call yet
11378 */
11379 insn->imm = 0;
11380 insn->code = BPF_JMP | BPF_TAIL_CALL;
11381
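/* If the map and key feeding this tail call are verifier-known constants
 * (and neither poisoned nor unprivileged), register a poke descriptor so
 * the JIT can later patch the tail call into a direct jump instead of
 * going through the generic indirect dispatch.
 */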
11382 aux = &env->insn_aux_data[i + delta];
11383 if (env->bpf_capable && !expect_blinding && prog->jit_requested && !bpf_map_key_poisoned(aux) &&
11384 !bpf_map_ptr_poisoned(aux) && !bpf_map_ptr_unpriv(aux)) {
11385 struct bpf_jit_poke_descriptor desc = {
11386 .reason = BPF_POKE_REASON_TAIL_CALL,
11387 .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
11388 .tail_call.key = bpf_map_key_immediate(aux),
11389 .insn_idx = i + delta,
11390 };
11391
11392 ret = bpf_jit_add_poke_descriptor(prog, &desc);
11393 if (ret < 0) {
11394 verbose(env, "adding tail call poke descriptor failed\n");
11395 return ret;
11396 }
11397
11398 insn->imm = ret + 1;
11399 continue;
11400 }
11401
11402 if (!bpf_map_ptr_unpriv(aux)) {
11403 continue;
11404 }
11405
11406 /* instead of changing every JIT dealing with tail_call
11407 * emit two extra insns:
11408 * if (index >= max_entries) goto out;
11409 * index &= array->index_mask;
11410 * to avoid out-of-bounds cpu speculation
11411 */
11412 if (bpf_map_ptr_poisoned(aux)) {
11413 verbose(env, "tail_call abusing map_ptr\n");
11414 return -EINVAL;
11415 }
11416
11417 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11418 insn_buf[0x0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, map_ptr->max_entries, 0x2);
11419 insn_buf[0x1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, container_of(map_ptr, struct bpf_array, map)->index_mask);
11420 insn_buf[0x2] = *insn;
11421 cnt = VERIFIER_THREE;
11422 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11423 if (!new_prog) {
11424 return -ENOMEM;
11425 }
11426
11427 delta += cnt - 1;
11428 env->prog = prog = new_prog;
11429 insn = new_prog->insnsi + i + delta;
11430 continue;
11431 }
11432
11433 /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
11434 * and other inlining handlers are currently limited to 64 bit
11435 * only.
11436 */
11437 if (prog->jit_requested && BITS_PER_LONG == VERIFIER_SIXTYFOUR &&
11438 (insn->imm == BPF_FUNC_map_lookup_elem || insn->imm == BPF_FUNC_map_update_elem ||
11439 insn->imm == BPF_FUNC_map_delete_elem || insn->imm == BPF_FUNC_map_push_elem ||
11440 insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem)) {
11441 aux = &env->insn_aux_data[i + delta];
11442 if (bpf_map_ptr_poisoned(aux)) {
11443 goto patch_call_imm;
11444 }
11445
11446 map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11447 ops = map_ptr->ops;
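/* A map type may provide map_gen_lookup() to emit an inline lookup
 * sequence (e.g. direct array indexing) in place of the generic helper
 * call; -EOPNOTSUPP falls back to patching a direct call below.
 */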
11448 if (insn->imm == BPF_FUNC_map_lookup_elem && ops->map_gen_lookup) {
11449 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
11450 if (cnt == -EOPNOTSUPP) {
11451 goto patch_map_ops_generic;
11452 }
11453 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11454 verbose(env, "bpf verifier is misconfigured\n");
11455 return -EINVAL;
11456 }
11457
11458 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11459 if (!new_prog) {
11460 return -ENOMEM;
11461 }
11462
11463 delta += cnt - 1;
11464 env->prog = prog = new_prog;
11465 insn = new_prog->insnsi + i + delta;
11466 continue;
11467 }
11468
11469 BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, (void *(*)(struct bpf_map *map, void *key)) NULL));
11470 BUILD_BUG_ON(!__same_type(ops->map_delete_elem, (int (*)(struct bpf_map *map, void *key)) NULL));
11471 BUILD_BUG_ON(!__same_type(ops->map_update_elem,
11472 (int (*)(struct bpf_map *map, void *key, void *value, u64 flags)) NULL));
11473 BUILD_BUG_ON(
11474 !__same_type(ops->map_push_elem, (int (*)(struct bpf_map *map, void *value, u64 flags)) NULL));
11475 BUILD_BUG_ON(!__same_type(ops->map_pop_elem, (int (*)(struct bpf_map *map, void *value)) NULL));
11476 BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map *map, void *value)) NULL));
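/* Replace the generic bpf_map_*_elem() helper calls with direct calls
 * into the map's own ops, avoiding one indirect call (and its retpoline
 * cost) per map operation at run time.
 */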
11477 patch_map_ops_generic:
11478 switch (insn->imm) {
11479 case BPF_FUNC_map_lookup_elem:
11480 insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - __bpf_call_base;
11481 continue;
11482 case BPF_FUNC_map_update_elem:
11483 insn->imm = BPF_CAST_CALL(ops->map_update_elem) - __bpf_call_base;
11484 continue;
11485 case BPF_FUNC_map_delete_elem:
11486 insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - __bpf_call_base;
11487 continue;
11488 case BPF_FUNC_map_push_elem:
11489 insn->imm = BPF_CAST_CALL(ops->map_push_elem) - __bpf_call_base;
11490 continue;
11491 case BPF_FUNC_map_pop_elem:
11492 insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - __bpf_call_base;
11493 continue;
11494 case BPF_FUNC_map_peek_elem:
11495 insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - __bpf_call_base;
11496 continue;
11497 default:
11498 break;
11499 }
11500
11501 goto patch_call_imm;
11502 }
11503
11504 if (prog->jit_requested && BITS_PER_LONG == VERIFIER_SIXTYFOUR && insn->imm == BPF_FUNC_jiffies64) {
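/* Inline bpf_jiffies64(): load the address of the kernel's jiffies
 * variable with a two-slot BPF_LD_IMM64 and read its current 64-bit
 * value directly, avoiding the helper call altogether.
 */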
11505 struct bpf_insn ld_jiffies_addr[2] = {
11506 BPF_LD_IMM64(BPF_REG_0, (unsigned long)&jiffies),
11507 };
11508
11509 insn_buf[0x0] = ld_jiffies_addr[0];
11510 insn_buf[0x1] = ld_jiffies_addr[1];
11511 insn_buf[0x2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
11512 cnt = VERIFIER_THREE;
11513
11514 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11515 if (!new_prog) {
11516 return -ENOMEM;
11517 }
11518
11519 delta += cnt - 1;
11520 env->prog = prog = new_prog;
11521 insn = new_prog->insnsi + i + delta;
11522 continue;
11523 }
11524
11525 patch_call_imm:
11526 fn = env->ops->get_func_proto(insn->imm, env->prog);
11527 /* all functions that have a prototype and that the verifier
11528 * allowed programs to call must be real in-kernel functions
11529 */
11530 if (!fn->func) {
11531 verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(insn->imm), insn->imm);
11532 return -EFAULT;
11533 }
11534 insn->imm = fn->func - __bpf_call_base;
11535 }
11536
11537 /* Since poke tab is now finalized, publish aux to tracker. */
11538 for (i = 0; i < prog->aux->size_poke_tab; i++) {
11539 map_ptr = prog->aux->poke_tab[i].tail_call.map;
11540 if (!map_ptr->ops->map_poke_track || !map_ptr->ops->map_poke_untrack || !map_ptr->ops->map_poke_run) {
11541 verbose(env, "bpf verifier is misconfigured\n");
11542 return -EINVAL;
11543 }
11544
11545 ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
11546 if (ret < 0) {
11547 verbose(env, "tracking tail call prog failed\n");
11548 return ret;
11549 }
11550 }
11551
11552 return 0;
11553 }
11554
11555 static void free_states(struct bpf_verifier_env *env)
11556 {
11557 struct bpf_verifier_state_list *sl, *sln;
11558 int i;
11559
11560 sl = env->free_list;
11561 while (sl) {
11562 sln = sl->next;
11563 free_verifier_state(&sl->state, false);
11564 kfree(sl);
11565 sl = sln;
11566 }
11567 env->free_list = NULL;
11568
11569 if (!env->explored_states) {
11570 return;
11571 }
11572
11573 for (i = 0; i < state_htab_size(env); i++) {
11574 sl = env->explored_states[i];
11575
11576 while (sl) {
11577 sln = sl->next;
11578 free_verifier_state(&sl->state, false);
11579 kfree(sl);
11580 sl = sln;
11581 }
11582 env->explored_states[i] = NULL;
11583 }
11584 }
11585
11586 static int do_check_common(struct bpf_verifier_env *env, int subprog)
11587 {
11588 bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11589 struct bpf_verifier_state *state;
11590 struct bpf_reg_state *regs;
11591 int ret, i;
11592
11593 env->prev_linfo = NULL;
11594 env->pass_cnt++;
11595
11596 state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
11597 if (!state) {
11598 return -ENOMEM;
11599 }
11600 state->curframe = 0;
11601 state->speculative = false;
11602 state->branches = 1;
11603 state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
11604 if (!state->frame[0]) {
11605 kfree(state);
11606 return -ENOMEM;
11607 }
11608 env->cur_state = state;
11609 init_func_state(env, state->frame[0], BPF_MAIN_FUNC /* callsite */, 0 /* frameno */, subprog);
11610
11611 regs = state->frame[state->curframe]->regs;
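/* Global subprogs (and EXT programs) have their argument registers
 * typed from the BTF function prototype: context pointers are marked
 * known-zero and scalar arguments become unknown scalars. The main
 * program instead starts with just R1 = PTR_TO_CTX.
 */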
11612 if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
11613 ret = btf_prepare_func_args(env, subprog, regs);
11614 if (ret) {
11615 goto out;
11616 }
11617 for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
11618 if (regs[i].type == PTR_TO_CTX) {
11619 mark_reg_known_zero(env, regs, i);
11620 } else if (regs[i].type == SCALAR_VALUE) {
11621 mark_reg_unknown(env, regs, i);
11622 }
11623 }
11624 } else {
11625 /* 1st arg to a function */
11626 regs[BPF_REG_1].type = PTR_TO_CTX;
11627 mark_reg_known_zero(env, regs, BPF_REG_1);
11628 ret = btf_check_func_arg_match(env, subprog, regs);
11629 if (ret == -EFAULT) {
11630 /* unlikely verifier bug. abort.
11631 * ret == 0 and ret < 0 are sadly acceptable for the
11632 * main() function due to backward compatibility.
11633 * E.g. a socket filter program may be written as:
11634 * int bpf_prog(struct pt_regs *ctx)
11635 * and never dereference that ctx in the program.
11636 * 'struct pt_regs' is a type mismatch for socket
11637 * filter that should be using 'struct __sk_buff'.
11638 */
11639 goto out;
11640 }
11641 }
11642
11643 ret = do_check(env);
11644 out:
11645 /* check for NULL is necessary, since cur_state can be freed inside
11646 * do_check() under memory pressure.
11647 */
11648 if (env->cur_state) {
11649 free_verifier_state(env->cur_state, true);
11650 env->cur_state = NULL;
11651 }
11652 while (!pop_stack(env, NULL, NULL, false)) {
11653 ;
11654 }
11655 if (!ret && pop_log) {
11656 bpf_vlog_reset(&env->log, 0);
11657 }
11658 free_states(env);
11659 return ret;
11660 }
11661
11662 /* Verify all global functions in a BPF program one by one based on their BTF.
11663 * All global functions must pass verification. Otherwise the whole program is rejected.
11664 * Consider:
11665 * int bar(int);
11666 * int foo(int f)
11667 * {
11668 * return bar(f);
11669 * }
11670 * int bar(int b)
11671 * {
11672 * ...
11673 * }
11674 * foo() will be verified first for R1=any_scalar_value. During verification it
11675 * will be assumed that bar() has already been verified successfully and the call to bar()
11676 * from foo() will be checked for type match only. Later bar() will be verified
11677 * independently to check that it's safe for R1=any_scalar_value.
11678 */
11679 static int do_check_subprogs(struct bpf_verifier_env *env)
11680 {
11681 struct bpf_prog_aux *aux = env->prog->aux;
11682 int i, ret;
11683
11684 if (!aux->func_info) {
11685 return 0;
11686 }
11687
11688 for (i = 1; i < env->subprog_cnt; i++) {
11689 if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) {
11690 continue;
11691 }
11692 env->insn_idx = env->subprog_info[i].start;
11693 WARN_ON_ONCE(env->insn_idx == 0);
11694 ret = do_check_common(env, i);
11695 if (ret) {
11696 return ret;
11697 } else if (env->log.level & BPF_LOG_LEVEL) {
11698 verbose(env, "Func#%d is safe for any args that match its prototype\n", i);
11699 }
11700 }
11701 return 0;
11702 }
11703
11704 static int do_check_main(struct bpf_verifier_env *env)
11705 {
11706 int ret;
11707
11708 env->insn_idx = 0;
11709 ret = do_check_common(env, 0);
11710 if (!ret) {
11711 env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
11712 }
11713 return ret;
11714 }
11715
11716 static void print_verification_stats(struct bpf_verifier_env *env)
11717 {
11718 int i;
11719
11720 if (env->log.level & BPF_LOG_STATS) {
11721 verbose(env, "verification time %lld usec\n", div_u64(env->verification_time, VERIFIER_ONETHOUSAND));
11722 verbose(env, "stack depth ");
11723 for (i = 0; i < env->subprog_cnt; i++) {
11724 u32 depth = env->subprog_info[i].stack_depth;
11725
11726 verbose(env, "%d", depth);
11727 if (i + 1 < env->subprog_cnt) {
11728 verbose(env, "+");
11729 }
11730 }
11731 verbose(env, "\n");
11732 }
11733 verbose(env,
11734 "processed %d insns (limit %d) max_states_per_insn %d "
11735 "total_states %d peak_states %d mark_read %d\n",
11736 env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, env->max_states_per_insn, env->total_states,
11737 env->peak_states, env->longest_mark_read_walk);
11738 }
11739
11740 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
11741 {
11742 const struct btf_type *t, *func_proto;
11743 const struct bpf_struct_ops *st_ops;
11744 const struct btf_member *member;
11745 struct bpf_prog *prog = env->prog;
11746 u32 btf_id, member_idx;
11747 const char *mname;
11748
11749 if (!prog->gpl_compatible) {
11750 verbose(env, "struct ops programs must have a GPL compatible license\n");
11751 return -EINVAL;
11752 }
11753
11754 btf_id = prog->aux->attach_btf_id;
11755 st_ops = bpf_struct_ops_find(btf_id);
11756 if (!st_ops) {
11757 verbose(env, "attach_btf_id %u is not a supported struct\n", btf_id);
11758 return -ENOTSUPP;
11759 }
11760
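/* For struct_ops programs expected_attach_type carries the index of the
 * struct member being implemented; the member's function prototype,
 * resolved from BTF below, becomes the program's attach prototype.
 */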
11761 t = st_ops->type;
11762 member_idx = prog->expected_attach_type;
11763 if (member_idx >= btf_type_vlen(t)) {
11764 verbose(env, "attach to invalid member idx %u of struct %s\n", member_idx, st_ops->name);
11765 return -EINVAL;
11766 }
11767
11768 member = &btf_type_member(t)[member_idx];
11769 mname = btf_name_by_offset(btf_vmlinux, member->name_off);
11770 func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL);
11771 if (!func_proto) {
11772 verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", mname, member_idx, st_ops->name);
11773 return -EINVAL;
11774 }
11775
11776 if (st_ops->check_member) {
11777 int err = st_ops->check_member(t, member);
11778 if (err) {
11779 verbose(env, "attach to unsupported member %s of struct %s\n", mname, st_ops->name);
11780 return err;
11781 }
11782 }
11783
11784 prog->aux->attach_func_proto = func_proto;
11785 prog->aux->attach_func_name = mname;
11786 env->ops = st_ops->verifier_ops;
11787
11788 return 0;
11789 }
11790 #define SECURITY_PREFIX "security_"
11791
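/* fmod_ret attachment is only allowed on functions that are either on
 * the error injection list or are LSM hooks (security_*() functions),
 * where overriding the return value is considered safe.
 */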
11792 static int check_attach_modify_return(unsigned long addr, const char *func_name)
11793 {
11794 if (within_error_injection_list(addr) || !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) {
11795 return 0;
11796 }
11797 return -EINVAL;
11798 }
11799
11800 /* non-exhaustive list of sleepable bpf_lsm_*() functions */
11801 BTF_SET_START(btf_sleepable_lsm_hooks)
11802 #ifdef CONFIG_BPF_LSM
11803 BTF_ID(func, bpf_lsm_bprm_committed_creds)
11804 #else
11805 BTF_ID_UNUSED
11806 #endif
11807 BTF_SET_END(btf_sleepable_lsm_hooks)
11808
11809 static int check_sleepable_lsm_hook(u32 btf_id)
11810 {
11811 return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
11812 }
11813
11814 /* list of non-sleepable functions that are otherwise on
11815 * ALLOW_ERROR_INJECTION list
11816 */
11817 BTF_SET_START(btf_non_sleepable_error_inject)
11818 /* The three functions below can be called from both sleepable and non-sleepable context.
11819 * Assume non-sleepable from the BPF safety point of view.
11820 */
11821 BTF_ID(func, __add_to_page_cache_locked)
11822 BTF_ID(func, should_fail_alloc_page)
11823 BTF_ID(func, should_failslab)
11824 BTF_SET_END(btf_non_sleepable_error_inject)
11825
11826 static int check_non_sleepable_error_inject(u32 btf_id)
11827 {
11828 return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
11829 }
11830
11831 int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog,
11832 u32 btf_id, struct bpf_attach_target_info *tgt_info)
11833 {
11834 bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
11835 const char prefix[] = "btf_trace_";
11836 int ret = 0, subprog = -1, i;
11837 const struct btf_type *t;
11838 bool conservative = true;
11839 const char *tname;
11840 struct btf *btf;
11841 long addr = 0;
11842
11843 if (!btf_id) {
11844 bpf_log(log, "Tracing programs must provide btf_id\n");
11845 return -EINVAL;
11846 }
11847 btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
11848 if (!btf) {
11849 bpf_log(log, "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
11850 return -EINVAL;
11851 }
11852 t = btf_type_by_id(btf, btf_id);
11853 if (!t) {
11854 bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
11855 return -EINVAL;
11856 }
11857 tname = btf_name_by_offset(btf, t->name_off);
11858 if (!tname) {
11859 bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
11860 return -EINVAL;
11861 }
11862 if (tgt_prog) {
11863 struct bpf_prog_aux *aux = tgt_prog->aux;
11864
11865 for (i = 0; i < aux->func_info_cnt; i++) {
11866 if (aux->func_info[i].type_id == btf_id) {
11867 subprog = i;
11868 break;
11869 }
11870 }
11871 if (subprog == -1) {
11872 bpf_log(log, "Subprog %s doesn't exist\n", tname);
11873 return -EINVAL;
11874 }
11875 conservative = aux->func_info_aux[subprog].unreliable;
11876 if (prog_extension) {
11877 if (conservative) {
11878 bpf_log(log, "Cannot replace static functions\n");
11879 return -EINVAL;
11880 }
11881 if (!prog->jit_requested) {
11882 bpf_log(log, "Extension programs should be JITed\n");
11883 return -EINVAL;
11884 }
11885 }
11886 if (!tgt_prog->jited) {
11887 bpf_log(log, "Can attach to only JITed progs\n");
11888 return -EINVAL;
11889 }
11890 if (tgt_prog->type == prog->type) {
11891 /* Cannot fentry/fexit another fentry/fexit program.
11892 * Cannot attach program extension to another extension.
11893 * It's ok to attach fentry/fexit to extension program.
11894 */
11895 bpf_log(log, "Cannot recursively attach\n");
11896 return -EINVAL;
11897 }
11898 if (tgt_prog->type == BPF_PROG_TYPE_TRACING && prog_extension &&
11899 (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
11900 /* Program extensions can extend all program types
11901 * except fentry/fexit. The reason is the following.
11902 * The fentry/fexit programs are used for performance
11903 * analysis, stats and can be attached to any program
11904 * type except themselves. When an extension program
11905 * replaces an XDP function, it is necessary to allow
11906 * performance analysis of both the original XDP program
11907 * and its program extension. Hence
11908 * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
11909 * allowed. If extending fentry/fexit were allowed, it
11910 * would be possible to create a long call chain
11911 * fentry->extension->fentry->extension beyond a
11912 * reasonable stack size. Hence extending fentry is not
11913 * allowed.
11914 */
11915 bpf_log(log, "Cannot extend fentry/fexit\n");
11916 return -EINVAL;
11917 }
11918 } else {
11919 if (prog_extension) {
11920 bpf_log(log, "Cannot replace kernel functions\n");
11921 return -EINVAL;
11922 }
11923 }
11924
11925 switch (prog->expected_attach_type) {
11926 case BPF_TRACE_RAW_TP:
11927 if (tgt_prog) {
11928 bpf_log(log, "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
11929 return -EINVAL;
11930 }
11931 if (!btf_type_is_typedef(t)) {
11932 bpf_log(log, "attach_btf_id %u is not a typedef\n", btf_id);
11933 return -EINVAL;
11934 }
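/* Raw tracepoint attach points are BTF typedefs named btf_trace_<name>
 * whose type is a pointer to the tracepoint's function prototype; strip
 * the "btf_trace_" prefix to recover the tracepoint name itself.
 */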
11935 if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
11936 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", btf_id, tname);
11937 return -EINVAL;
11938 }
11939 tname += sizeof(prefix) - 1;
11940 t = btf_type_by_id(btf, t->type);
11941 if (!btf_type_is_ptr(t)) {
11942 /* should never happen in valid vmlinux build */
11943 return -EINVAL;
11944 }
11945 t = btf_type_by_id(btf, t->type);
11946 if (!btf_type_is_func_proto(t)) {
11947 /* should never happen in valid vmlinux build */
11948 return -EINVAL;
11949 }
11950
11951 break;
11952 case BPF_TRACE_ITER:
11953 if (!btf_type_is_func(t)) {
11954 bpf_log(log, "attach_btf_id %u is not a function\n", btf_id);
11955 return -EINVAL;
11956 }
11957 t = btf_type_by_id(btf, t->type);
11958 if (!btf_type_is_func_proto(t)) {
11959 return -EINVAL;
11960 }
11961 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
11962 if (ret) {
11963 return ret;
11964 }
11965 break;
11966 default:
11967 if (!prog_extension) {
11968 return -EINVAL;
11969 }
11970 fallthrough;
11971 case BPF_MODIFY_RETURN:
11972 case BPF_LSM_MAC:
11973 case BPF_TRACE_FENTRY:
11974 case BPF_TRACE_FEXIT:
11975 if (!btf_type_is_func(t)) {
11976 bpf_log(log, "attach_btf_id %u is not a function\n", btf_id);
11977 return -EINVAL;
11978 }
11979 if (prog_extension && btf_check_type_match(log, prog, btf, t)) {
11980 return -EINVAL;
11981 }
11982 t = btf_type_by_id(btf, t->type);
11983 if (!btf_type_is_func_proto(t)) {
11984 return -EINVAL;
11985 }
11986
11987 if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
11988 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
11989 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) {
11990 return -EINVAL;
11991 }
11992
11993 if (tgt_prog && conservative) {
11994 t = NULL;
11995 }
11996
11997 ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
11998 if (ret < 0) {
11999 return ret;
12000 }
12001
12002 if (tgt_prog) {
12003 if (subprog == 0) {
12004 addr = (long)tgt_prog->bpf_func;
12005 } else {
12006 addr = (long)tgt_prog->aux->func[subprog]->bpf_func;
12007 }
12008 } else {
12009 addr = kallsyms_lookup_name(tname);
12010 if (!addr) {
12011 bpf_log(log, "The address of function %s cannot be found\n", tname);
12012 return -ENOENT;
12013 }
12014 }
12015
12016 if (prog->aux->sleepable) {
12017 ret = -EINVAL;
12018 switch (prog->type) {
12019 case BPF_PROG_TYPE_TRACING:
12020 /* fentry/fexit/fmod_ret progs can be sleepable only if they are
12021 * attached to functions on the ALLOW_ERROR_INJECTION list and are not in the denylist.
12022 */
12023 if (!check_non_sleepable_error_inject(btf_id) && within_error_injection_list(addr)) {
12024 ret = 0;
12025 }
12026 break;
12027 case BPF_PROG_TYPE_LSM:
12028 /* LSM progs check that they are attached to bpf_lsm_*() funcs.
12029 * Only some of them are sleepable.
12030 */
12031 if (check_sleepable_lsm_hook(btf_id)) {
12032 ret = 0;
12033 }
12034 break;
12035 default:
12036 break;
12037 }
12038 if (ret) {
12039 bpf_log(log, "%s is not sleepable\n", tname);
12040 return ret;
12041 }
12042 } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
12043 if (tgt_prog) {
12044 bpf_log(log, "can't modify return codes of BPF programs\n");
12045 return -EINVAL;
12046 }
12047 ret = check_attach_modify_return(addr, tname);
12048 if (ret) {
12049 bpf_log(log, "%s() is not modifiable\n", tname);
12050 return ret;
12051 }
12052 }
12053
12054 break;
12055 }
12056 tgt_info->tgt_addr = addr;
12057 tgt_info->tgt_name = tname;
12058 tgt_info->tgt_type = t;
12059 return 0;
12060 }
12061
12062 static int check_attach_btf_id(struct bpf_verifier_env *env)
12063 {
12064 struct bpf_prog *prog = env->prog;
12065 struct bpf_prog *tgt_prog = prog->aux->dst_prog;
12066 struct bpf_attach_target_info tgt_info = {};
12067 u32 btf_id = prog->aux->attach_btf_id;
12068 struct bpf_trampoline *tr;
12069 int ret;
12070 u64 key;
12071
12072 if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM) {
12073 verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
12074 return -EINVAL;
12075 }
12076
12077 if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
12078 return check_struct_ops_btf_id(env);
12079 }
12080
12081 if (prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_EXT) {
12082 return 0;
12083 }
12084
12085 ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
12086 if (ret) {
12087 return ret;
12088 }
12089
12090 if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
12091 /* to make freplace equivalent to their targets, they need to
12092 * inherit env->ops and expected_attach_type for the rest of the
12093 * verification
12094 */
12095 env->ops = bpf_verifier_ops[tgt_prog->type];
12096 prog->expected_attach_type = tgt_prog->expected_attach_type;
12097 }
12098
12099 /* store info about the attachment target that will be used later */
12100 prog->aux->attach_func_proto = tgt_info.tgt_type;
12101 prog->aux->attach_func_name = tgt_info.tgt_name;
12102
12103 if (tgt_prog) {
12104 prog->aux->saved_dst_prog_type = tgt_prog->type;
12105 prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
12106 }
12107
12108 if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
12109 prog->aux->attach_btf_trace = true;
12110 return 0;
12111 } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
12112 if (!bpf_iter_prog_supported(prog)) {
12113 return -EINVAL;
12114 }
12115 return 0;
12116 }
12117
12118 if (prog->type == BPF_PROG_TYPE_LSM) {
12119 ret = bpf_lsm_verify_prog(&env->log, prog);
12120 if (ret < 0) {
12121 return ret;
12122 }
12123 }
12124
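/* Trampolines are shared: the key combines the target program (if any)
 * with the BTF id, so multiple fentry/fexit/freplace programs attached
 * to the same target reuse a single trampoline.
 */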
12125 key = bpf_trampoline_compute_key(tgt_prog, btf_id);
12126 tr = bpf_trampoline_get(key, &tgt_info);
12127 if (!tr) {
12128 return -ENOMEM;
12129 }
12130
12131 prog->aux->dst_trampoline = tr;
12132 return 0;
12133 }
12134
12135 struct btf *bpf_get_btf_vmlinux(void)
12136 {
12137 if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
12138 mutex_lock(&bpf_verifier_lock);
12139 if (!btf_vmlinux) {
12140 btf_vmlinux = btf_parse_vmlinux();
12141 }
12142 mutex_unlock(&bpf_verifier_lock);
12143 }
12144 return btf_vmlinux;
12145 }
12146
12147 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, union bpf_attr __user *uattr)
12148 {
12149 u64 start_time = ktime_get_ns();
12150 struct bpf_verifier_env *env;
12151 struct bpf_verifier_log *log;
12152 int i, len, ret = -EINVAL;
12153 bool is_priv;
12154
12155 /* no program is valid */
12156 if (ARRAY_SIZE(bpf_verifier_ops) == 0) {
12157 return -EINVAL;
12158 }
12159
12160 /* 'struct bpf_verifier_env' can be global, but since it's not small,
12161 * allocate/free it every time bpf_check() is called
12162 */
12163 env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
12164 if (!env) {
12165 return -ENOMEM;
12166 }
12167 log = &env->log;
12168
12169 len = (*prog)->len;
12170 env->insn_aux_data = vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
12171 ret = -ENOMEM;
12172 if (!env->insn_aux_data) {
12173 goto err_free_env;
12174 }
12175 for (i = 0; i < len; i++) {
12176 env->insn_aux_data[i].orig_idx = i;
12177 }
12178 env->prog = *prog;
12179 env->ops = bpf_verifier_ops[env->prog->type];
12180 is_priv = bpf_capable();
12181
12182 bpf_get_btf_vmlinux();
12183
12184 /* grab the mutex to protect few globals used by verifier */
12185 if (!is_priv) {
12186 mutex_lock(&bpf_verifier_lock);
12187 }
12188
12189 if (attr->log_level || attr->log_buf || attr->log_size) {
12190 /* user requested verbose verifier output
12191 * and supplied buffer to store the verification trace
12192 */
12193 log->level = attr->log_level;
12194 log->ubuf = (char __user *)(unsigned long)attr->log_buf;
12195 log->len_total = attr->log_size;
12196
12197 /* log attributes have to be sane */
12198 if (!bpf_verifier_log_attr_valid(log)) {
12199 ret = -EINVAL;
12200 goto err_unlock;
12201 }
12202 }
12203
12204 if (IS_ERR(btf_vmlinux)) {
12205 /* Either gcc or pahole or the kernel is broken. */
12206 verbose(env, "in-kernel BTF is malformed\n");
12207 ret = PTR_ERR(btf_vmlinux);
12208 goto skip_full_check;
12209 }
12210
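/* Alignment policy: BPF_F_STRICT_ALIGNMENT opts into strict checking,
 * architectures without efficient unaligned access force it on, and
 * BPF_F_ANY_ALIGNMENT (checked last) switches it back off.
 */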
12211 env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
12212 if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
12213 env->strict_alignment = true;
12214 }
12215 if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) {
12216 env->strict_alignment = false;
12217 }
12218
12219 env->allow_ptr_leaks = bpf_allow_ptr_leaks();
12220 env->allow_uninit_stack = bpf_allow_uninit_stack();
12221 env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
12222 env->bypass_spec_v1 = bpf_bypass_spec_v1();
12223 env->bypass_spec_v4 = bpf_bypass_spec_v4();
12224 env->bpf_capable = bpf_capable();
12225
12226 if (is_priv) {
12227 env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
12228 }
12229
12230 env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), GFP_USER);
12231 ret = -ENOMEM;
12232 if (!env->explored_states) {
12233 goto skip_full_check;
12234 }
12235
12236 ret = check_subprogs(env);
12237 if (ret < 0) {
12238 goto skip_full_check;
12239 }
12240
12241 ret = check_btf_info(env, attr, uattr);
12242 if (ret < 0) {
12243 goto skip_full_check;
12244 }
12245
12246 ret = check_attach_btf_id(env);
12247 if (ret) {
12248 goto skip_full_check;
12249 }
12250
12251 ret = resolve_pseudo_ldimm64(env);
12252 if (ret < 0) {
12253 goto skip_full_check;
12254 }
12255
12256 if (bpf_prog_is_dev_bound(env->prog->aux)) {
12257 ret = bpf_prog_offload_verifier_prep(env->prog);
12258 if (ret) {
12259 goto skip_full_check;
12260 }
12261 }
12262
12263 ret = check_cfg(env);
12264 if (ret < 0) {
12265 goto skip_full_check;
12266 }
12267
12268 ret = do_check_subprogs(env);
12269 ret = ret ?: do_check_main(env);
12270
12271 if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) {
12272 ret = bpf_prog_offload_finalize(env);
12273 }
12274
12275 skip_full_check:
12276 kvfree(env->explored_states);
12277
12278 if (ret == 0) {
12279 ret = check_max_stack_depth(env);
12280 }
12281
12282 /* instruction rewrites happen after this point */
12283 if (is_priv) {
12284 if (ret == 0) {
12285 opt_hard_wire_dead_code_branches(env);
12286 }
12287 if (ret == 0) {
12288 ret = opt_remove_dead_code(env);
12289 }
12290 if (ret == 0) {
12291 ret = opt_remove_nops(env);
12292 }
12293 } else {
12294 if (ret == 0) {
12295 sanitize_dead_code(env);
12296 }
12297 }
12298
12299 if (ret == 0) {
12300 /* program is valid, convert *(u32*)(ctx + off) accesses */
12301 ret = convert_ctx_accesses(env);
12302 }
12303
12304 if (ret == 0) {
12305 ret = fixup_bpf_calls(env);
12306 }
12307
12308 /* do 32-bit optimization after insn patching is done so those patched
12309 * insns could be handled correctly.
12310 */
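/* opt_subreg_zext_lo32_rnd_hi32() inserts explicit zero-extension after
 * 32-bit subregister writes; verifier_zext then tells 32-bit JITs that
 * they may skip their own implicit zero extension.
 */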
12311 if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
12312 ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
12313 env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret : false;
12314 }
12315
12316 if (ret == 0) {
12317 ret = fixup_call_args(env);
12318 }
12319
12320 env->verification_time = ktime_get_ns() - start_time;
12321 print_verification_stats(env);
12322
12323 if (log->level && bpf_verifier_log_full(log)) {
12324 ret = -ENOSPC;
12325 }
12326 if (log->level && !log->ubuf) {
12327 ret = -EFAULT;
12328 goto err_release_maps;
12329 }
12330
12331 if (ret == 0 && env->used_map_cnt) {
12332 /* if program passed verifier, update used_maps in bpf_prog_info */
12333 env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, sizeof(env->used_maps[0]), GFP_KERNEL);
12334
12335 if (!env->prog->aux->used_maps) {
12336 ret = -ENOMEM;
12337 goto err_release_maps;
12338 }
12339
12340 memcpy(env->prog->aux->used_maps, env->used_maps, sizeof(env->used_maps[0]) * env->used_map_cnt);
12341 env->prog->aux->used_map_cnt = env->used_map_cnt;
12342
12343 /* program is valid. Convert pseudo bpf_ld_imm64 into generic
12344 * bpf_ld_imm64 instructions
12345 */
12346 convert_pseudo_ld_imm64(env);
12347 }
12348
12349 if (ret == 0) {
12350 adjust_btf_func(env);
12351 }
12352
12353 err_release_maps:
12354 if (!env->prog->aux->used_maps) {
12355 /* if we didn't copy map pointers into bpf_prog_info, release
12356 * them now. Otherwise free_used_maps() will release them.
12357 */
12358 release_maps(env);
12359 }
12360
12361 /* extension progs temporarily inherit the attach_type of their targets
12362 * for verification purposes, so set it back to zero before returning
12363 */
12364 if (env->prog->type == BPF_PROG_TYPE_EXT) {
12365 env->prog->expected_attach_type = 0;
12366 }
12367
12368 *prog = env->prog;
12369 err_unlock:
12370 if (!is_priv) {
12371 mutex_unlock(&bpf_verifier_lock);
12372 }
12373 vfree(env->insn_aux_data);
12374 err_free_env:
12375 kfree(env);
12376 return ret;
12377 }
12378