// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
 * Copyright (c) 2016 Facebook
 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
 */
#include <uapi/linux/btf.h>
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/slab.h>
#include <linux/bpf.h>
#include <linux/btf.h>
#include <linux/bpf_verifier.h>
#include <linux/filter.h>
#include <net/netlink.h>
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
#include <linux/bsearch.h>
#include <linux/sort.h>
#include <linux/perf_event.h>
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
#include <linux/btf_ids.h>

#include "disasm.h"

static const struct bpf_verifier_ops *const bpf_verifier_ops[] = {
#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) [_id] = &_name##_verifier_ops,
#define BPF_MAP_TYPE(_id, _ops)
#define BPF_LINK_TYPE(_id, _name)
#include <linux/bpf_types.h>
#undef BPF_PROG_TYPE
#undef BPF_MAP_TYPE
#undef BPF_LINK_TYPE
};

/* bpf_check() is a static code analyzer that walks eBPF program
 * instruction by instruction and updates register/stack state.
 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
 *
 * The first pass is depth-first-search to check that the program is a DAG.
 * It rejects the following programs:
 * - larger than BPF_MAXINSNS insns
 * - if loop is present (detected via back-edge)
 * - unreachable insns exist (shouldn't be a forest. program = one function)
 * - out of bounds or malformed jumps
 * The second pass is all possible path descent from the 1st insn.
 * Since it's analyzing all paths through the program, the length of the
 * analysis is limited to 64k insn, which may be hit even if total number of
 * insn is less than 4K, but there are too many branches that change stack/regs.
 * Number of 'branches to be analyzed' is limited to 1k
 *
 * On entry to each instruction, each register has a type, and the instruction
 * changes the types of the registers depending on instruction semantics.
 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
 * copied to R1.
 *
 * All registers are 64-bit.
 * R0 - return register
 * R1-R5 argument passing registers
 * R6-R9 callee saved registers
 * R10 - frame pointer read-only
 *
 * At the start of BPF program the register R1 contains a pointer to bpf_context
 * and has type PTR_TO_CTX.
 *
 * Verifier tracks arithmetic operations on pointers in case:
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
 * 1st insn copies R10 (which has FRAME_PTR) type into R1
 * and 2nd arithmetic instruction is pattern matched to recognize
 * that it wants to construct a pointer to some element within stack.
 * So after 2nd insn, the register R1 has type PTR_TO_STACK
 * (and -20 constant is saved for further stack bounds checking).
 * Meaning that this reg is a pointer to stack plus known immediate constant.
 *
 * Most of the time the registers have SCALAR_VALUE type, which
 * means the register has some value, but it's not a valid pointer.
 * (like pointer plus pointer becomes SCALAR_VALUE type)
 *
 * When verifier sees load or store instructions the type of base register
 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
 * four pointer types recognized by check_mem_access() function.
 *
 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
 * and the range of [ptr, ptr + map's value_size) is accessible.
 *
 * registers used to pass values to function calls are checked against
 * function argument constraints.
 *
 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
 * It means that the register type passed to this function must be
 * PTR_TO_STACK and it will be used inside the function as
 * 'pointer to map element key'
 *
 * For example the argument constraints for bpf_map_lookup_elem():
 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
 *   .arg1_type = ARG_CONST_MAP_PTR,
 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
 *
 * ret_type says that this function returns 'pointer to map elem value or null'
 * function expects 1st argument to be a const pointer to 'struct bpf_map' and
 * 2nd argument should be a pointer to stack, which will be used inside
 * the helper function as a pointer to map element key.
 *
 * On the kernel side the helper function looks like:
 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
 * {
 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
 *    void *key = (void *) (unsigned long) r2;
 *    void *value;
 *
 *    here kernel can access 'key' and 'map' pointers safely, knowing that
 *    [key, key + map->key_size) bytes are valid and were initialized on
 *    the stack of eBPF program.
 * }
 *
 * Corresponding eBPF program may look like:
 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
 * here verifier looks at prototype of map_lookup_elem() and sees:
 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
 *
 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
 * and were initialized prior to this call.
 * If it's ok, then verifier allows this BPF_CALL insn and looks at
 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
 * R0->type = PTR_TO_MAP_VALUE_OR_NULL which means bpf_map_lookup_elem() function
 * returns either a pointer to map value or NULL.
 *
 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
 * insn, the register holding that pointer in the true branch changes state to
 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
 * branch. See check_cond_jmp_op().
 *
 * After the call R0 is set to return type of the function and registers R1-R5
 * are set to NOT_INIT to indicate that they are no longer readable.
 *
 * The following reference types represent a potential reference to a kernel
 * resource which, after first being allocated, must be checked and freed by
 * the BPF program:
 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
 *
 * When the verifier sees a helper call return a reference type, it allocates a
 * pointer id for the reference and stores it in the current function state.
 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
 * passes through a NULL-check conditional. For the branch wherein the state is
 * changed to CONST_IMM, the verifier releases the reference.
 *
 * For each helper function that allocates a reference, such as
 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
 * bpf_sk_release(). When a reference type passes into the release function,
 * the verifier also releases the reference. If any unchecked or unreleased
 * reference remains at the end of the program, the verifier rejects it.
 */
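
/* For illustration only: a hypothetical socket-lookup program following the
 * acquire/release contract described above might contain (the tuple setup is
 * a placeholder, not taken from this file):
 *
 *    struct bpf_sock *sk;
 *
 *    sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple.ipv4), BPF_F_CURRENT_NETNS, 0);
 *    if (sk)                  // NULL check: PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET
 *        bpf_sk_release(sk);  // reference released; program may now exit
 *
 * A path that holds the reference but never calls bpf_sk_release() would be
 * rejected with an "unreleased reference" style error.
 */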

/* verifier_state + insn_idx are pushed to stack when branch is encountered */
struct bpf_verifier_stack_elem {
    /* verifier state is 'st'
     * before processing instruction 'insn_idx'
     * and after processing instruction 'prev_insn_idx'
     */
    struct bpf_verifier_state st;
    int insn_idx;
    int prev_insn_idx;
    struct bpf_verifier_stack_elem *next;
    /* length of verifier log at the time this state was pushed on stack */
    u32 log_pos;
};

#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
#define BPF_COMPLEXITY_LIMIT_STATES 64

#define BPF_MAP_KEY_POISON (1ULL << 63)
#define BPF_MAP_KEY_SEEN (1ULL << 62)

#define BPF_MAP_PTR_UNPRIV 1UL
#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + POISON_POINTER_DELTA))
#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))

#define VERIFIER_TWO 2
#define VERIFIER_THREE 3
#define VERIFIER_FOUR 4
#define VERIFIER_EIGHT 8
#define VERIFIER_SIXTEEN 16
#define VERIFIER_THIRTYONE 31
#define VERIFIER_THIRTYTWO 32
#define VERIFIER_SIXTYTHREE 63
#define VERIFIER_SIXTYFOUR 64
#define VERIFIER_ONEHUNDREDTWENTYEIGHT 128
#define VERIFIER_TWOHUNDREDFIFTYSIX 256
#define VERIFIER_ONETHOUSAND 1000

static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
{
    return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
}

static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
{
    return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
}

static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, const struct bpf_map *map, bool unpriv)
{
    BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
    unpriv |= bpf_map_ptr_unpriv(aux);
    aux->map_ptr_state = (unsigned long)map | (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
}

static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
{
    return aux->map_key_state & BPF_MAP_KEY_POISON;
}

static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
{
    return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
}

static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
{
    return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
}

static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
{
    bool poisoned = bpf_map_key_poisoned(aux);

    aux->map_key_state = state | BPF_MAP_KEY_SEEN | (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
}
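
/* A sketch of how the packed map_key_state behaves, assuming a constant key
 * of 42 is recorded at an instruction (values are illustrative only):
 *
 *    bpf_map_key_store(aux, 42);       // state = 42 | BPF_MAP_KEY_SEEN
 *    bpf_map_key_unseen(aux);          // now false
 *    bpf_map_key_immediate(aux);       // 42, marker bits masked off
 *
 *    bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
 *    bpf_map_key_poisoned(aux);        // true, immediate no longer trusted
 */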

struct bpf_call_arg_meta {
    struct bpf_map *map_ptr;
    bool raw_mode;
    bool pkt_access;
    int regno;
    int access_size;
    int mem_size;
    u64 msize_max_value;
    int ref_obj_id;
    int func_id;
    u32 btf_id;
    u32 ret_btf_id;
};

struct btf *btf_vmlinux;

static DEFINE_MUTEX(bpf_verifier_lock);

static const struct bpf_line_info *find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
{
    const struct bpf_line_info *linfo;
    const struct bpf_prog *prog;
    u32 i, nr_linfo;

    prog = env->prog;
    nr_linfo = prog->aux->nr_linfo;

    if (!nr_linfo || insn_off >= prog->len) {
        return NULL;
    }

    linfo = prog->aux->linfo;
    for (i = 1; i < nr_linfo; i++) {
        if (insn_off < linfo[i].insn_off) {
            break;
        }
    }

    return &linfo[i - 1];
}

void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args)
{
    unsigned int n;

    n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);

    WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, "verifier log line truncated - local buffer too short\n");

    n = min(log->len_total - log->len_used - 1, n);
    log->kbuf[n] = '\0';

    if (log->level == BPF_LOG_KERNEL) {
        pr_err("BPF:%s\n", log->kbuf);
        return;
    }
    if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) {
        log->len_used += n;
    } else {
        log->ubuf = NULL;
    }
}

static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
{
    char zero = 0;

    if (!bpf_verifier_log_needed(log)) {
        return;
    }

    log->len_used = new_pos;
    if (put_user(zero, log->ubuf + new_pos)) {
        log->ubuf = NULL;
    }
}

/* log_level controls verbosity level of eBPF verifier.
 * bpf_verifier_log_write() is used to dump the verification trace to the log,
 * so the user can figure out what's wrong with the program
 */
__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...)
{
    va_list args;

    if (!bpf_verifier_log_needed(&env->log)) {
        return;
    }

    va_start(args, fmt);
    bpf_verifier_vlog(&env->log, fmt, args);
    va_end(args);
}
EXPORT_SYMBOL_GPL(bpf_verifier_log_write);

__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
{
    struct bpf_verifier_env *env = private_data;
    va_list args;

    if (!bpf_verifier_log_needed(&env->log)) {
        return;
    }

    va_start(args, fmt);
    bpf_verifier_vlog(&env->log, fmt, args);
    va_end(args);
}

__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...)
{
    va_list args;

    if (!bpf_verifier_log_needed(log)) {
        return;
    }

    va_start(args, fmt);
    bpf_verifier_vlog(log, fmt, args);
    va_end(args);
}

static const char *ltrim(const char *s)
{
    while (isspace(*s)) {
        s++;
    }

    return s;
}

__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, u32 insn_off, const char *prefix_fmt, ...)
{
    const struct bpf_line_info *linfo;

    if (!bpf_verifier_log_needed(&env->log)) {
        return;
    }

    linfo = find_linfo(env, insn_off);
    if (!linfo || linfo == env->prev_linfo) {
        return;
    }

    if (prefix_fmt) {
        va_list args;

        va_start(args, prefix_fmt);
        bpf_verifier_vlog(&env->log, prefix_fmt, args);
        va_end(args);
    }

    verbose(env, "%s\n", ltrim(btf_name_by_offset(env->prog->aux->btf, linfo->line_off)));

    env->prev_linfo = linfo;
}

static bool type_is_pkt_pointer(enum bpf_reg_type type)
{
    return type == PTR_TO_PACKET || type == PTR_TO_PACKET_META;
}

static bool type_is_sk_pointer(enum bpf_reg_type type)
{
    return type == PTR_TO_SOCKET || type == PTR_TO_SOCK_COMMON || type == PTR_TO_TCP_SOCK || type == PTR_TO_XDP_SOCK;
}

static bool reg_type_not_null(enum bpf_reg_type type)
{
    return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || type == PTR_TO_SOCK_COMMON;
}

static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
{
    return reg->type == PTR_TO_MAP_VALUE && map_value_has_spin_lock(reg->map_ptr);
}

static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
{
    return base_type(type) == PTR_TO_SOCKET || base_type(type) == PTR_TO_TCP_SOCK || base_type(type) == PTR_TO_MEM;
}

static bool type_is_rdonly_mem(u32 type)
{
    return type & MEM_RDONLY;
}

static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
{
    return type == ARG_PTR_TO_SOCK_COMMON;
}

static bool type_may_be_null(u32 type)
{
    return type & PTR_MAYBE_NULL;
}

/* Determine whether the function releases some resources allocated by another
 * function call. The first reference type argument will be assumed to be
 * released by release_reference().
 */
static bool is_release_function(enum bpf_func_id func_id)
{
    return func_id == BPF_FUNC_sk_release || func_id == BPF_FUNC_ringbuf_submit || func_id == BPF_FUNC_ringbuf_discard;
}

static bool may_be_acquire_function(enum bpf_func_id func_id)
{
    return func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp ||
           func_id == BPF_FUNC_skc_lookup_tcp || func_id == BPF_FUNC_map_lookup_elem ||
           func_id == BPF_FUNC_ringbuf_reserve;
}

static bool is_acquire_function(enum bpf_func_id func_id, const struct bpf_map *map)
{
    enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;

    if (func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp ||
        func_id == BPF_FUNC_ringbuf_reserve) {
        return true;
    }

    if (func_id == BPF_FUNC_map_lookup_elem &&
        (map_type == BPF_MAP_TYPE_SOCKMAP || map_type == BPF_MAP_TYPE_SOCKHASH)) {
        return true;
    }

    return false;
}

static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
    return func_id == BPF_FUNC_tcp_sock || func_id == BPF_FUNC_sk_fullsock || func_id == BPF_FUNC_skc_to_tcp_sock ||
           func_id == BPF_FUNC_skc_to_tcp6_sock || func_id == BPF_FUNC_skc_to_udp6_sock ||
           func_id == BPF_FUNC_skc_to_tcp_timewait_sock || func_id == BPF_FUNC_skc_to_tcp_request_sock;
}

/* string representation of 'enum bpf_reg_type'
 *
 * Note that reg_type_str() cannot appear more than once in a single verbose()
 * statement.
 */
static const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type)
{
    char postfix[VERIFIER_SIXTEEN] = {0}, prefix[VERIFIER_SIXTEEN] = {0};
    static const char *const str[] = {
        [NOT_INIT] = "?",
        [SCALAR_VALUE] = "inv",
        [PTR_TO_CTX] = "ctx",
        [CONST_PTR_TO_MAP] = "map_ptr",
        [PTR_TO_MAP_VALUE] = "map_value",
        [PTR_TO_STACK] = "fp",
        [PTR_TO_PACKET] = "pkt",
        [PTR_TO_PACKET_META] = "pkt_meta",
        [PTR_TO_PACKET_END] = "pkt_end",
        [PTR_TO_FLOW_KEYS] = "flow_keys",
        [PTR_TO_SOCKET] = "sock",
        [PTR_TO_SOCK_COMMON] = "sock_common",
        [PTR_TO_TCP_SOCK] = "tcp_sock",
        [PTR_TO_TP_BUFFER] = "tp_buffer",
        [PTR_TO_XDP_SOCK] = "xdp_sock",
        [PTR_TO_BTF_ID] = "ptr_",
        [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
        [PTR_TO_MEM] = "mem",
        [PTR_TO_BUF] = "buf",
    };

    if (type & PTR_MAYBE_NULL) {
        if (base_type(type) == PTR_TO_BTF_ID || base_type(type) == PTR_TO_PERCPU_BTF_ID) {
            strncpy(postfix, "or_null_", VERIFIER_SIXTEEN);
        } else {
            strncpy(postfix, "_or_null", VERIFIER_SIXTEEN);
        }
    }

    if (type & MEM_RDONLY) {
        strncpy(prefix, "rdonly_", VERIFIER_SIXTEEN);
    }
    if (type & MEM_ALLOC) {
        strncpy(prefix, "alloc_", VERIFIER_SIXTEEN);
    }

    (void)snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix);
    return env->type_str_buf;
}
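
/* Because reg_type_str() formats into the single env->type_str_buf, a verbose()
 * statement must not evaluate it twice; the second call would overwrite the
 * first result before the format string is rendered. Hypothetical example:
 *
 *    // Wrong: both %s arguments end up pointing at the same buffer
 *    verbose(env, "%s vs %s", reg_type_str(env, a), reg_type_str(env, b));
 *
 *    // OK: split across two verbose() calls
 *    verbose(env, "%s vs ", reg_type_str(env, a));
 *    verbose(env, "%s", reg_type_str(env, b));
 */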

static char slot_type_char[] = {
    [STACK_INVALID] = '?',
    [STACK_SPILL] = 'r',
    [STACK_MISC] = 'm',
    [STACK_ZERO] = '0',
};

static void print_liveness(struct bpf_verifier_env *env, enum bpf_reg_liveness live)
{
    if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) {
        verbose(env, "_");
    }
    if (live & REG_LIVE_READ) {
        verbose(env, "r");
    }
    if (live & REG_LIVE_WRITTEN) {
        verbose(env, "w");
    }
    if (live & REG_LIVE_DONE) {
        verbose(env, "D");
    }
}

static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
{
    struct bpf_verifier_state *cur = env->cur_state;

    return cur->frame[reg->frameno];
}

const char *kernel_type_name(u32 id)
{
    return btf_name_by_offset(btf_vmlinux, btf_type_by_id(btf_vmlinux, id)->name_off);
}

static void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state)
{
    const struct bpf_reg_state *reg;
    enum bpf_reg_type t;
    int i;

    if (state->frameno) {
        verbose(env, " frame%d:", state->frameno);
    }
    for (i = 0; i < MAX_BPF_REG; i++) {
        reg = &state->regs[i];
        t = reg->type;
        if (t == NOT_INIT) {
            continue;
        }
        verbose(env, " R%d", i);
        print_liveness(env, reg->live);
        verbose(env, "=%s", reg_type_str(env, t));
        if (t == SCALAR_VALUE && reg->precise) {
            verbose(env, "P");
        }
        if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) {
            /* reg->off should be 0 for SCALAR_VALUE */
            verbose(env, "%lld", reg->var_off.value + reg->off);
        } else {
            if (base_type(t) == PTR_TO_BTF_ID || base_type(t) == PTR_TO_PERCPU_BTF_ID) {
                verbose(env, "%s", kernel_type_name(reg->btf_id));
            }
            verbose(env, "(id=%d", reg->id);
            if (reg_type_may_be_refcounted_or_null(t)) {
                verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
            }
            if (t != SCALAR_VALUE) {
                verbose(env, ",off=%d", reg->off);
            }
            if (type_is_pkt_pointer(t)) {
                verbose(env, ",r=%d", reg->range);
            } else if (base_type(t) == CONST_PTR_TO_MAP || base_type(t) == PTR_TO_MAP_VALUE) {
                verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, reg->map_ptr->value_size);
            }
            if (tnum_is_const(reg->var_off)) {
                /* Typically an immediate SCALAR_VALUE, but
                 * could be a pointer whose offset is too big
                 * for reg->off
                 */
                verbose(env, ",imm=%llx", reg->var_off.value);
            } else {
                if (reg->smin_value != reg->umin_value && reg->smin_value != S64_MIN) {
                    verbose(env, ",smin_value=%lld", (long long)reg->smin_value);
                }
                if (reg->smax_value != reg->umax_value && reg->smax_value != S64_MAX) {
                    verbose(env, ",smax_value=%lld", (long long)reg->smax_value);
                }
                if (reg->umin_value != 0) {
                    verbose(env, ",umin_value=%llu", (unsigned long long)reg->umin_value);
                }
                if (reg->umax_value != U64_MAX) {
                    verbose(env, ",umax_value=%llu", (unsigned long long)reg->umax_value);
                }
                if (!tnum_is_unknown(reg->var_off)) {
                    char tn_buf[48];

                    tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
                    verbose(env, ",var_off=%s", tn_buf);
                }
                if (reg->s32_min_value != reg->smin_value && reg->s32_min_value != S32_MIN) {
                    verbose(env, ",s32_min_value=%d", (int)(reg->s32_min_value));
                }
                if (reg->s32_max_value != reg->smax_value && reg->s32_max_value != S32_MAX) {
                    verbose(env, ",s32_max_value=%d", (int)(reg->s32_max_value));
                }
                if (reg->u32_min_value != reg->umin_value && reg->u32_min_value != U32_MIN) {
                    verbose(env, ",u32_min_value=%d", (int)(reg->u32_min_value));
                }
                if (reg->u32_max_value != reg->umax_value && reg->u32_max_value != U32_MAX) {
                    verbose(env, ",u32_max_value=%d", (int)(reg->u32_max_value));
                }
            }
            verbose(env, ")");
        }
    }
    for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
        char types_buf[BPF_REG_SIZE + 1];
        bool valid = false;
        int j;

        for (j = 0; j < BPF_REG_SIZE; j++) {
            if (state->stack[i].slot_type[j] != STACK_INVALID) {
                valid = true;
            }
            types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
        }
        types_buf[BPF_REG_SIZE] = 0;
        if (!valid) {
            continue;
        }
        verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
        print_liveness(env, state->stack[i].spilled_ptr.live);
        if (state->stack[i].slot_type[0] == STACK_SPILL) {
            reg = &state->stack[i].spilled_ptr;
            t = reg->type;
            verbose(env, "=%s", reg_type_str(env, t));
            if (t == SCALAR_VALUE && reg->precise) {
                verbose(env, "P");
            }
            if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) {
                verbose(env, "%lld", reg->var_off.value + reg->off);
            }
        } else {
            verbose(env, "=%s", types_buf);
        }
    }
    if (state->acquired_refs && state->refs[0].id) {
        verbose(env, " refs=%d", state->refs[0].id);
        for (i = 1; i < state->acquired_refs; i++) {
            if (state->refs[i].id) {
                verbose(env, ",%d", state->refs[i].id);
            }
        }
    }
    verbose(env, "\n");
}
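
/* Roughly what a dumped frame can look like in the log (assembled by the
 * verbose() calls above; the exact fields depend on the register types):
 *
 *    R1_w=ctx(id=0,off=0,imm=0) R6=inv42 R10=fp0 fp-8=m???????
 *
 * i.e. R1 was just written and holds a ctx pointer, R6 is a known scalar 42,
 * R10 is the frame pointer, and one byte of stack slot fp-8 holds
 * miscellaneous (non-pointer) data while the rest is uninitialized.
 */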

#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)                                                                        \
    static int copy_##NAME##_state(struct bpf_func_state *dst, const struct bpf_func_state *src)                       \
    {                                                                                                                  \
        if (!src->FIELD)                                                                                               \
            return 0;                                                                                                  \
        if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {                                                                   \
            /* internal bug, make state invalid to reject the program */                                               \
            memset(dst, 0, sizeof(*dst));                                                                              \
            return -EFAULT;                                                                                            \
        }                                                                                                              \
        memcpy(dst->FIELD, src->FIELD, sizeof(*src->FIELD) * (src->COUNT / (SIZE)));                                   \
        return 0;                                                                                                      \
    }
/* copy_reference_state() */
COPY_STATE_FN(reference, acquired_refs, refs, 1)
/* copy_stack_state() */
COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef COPY_STATE_FN
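
/* For reference, COPY_STATE_FN(reference, acquired_refs, refs, 1) above
 * expands to approximately:
 *
 *    static int copy_reference_state(struct bpf_func_state *dst,
 *                                    const struct bpf_func_state *src)
 *    {
 *        if (!src->refs)
 *            return 0;
 *        if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *            memset(dst, 0, sizeof(*dst));
 *            return -EFAULT;
 *        }
 *        memcpy(dst->refs, src->refs, sizeof(*src->refs) * src->acquired_refs);
 *        return 0;
 *    }
 */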

#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)                                                                     \
    static int realloc_##NAME##_state(struct bpf_func_state *state, int size, bool copy_old)                           \
    {                                                                                                                  \
        u32 old_size = state->COUNT;                                                                                   \
        struct bpf_##NAME##_state *new_##FIELD;                                                                        \
        int slot = size / (SIZE);                                                                                      \
                                                                                                                       \
        if (size <= old_size || !size) {                                                                               \
            if (copy_old)                                                                                              \
                return 0;                                                                                              \
            state->COUNT = slot * (SIZE);                                                                              \
            if (!size && old_size) {                                                                                   \
                kfree(state->FIELD);                                                                                   \
                state->FIELD = NULL;                                                                                   \
            }                                                                                                          \
            return 0;                                                                                                  \
        }                                                                                                              \
        new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), GFP_KERNEL);                              \
        if (!new_##FIELD)                                                                                              \
            return -ENOMEM;                                                                                            \
        if (copy_old) {                                                                                                \
            if (state->FIELD)                                                                                          \
                memcpy(new_##FIELD, state->FIELD, sizeof(*new_##FIELD) * (old_size / (SIZE)));                         \
            memset(new_##FIELD + old_size / (SIZE), 0, sizeof(*new_##FIELD) * (size - old_size) / (SIZE));             \
        }                                                                                                              \
        state->COUNT = slot * (SIZE);                                                                                  \
        kfree(state->FIELD);                                                                                           \
        state->FIELD = new_##FIELD;                                                                                    \
        return 0;                                                                                                      \
    }
/* realloc_reference_state() */
REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
/* realloc_stack_state() */
REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
#undef REALLOC_STATE_FN

/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
 * make it consume a minimal amount of memory. A check_stack_write() access
 * from the program calls into realloc_func_state() to grow the stack size.
 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
 * which realloc_stack_state() copies over. It points to previous
 * bpf_verifier_state which is never reallocated.
 */
static int realloc_func_state(struct bpf_func_state *state, int stack_size, int refs_size, bool copy_old)
{
    int err = realloc_reference_state(state, refs_size, copy_old);
    if (err) {
        return err;
    }
    return realloc_stack_state(state, stack_size, copy_old);
}

/* Acquire a pointer id from the env and update the state->refs to include
 * this new pointer reference.
 * On success, returns a valid pointer id to associate with the register
 * On failure, returns a negative errno.
 */
static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
{
    struct bpf_func_state *state = cur_func(env);
    int new_ofs = state->acquired_refs;
    int id, err;

    err = realloc_reference_state(state, state->acquired_refs + 1, true);
    if (err) {
        return err;
    }
    id = ++env->id_gen;
    state->refs[new_ofs].id = id;
    state->refs[new_ofs].insn_idx = insn_idx;

    return id;
}

/* release function corresponding to acquire_reference_state(). Idempotent. */
static int release_reference_state(struct bpf_func_state *state, int ptr_id)
{
    int i, last_idx;

    last_idx = state->acquired_refs - 1;
    for (i = 0; i < state->acquired_refs; i++) {
        if (state->refs[i].id == ptr_id) {
            if (last_idx && i != last_idx) {
                memcpy(&state->refs[i], &state->refs[last_idx], sizeof(*state->refs));
            }
            memset(&state->refs[last_idx], 0, sizeof(*state->refs));
            state->acquired_refs--;
            return 0;
        }
    }
    return -EINVAL;
}

static int transfer_reference_state(struct bpf_func_state *dst, struct bpf_func_state *src)
{
    int err = realloc_reference_state(dst, src->acquired_refs, false);
    if (err) {
        return err;
    }
    err = copy_reference_state(dst, src);
    if (err) {
        return err;
    }
    return 0;
}

static void free_func_state(struct bpf_func_state *state)
{
    if (!state) {
        return;
    }
    kfree(state->refs);
    kfree(state->stack);
    kfree(state);
}

static void clear_jmp_history(struct bpf_verifier_state *state)
{
    kfree(state->jmp_history);
    state->jmp_history = NULL;
    state->jmp_history_cnt = 0;
}

static void free_verifier_state(struct bpf_verifier_state *state, bool free_self)
{
    int i;

    for (i = 0; i <= state->curframe; i++) {
        free_func_state(state->frame[i]);
        state->frame[i] = NULL;
    }
    clear_jmp_history(state);
    if (free_self) {
        kfree(state);
    }
}

/* copy verifier state from src to dst growing dst stack space
 * when necessary to accommodate larger src stack
 */
static int copy_func_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
{
    int err;

    err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, false);
    if (err) {
        return err;
    }
    memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
    err = copy_reference_state(dst, src);
    if (err) {
        return err;
    }
    return copy_stack_state(dst, src);
}

static int copy_verifier_state(struct bpf_verifier_state *dst_state, const struct bpf_verifier_state *src)
{
    struct bpf_func_state *dst;
    u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
    int i, err;

    if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
        kfree(dst_state->jmp_history);
        dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
        if (!dst_state->jmp_history) {
            return -ENOMEM;
        }
    }
    memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
    dst_state->jmp_history_cnt = src->jmp_history_cnt;

    /* if dst has more stack frames than src frame, free them */
    for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
        free_func_state(dst_state->frame[i]);
        dst_state->frame[i] = NULL;
    }
    dst_state->speculative = src->speculative;
    dst_state->curframe = src->curframe;
    dst_state->active_spin_lock = src->active_spin_lock;
    dst_state->branches = src->branches;
    dst_state->parent = src->parent;
    dst_state->first_insn_idx = src->first_insn_idx;
    dst_state->last_insn_idx = src->last_insn_idx;
    for (i = 0; i <= src->curframe; i++) {
        dst = dst_state->frame[i];
        if (!dst) {
            dst = kzalloc(sizeof(*dst), GFP_KERNEL);
            if (!dst) {
                return -ENOMEM;
            }
            dst_state->frame[i] = dst;
        }
        err = copy_func_state(dst, src->frame[i]);
        if (err) {
            return err;
        }
    }
    return 0;
}

static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
{
    while (st) {
        u32 br = --st->branches;

        /* WARN_ON(br > 1) technically makes sense here,
         * but see comment in push_stack(), hence:
         */
        WARN_ONCE((int)br < 0, "BUG update_branch_counts:branches_to_explore=%d\n", br);
        if (br) {
            break;
        }
        st = st->parent;
    }
}

static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, int *insn_idx, bool pop_log)
{
    struct bpf_verifier_state *cur = env->cur_state;
    struct bpf_verifier_stack_elem *elem, *head = env->head;
    int err;

    if (env->head == NULL) {
        return -ENOENT;
    }

    if (cur) {
        err = copy_verifier_state(cur, &head->st);
        if (err) {
            return err;
        }
    }
    if (pop_log) {
        bpf_vlog_reset(&env->log, head->log_pos);
    }
    if (insn_idx) {
        *insn_idx = head->insn_idx;
    }
    if (prev_insn_idx) {
        *prev_insn_idx = head->prev_insn_idx;
    }
    elem = head->next;
    free_verifier_state(&head->st, false);
    kfree(head);
    env->head = elem;
    env->stack_size--;
    return 0;
}

static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx,
                                             bool speculative)
{
    struct bpf_verifier_state *cur = env->cur_state;
    struct bpf_verifier_stack_elem *elem;
    int err;

    elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
    if (!elem) {
        goto err;
    }

    elem->insn_idx = insn_idx;
    elem->prev_insn_idx = prev_insn_idx;
    elem->next = env->head;
    elem->log_pos = env->log.len_used;
    env->head = elem;
    env->stack_size++;
    err = copy_verifier_state(&elem->st, cur);
    if (err) {
        goto err;
    }
    elem->st.speculative |= speculative;
    if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
        verbose(env, "The sequence of %d jumps is too complex.\n", env->stack_size);
        goto err;
    }
    if (elem->st.parent) {
        ++elem->st.parent->branches;
        /* WARN_ON(branches > 2) technically makes sense here,
         * but
         * 1. speculative states will bump 'branches' for non-branch
         * instructions
         * 2. is_state_visited() heuristics may decide not to create
         * a new state for a sequence of branches and all such current
         * and cloned states will be pointing to a single parent state
         * which might have large 'branches' count.
         */
    }
    return &elem->st;
err:
    free_verifier_state(env->cur_state, true);
    env->cur_state = NULL;
    /* pop all elements and return */
    while (!pop_stack(env, NULL, NULL, false)) {
        ;
    }
    return NULL;
}

#define CALLER_SAVED_REGS 6
static const int caller_saved[CALLER_SAVED_REGS] = {BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5};

static void verifier_mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg);

/* This helper doesn't clear reg->id */
static void verifier2_mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
    reg->var_off = tnum_const(imm);
    reg->smin_value = (s64)imm;
    reg->smax_value = (s64)imm;
    reg->umin_value = imm;
    reg->umax_value = imm;

    reg->s32_min_value = (s32)imm;
    reg->s32_max_value = (s32)imm;
    reg->u32_min_value = (u32)imm;
    reg->u32_max_value = (u32)imm;
}

/* Mark the unknown part of a register (variable offset or scalar value) as
 * known to have the value @imm.
 */
static void verifier_mark_reg_known(struct bpf_reg_state *reg, u64 imm)
{
    /* Clear id, off, and union(map_ptr, range) */
    memset(((u8 *)reg) + sizeof(reg->type), 0, offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
    verifier2_mark_reg_known(reg, imm);
}
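
/* Illustration: after verifier_mark_reg_known(reg, 5) the scalar is pinned to
 * a single value in every representation the verifier tracks:
 *
 *    reg->var_off                  == tnum_const(5)   // value=5, mask=0
 *    reg->smin_value/smax_value    == 5
 *    reg->umin_value/umax_value    == 5
 *    reg->s32_* and reg->u32_*     == 5
 */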

static void verifier_mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
{
    reg->var_off = tnum_const_subreg(reg->var_off, imm);
    reg->s32_min_value = (s32)imm;
    reg->s32_max_value = (s32)imm;
    reg->u32_min_value = (u32)imm;
    reg->u32_max_value = (u32)imm;
}

/* Mark the 'variable offset' part of a register as zero.  This should be
 * used only on registers holding a pointer type.
 */
static void verifier_mark_reg_known_zero(struct bpf_reg_state *reg)
{
    verifier_mark_reg_known(reg, 0);
}

static void verifier_mark_reg_const_zero(struct bpf_reg_state *reg)
{
    verifier_mark_reg_known(reg, 0);
    reg->type = SCALAR_VALUE;
}

static void mark_reg_known_zero(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
{
    if (WARN_ON(regno >= MAX_BPF_REG)) {
        verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
        /* Something bad happened, let's kill all regs */
        for (regno = 0; regno < MAX_BPF_REG; regno++) {
            verifier_mark_reg_not_init(env, regs + regno);
        }
        return;
    }
    verifier_mark_reg_known_zero(regs + regno);
}

static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
{
    return type_is_pkt_pointer(reg->type);
}

static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
{
    return reg_is_pkt_pointer(reg) || reg->type == PTR_TO_PACKET_END;
}

/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, enum bpf_reg_type which)
{
    /* The register can already have a range from prior markings.
     * This is fine as long as it hasn't been advanced from its
     * origin.
     */
    return reg->type == which && reg->id == 0 && reg->off == 0 && tnum_equals_const(reg->var_off, 0);
}

/* Reset the min/max bounds of a register */
static void verifier_mark_reg_unbounded(struct bpf_reg_state *reg)
{
    reg->smin_value = S64_MIN;
    reg->smax_value = S64_MAX;
    reg->umin_value = 0;
    reg->umax_value = U64_MAX;

    reg->s32_min_value = S32_MIN;
    reg->s32_max_value = S32_MAX;
    reg->u32_min_value = 0;
    reg->u32_max_value = U32_MAX;
}

static void verifier_mark_reg64_unbounded(struct bpf_reg_state *reg)
{
    reg->smin_value = S64_MIN;
    reg->smax_value = S64_MAX;
    reg->umin_value = 0;
    reg->umax_value = U64_MAX;
}

static void verifier_mark_reg32_unbounded(struct bpf_reg_state *reg)
{
    reg->s32_min_value = S32_MIN;
    reg->s32_max_value = S32_MAX;
    reg->u32_min_value = 0;
    reg->u32_max_value = U32_MAX;
}

static void verifier_update_reg32_bounds(struct bpf_reg_state *reg)
{
    struct tnum var32_off = tnum_subreg(reg->var_off);

    /* min signed is max(sign bit) | min(other bits) */
    reg->s32_min_value = max_t(s32, reg->s32_min_value, var32_off.value | (var32_off.mask & S32_MIN));
    /* max signed is min(sign bit) | max(other bits) */
    reg->s32_max_value = min_t(s32, reg->s32_max_value, var32_off.value | (var32_off.mask & S32_MAX));
    reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
    reg->u32_max_value = min(reg->u32_max_value, (u32)(var32_off.value | var32_off.mask));
}

static void verifier_update_reg64_bounds(struct bpf_reg_state *reg)
{
    /* min signed is max(sign bit) | min(other bits) */
    reg->smin_value = max_t(s64, reg->smin_value, reg->var_off.value | (reg->var_off.mask & S64_MIN));
    /* max signed is min(sign bit) | max(other bits) */
    reg->smax_value = min_t(s64, reg->smax_value, reg->var_off.value | (reg->var_off.mask & S64_MAX));
    reg->umin_value = max(reg->umin_value, reg->var_off.value);
    reg->umax_value = min(reg->umax_value, reg->var_off.value | reg->var_off.mask);
}

static void verifier_update_reg_bounds(struct bpf_reg_state *reg)
{
    verifier_update_reg32_bounds(reg);
    verifier_update_reg64_bounds(reg);
}
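
/* Worked example of the derivation above, assuming var_off = {value = 0x4,
 * mask = 0x3} (bit 2 known set, bits 0-1 unknown, upper bits known clear):
 *
 *    umin >= value                     -> umin >= 4
 *    umax <= value | mask              -> umax <= 7
 *    smin >= value | (mask & S64_MIN)  -> sign bit known clear, so smin >= 4
 *    smax <= value | (mask & S64_MAX)  -> smax <= 7
 *
 * i.e. the register is known to hold a value in [4, 7] in both the signed
 * and unsigned views.
 */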

/* Uses signed min/max values to inform unsigned, and vice-versa */
static void verifier_reg32_deduce_bounds(struct bpf_reg_state *reg)
{
    /* Learn sign from signed bounds.
     * If we cannot cross the sign boundary, then signed and unsigned bounds
     * are the same, so combine.  This works even in the negative case, e.g.
     * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
     */
    if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
        reg->s32_min_value = reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
        reg->s32_max_value = reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
        return;
    }
    /* Learn sign from unsigned bounds.  Signed bounds cross the sign
     * boundary, so we must be careful.
     */
    if ((s32)reg->u32_max_value >= 0) {
        /* Positive.  We can't learn anything from the smin, but smax
         * is positive, hence safe.
         */
        reg->s32_min_value = reg->u32_min_value;
        reg->s32_max_value = reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
    } else if ((s32)reg->u32_min_value < 0) {
        /* Negative.  We can't learn anything from the smax, but smin
         * is negative, hence safe.
         */
        reg->s32_min_value = reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
        reg->s32_max_value = reg->u32_max_value;
    }
}

static void verifier_reg64_deduce_bounds(struct bpf_reg_state *reg)
{
    /* Learn sign from signed bounds.
     * If we cannot cross the sign boundary, then signed and unsigned bounds
     * are the same, so combine.  This works even in the negative case, e.g.
     * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
     */
    if (reg->smin_value >= 0 || reg->smax_value < 0) {
        reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
        reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
        return;
    }
    /* Learn sign from unsigned bounds.  Signed bounds cross the sign
     * boundary, so we must be careful.
     */
    if ((s64)reg->umax_value >= 0) {
        /* Positive.  We can't learn anything from the smin, but smax
         * is positive, hence safe.
         */
        reg->smin_value = reg->umin_value;
        reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
    } else if ((s64)reg->umin_value < 0) {
        /* Negative.  We can't learn anything from the smax, but smin
         * is negative, hence safe.
         */
        reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
        reg->smax_value = reg->umax_value;
    }
}

static void verifier_reg_deduce_bounds(struct bpf_reg_state *reg)
{
    verifier_reg32_deduce_bounds(reg);
    verifier_reg64_deduce_bounds(reg);
}

/* Attempts to improve var_off based on unsigned min/max information */
static void verifier_reg_bound_offset(struct bpf_reg_state *reg)
{
    struct tnum var64_off = tnum_intersect(reg->var_off, tnum_range(reg->umin_value, reg->umax_value));
    struct tnum var32_off =
        tnum_intersect(tnum_subreg(reg->var_off), tnum_range(reg->u32_min_value, reg->u32_max_value));

    reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
}
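
/* Example of how the range feeds back into var_off, assuming the 64-bit
 * bounds are [16, 19]:
 *
 *    tnum_range(16, 19) == {value = 0x10, mask = 0x3}
 *
 * so after tnum_intersect() all bits above bit 1 become known (0x10) even if
 * var_off previously said nothing about them, while bits 0-1 stay unknown.
 */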
1222 
reg_bounds_sync(struct bpf_reg_state *reg)1223 static void reg_bounds_sync(struct bpf_reg_state *reg)
1224 {
1225     /* We might have learned new bounds from the var_off. */
1226     verifier_update_reg_bounds(reg);
1227     /* We might have learned something about the sign bit. */
1228     verifier_reg_deduce_bounds(reg);
1229     /* We might have learned some bits from the bounds. */
1230     verifier_reg_bound_offset(reg);
1231     /* Intersecting with the old var_off might have improved our bounds
1232      * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1233      * then new var_off is (0; 0x7f...fc) which improves our umax.
1234      */
1235     verifier_update_reg_bounds(reg);
1236 }
verifier_reg32_bound_s64(s32 a)1237 static bool verifier_reg32_bound_s64(s32 a)
1238 {
1239     return a >= 0 && a <= S32_MAX;
1240 }
1241 
verifier_reg_assign_32_into_64(struct bpf_reg_state *reg)1242 static void verifier_reg_assign_32_into_64(struct bpf_reg_state *reg)
1243 {
1244     reg->umin_value = reg->u32_min_value;
1245     reg->umax_value = reg->u32_max_value;
1246 
1247     /* Attempt to pull 32-bit signed bounds into 64-bit bounds but must
1248      * be positive otherwise set to worse case bounds and refine later
1249      * from tnum.
1250      */
1251     if (verifier_reg32_bound_s64(reg->s32_min_value) && verifier_reg32_bound_s64(reg->s32_max_value)) {
1252         reg->smin_value = reg->s32_min_value;
1253         reg->smax_value = reg->s32_max_value;
1254     } else {
1255         reg->smin_value = 0;
1256         reg->smax_value = U32_MAX;
1257     }
1258 }
1259 
verifier_reg_combine_32_into_64(struct bpf_reg_state *reg)1260 static void verifier_reg_combine_32_into_64(struct bpf_reg_state *reg)
1261 {
1262     /* special case when 64-bit register has upper 32-bit register
1263      * zeroed. Typically happens after zext or <<32, >>32 sequence
1264      * allowing us to use 32-bit bounds directly,
1265      */
1266     if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1267         verifier_reg_assign_32_into_64(reg);
1268     } else {
1269         /* Otherwise the best we can do is push lower 32bit known and
1270          * unknown bits into register (var_off set from jmp logic)
1271          * then learn as much as possible from the 64-bit tnum
1272          * known and unknown bits. The previous smin/smax bounds are
1273          * invalid here because of jmp32 compare so mark them unknown
1274          * so they do not impact tnum bounds calculation.
1275          */
1276         verifier_mark_reg64_unbounded(reg);
1277         verifier_update_reg_bounds(reg);
1278     }
1279 
1280     /* Intersecting with the old var_off might have improved our bounds
1281      * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1282      * then new var_off is (0; 0x7f...fc) which improves our umax.
1283      */
1284     reg_bounds_sync(reg);
1285 }
1286 
verifier_reg64_bound_s32(s64 a)1287 static bool verifier_reg64_bound_s32(s64 a)
1288 {
1289     return a > S32_MIN && a < S32_MAX;
1290 }
1291 
verifier_reg64_bound_u32(u64 a)1292 static bool verifier_reg64_bound_u32(u64 a)
1293 {
1294     return a > U32_MIN && a < U32_MAX;
1295 }
1296 
1297 static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1298 {
1299     verifier_mark_reg32_unbounded(reg);
1300 
1301     if (verifier_reg64_bound_s32(reg->smin_value) && verifier_reg64_bound_s32(reg->smax_value)) {
1302         reg->s32_min_value = (s32)reg->smin_value;
1303         reg->s32_max_value = (s32)reg->smax_value;
1304     }
1305     if (verifier_reg64_bound_u32(reg->umin_value) && verifier_reg64_bound_u32(reg->umax_value)) {
1306         reg->u32_min_value = (u32)reg->umin_value;
1307         reg->u32_max_value = (u32)reg->umax_value;
1308     }
1309 
1310     /* Intersecting with the old var_off might have improved our bounds
1311      * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1312      * then new var_off is (0; 0x7f...fc) which improves our umax.
1313      */
1314     reg_bounds_sync(reg);
1315 }
1316 
1317 /* Mark a register as having a completely unknown (scalar) value. */
1318 static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1319 {
1320     /*
1321      * Clear type, id, off, and union(map_ptr, range) and
1322      * padding between 'type' and union
1323      */
1324     memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1325     reg->type = SCALAR_VALUE;
1326     reg->var_off = tnum_unknown;
1327     reg->frameno = 0;
1328     reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1329     verifier_mark_reg_unbounded(reg);
1330 }
1331 
1332 static void mark_reg_unknown(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1333 {
1334     if (WARN_ON(regno >= MAX_BPF_REG)) {
1335         verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1336         /* Something bad happened, let's kill all regs except FP */
1337         for (regno = 0; regno < BPF_REG_FP; regno++) {
1338             verifier_mark_reg_not_init(env, regs + regno);
1339         }
1340         return;
1341     }
1342     __mark_reg_unknown(env, regs + regno);
1343 }
1344 
1345 static void verifier_mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1346 {
1347     __mark_reg_unknown(env, reg);
1348     reg->type = NOT_INIT;
1349 }
1350 
1351 static void mark_reg_not_init(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1352 {
1353     if (WARN_ON(regno >= MAX_BPF_REG)) {
1354         verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1355         /* Something bad happened, let's kill all regs except FP */
1356         for (regno = 0; regno < BPF_REG_FP; regno++) {
1357             verifier_mark_reg_not_init(env, regs + regno);
1358         }
1359         return;
1360     }
1361     verifier_mark_reg_not_init(env, regs + regno);
1362 }
1363 
1364 static void mark_btf_ld_reg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
1365                             enum bpf_reg_type reg_type, u32 btf_id)
1366 {
1367     if (reg_type == SCALAR_VALUE) {
1368         mark_reg_unknown(env, regs, regno);
1369         return;
1370     }
1371     mark_reg_known_zero(env, regs, regno);
1372     regs[regno].type = PTR_TO_BTF_ID;
1373     regs[regno].btf_id = btf_id;
1374 }
1375 
1376 #define DEF_NOT_SUBREG (0)
1377 static void init_reg_state(struct bpf_verifier_env *env, struct bpf_func_state *state)
1378 {
1379     struct bpf_reg_state *regs = state->regs;
1380     int i;
1381 
1382     for (i = 0; i < MAX_BPF_REG; i++) {
1383         mark_reg_not_init(env, regs, i);
1384         regs[i].live = REG_LIVE_NONE;
1385         regs[i].parent = NULL;
1386         regs[i].subreg_def = DEF_NOT_SUBREG;
1387     }
1388 
1389     /* frame pointer */
1390     regs[BPF_REG_FP].type = PTR_TO_STACK;
1391     mark_reg_known_zero(env, regs, BPF_REG_FP);
1392     regs[BPF_REG_FP].frameno = state->frameno;
1393 }
1394 
1395 #define BPF_MAIN_FUNC (-1)
1396 static void init_func_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int callsite, int frameno,
1397                             int subprogno)
1398 {
1399     state->callsite = callsite;
1400     state->frameno = frameno;
1401     state->subprogno = subprogno;
1402     init_reg_state(env, state);
1403 }
1404 
1405 enum reg_arg_type {
1406     SRC_OP,        /* register is used as source operand */
1407     DST_OP,        /* register is used as destination operand */
1408     DST_OP_NO_MARK /* same as above, check only, don't mark */
1409 };
1410 
1411 static int cmp_subprogs(const void *a, const void *b)
1412 {
1413     return ((struct bpf_subprog_info *)a)->start - ((struct bpf_subprog_info *)b)->start;
1414 }
1415 
1416 static int find_subprog(struct bpf_verifier_env *env, int off)
1417 {
1418     struct bpf_subprog_info *p;
1419 
1420     p = bsearch(&off, env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs);
1421     if (!p) {
1422         return -ENOENT;
1423     }
1424     return p - env->subprog_info;
1425 }
1426 
1427 static int add_subprog(struct bpf_verifier_env *env, int off)
1428 {
1429     int insn_cnt = env->prog->len;
1430     int ret;
1431 
1432     if (off >= insn_cnt || off < 0) {
1433         verbose(env, "call to invalid destination\n");
1434         return -EINVAL;
1435     }
1436     ret = find_subprog(env, off);
1437     if (ret >= 0) {
1438         return 0;
1439     }
1440     if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1441         verbose(env, "too many subprograms\n");
1442         return -E2BIG;
1443     }
1444     env->subprog_info[env->subprog_cnt++].start = off;
1445     sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1446     return 0;
1447 }
1448 
1449 static int check_subprogs(struct bpf_verifier_env *env)
1450 {
1451     int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1452     struct bpf_subprog_info *subprog = env->subprog_info;
1453     struct bpf_insn *insn = env->prog->insnsi;
1454     int insn_cnt = env->prog->len;
1455 
1456     /* Add entry function. */
1457     ret = add_subprog(env, 0);
1458     if (ret < 0) {
1459         return ret;
1460     }
1461 
1462     /* determine subprog starts. The end is one before the next starts */
1463     for (i = 0; i < insn_cnt; i++) {
1464         if (insn[i].code != (BPF_JMP | BPF_CALL)) {
1465             continue;
1466         }
1467         if (insn[i].src_reg != BPF_PSEUDO_CALL) {
1468             continue;
1469         }
1470         if (!env->bpf_capable) {
1471             verbose(env, "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1472             return -EPERM;
1473         }
1474         ret = add_subprog(env, i + insn[i].imm + 1);
1475         if (ret < 0) {
1476             return ret;
1477         }
1478     }
1479 
1480     /* Add a fake 'exit' subprog which could simplify subprog iteration
1481      * logic. 'subprog_cnt' should not be increased.
1482      */
1483     subprog[env->subprog_cnt].start = insn_cnt;
1484 
1485     if (env->log.level & BPF_LOG_LEVEL2) {
1486         for (i = 0; i < env->subprog_cnt; i++) {
1487             verbose(env, "func#%d @%d\n", i, subprog[i].start);
1488         }
1489     }
1490 
1491     /* now check that all jumps are within the same subprog */
1492     subprog_start = subprog[cur_subprog].start;
1493     subprog_end = subprog[cur_subprog + 1].start;
1494     for (i = 0; i < insn_cnt; i++) {
1495         u8 code = insn[i].code;
1496 
1497         if (code == (BPF_JMP | BPF_CALL) && insn[i].imm == BPF_FUNC_tail_call && insn[i].src_reg != BPF_PSEUDO_CALL) {
1498             subprog[cur_subprog].has_tail_call = true;
1499         }
1500         if (BPF_CLASS(code) == BPF_LD && (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) {
1501             subprog[cur_subprog].has_ld_abs = true;
1502         }
1503         if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) {
1504             goto next;
1505         }
1506         if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) {
1507             goto next;
1508         }
1509         off = i + insn[i].off + 1;
1510         if (off < subprog_start || off >= subprog_end) {
1511             verbose(env, "jump out of range from insn %d to %d\n", i, off);
1512             return -EINVAL;
1513         }
1514     next:
1515         if (i == subprog_end - 1) {
1516             /* to avoid fall-through from one subprog into another
1517              * the last insn of the subprog should be either exit
1518              * or unconditional jump back
1519              */
1520             if (code != (BPF_JMP | BPF_EXIT) && code != (BPF_JMP | BPF_JA)) {
1521                 verbose(env, "last insn is not an exit or jmp\n");
1522                 return -EINVAL;
1523             }
1524             subprog_start = subprog_end;
1525             cur_subprog++;
1526             if (cur_subprog < env->subprog_cnt) {
1527                 subprog_end = subprog[cur_subprog + 1].start;
1528             }
1529         }
1530     }
1531     return 0;
1532 }
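/* A hypothetical two-subprog layout, to illustrate the checks above:
 *   0: r6 = r1
 *   1: call pc+3        // BPF_PSEUDO_CALL, target = 1 + 3 + 1 = 5
 *   2: r0 = 0
 *   3: r0 += 1
 *   4: exit             // last insn of func#0
 *   5: r0 = 2           // func#1 starts here
 *   6: exit
 * add_subprog() records starts {0, 5}, the fake 'exit' subprog start is 7,
 * and any non-call jump inside insns 0..4 must land within [0, 5).
 */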
1533 
1534 /* Parentage chain of this register (or stack slot) should take care of all
1535  * issues like callee-saved registers, stack slot allocation time, etc.
1536  */
1537 static int mark_reg_read(struct bpf_verifier_env *env, const struct bpf_reg_state *state, struct bpf_reg_state *parent,
1538                          u8 flag)
1539 {
1540     bool writes = parent == state->parent; /* Observe write marks */
1541     int cnt = 0;
1542 
1543     while (parent) {
1544         /* if read wasn't screened by an earlier write ... */
1545         if (writes && (state->live & REG_LIVE_WRITTEN)) {
1546             break;
1547         }
1548         if (parent->live & REG_LIVE_DONE) {
1549             verbose(env, "verifier BUG type %s var_off %lld off %d\n", reg_type_str(env, parent->type),
1550                     parent->var_off.value, parent->off);
1551             return -EFAULT;
1552         }
1553         /* The first condition is more likely to be true than the
1554          * second, so check it first.
1555          */
1556         if ((parent->live & REG_LIVE_READ) == flag || (parent->live & REG_LIVE_READ64)) {
1557             /* The parentage chain never changes and
1558              * this parent was already marked as LIVE_READ.
1559              * There is no need to keep walking the chain again and
1560              * keep re-marking all parents as LIVE_READ.
1561              * This case happens when the same register is read
1562              * multiple times without writes into it in-between.
1563              * Also, if parent has the stronger REG_LIVE_READ64 set,
1564              * then no need to set the weak REG_LIVE_READ32.
1565              */
1566             break;
1567         }
1568         /* ... then we depend on parent's value */
1569         parent->live |= flag;
1570         /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1571         if (flag == REG_LIVE_READ64) {
1572             parent->live &= ~REG_LIVE_READ32;
1573         }
1574         state = parent;
1575         parent = state->parent;
1576         writes = true;
1577         cnt++;
1578     }
1579 
1580     if (env->longest_mark_read_walk < cnt) {
1581         env->longest_mark_read_walk = cnt;
1582     }
1583     return 0;
1584 }
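/* Rough sketch of the walk above (illustrative chain): reading r6 as a
 * 64-bit source ORs REG_LIVE_READ64 into r6 of each ancestor state, and
 * stops after the ancestor that last wrote r6 (REG_LIVE_WRITTEN screens
 * everything older) or as soon as an ancestor already carries an equally
 * strong read mark. If the current state itself wrote r6 first, no
 * ancestor is marked at all.
 */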
1585 
1586 /* This function is supposed to be used by the following 32-bit optimization
1587  * code only. It returns TRUE if the source or destination register operates
1588  * on 64 bits, otherwise FALSE.
1589  */
1590 static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, u32 regno, struct bpf_reg_state *reg,
1591                      enum reg_arg_type t)
1592 {
1593     u8 code, class, op;
1594 
1595     code = insn->code;
1596     class = BPF_CLASS(code);
1597     op = BPF_OP(code);
1598     if (class == BPF_JMP) {
1599         /* BPF_EXIT for "main" will reach here. Return TRUE
1600          * conservatively.
1601          */
1602         if (op == BPF_EXIT) {
1603             return true;
1604         }
1605         if (op == BPF_CALL) {
1606             /* BPF to BPF call will reach here because of marking
1607              * caller saved clobber with DST_OP_NO_MARK for which we
1608              * don't care about the register def because they are
1609              * already marked as NOT_INIT anyway.
1610              */
1611             if (insn->src_reg == BPF_PSEUDO_CALL) {
1612                 return false;
1613             }
1614             /* Helper call will reach here because of arg type
1615              * check, conservatively return TRUE.
1616              */
1617             if (t == SRC_OP) {
1618                 return true;
1619             }
1620 
1621             return false;
1622         }
1623     }
1624 
1625     if (class == BPF_ALU64 || class == BPF_JMP ||
1626         /* BPF_END always uses the BPF_ALU class. */
1627         (class == BPF_ALU && op == BPF_END && insn->imm == VERIFIER_SIXTYFOUR)) {
1628         return true;
1629     }
1630 
1631     if (class == BPF_ALU || class == BPF_JMP32) {
1632         return false;
1633     }
1634 
1635     if (class == BPF_LDX) {
1636         if (t != SRC_OP) {
1637             return BPF_SIZE(code) == BPF_DW;
1638         }
1639         /* LDX source must be ptr. */
1640         return true;
1641     }
1642 
1643     if (class == BPF_STX) {
1644         if (reg->type != SCALAR_VALUE) {
1645             return true;
1646         }
1647         return BPF_SIZE(code) == BPF_DW;
1648     }
1649 
1650     if (class == BPF_LD) {
1651         u8 mode = BPF_MODE(code);
1652         /* LD_IMM64 */
1653         if (mode == BPF_IMM) {
1654             return true;
1655         }
1656 
1657         /* Both LD_IND and LD_ABS return 32-bit data. */
1658         if (t != SRC_OP) {
1659             return false;
1660         }
1661 
1662         /* Implicit ctx ptr. */
1663         if (regno == BPF_REG_6) {
1664             return true;
1665         }
1666 
1667         /* Explicit source could be any width. */
1668         return true;
1669     }
1670 
1671     if (class == BPF_ST) {
1672         /* The only source register for BPF_ST is a ptr. */
1673         return true;
1674     }
1675 
1676     /* Conservatively return true at default. */
1677     return true;
1678 }
1679 
1680 /* Return TRUE if INSN doesn't have an explicit value definition. */
1681 static bool insn_no_def(struct bpf_insn *insn)
1682 {
1683     u8 class = BPF_CLASS(insn->code);
1684 
1685     return (class == BPF_JMP || class == BPF_JMP32 || class == BPF_STX || class == BPF_ST);
1686 }
1687 
1688 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
1689 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1690 {
1691     if (insn_no_def(insn)) {
1692         return false;
1693     }
1694 
1695     return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1696 }
1697 
1698 static void mark_insn_zext(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1699 {
1700     s32 def_idx = reg->subreg_def;
1701 
1702     if (def_idx == DEF_NOT_SUBREG) {
1703         return;
1704     }
1705 
1706     env->insn_aux_data[def_idx - 1].zext_dst = true;
1707     /* The dst will be zero extended, so won't be sub-register anymore. */
1708     reg->subreg_def = DEF_NOT_SUBREG;
1709 }
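/* Hypothetical sequence showing the subreg_def/zext bookkeeping above:
 *   5: w3 = w2      // 32-bit def: check_reg_arg() sets r3.subreg_def = 6
 *   6: r4 = r3      // 64-bit read of r3: mark_insn_zext() flags
 *                   // insn_aux_data[5].zext_dst so insn 5 gets an explicit
 *                   // zero-extension patched in when the JIT needs one
 */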
1710 
1711 static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t)
1712 {
1713     struct bpf_verifier_state *vstate = env->cur_state;
1714     struct bpf_func_state *state = vstate->frame[vstate->curframe];
1715     struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1716     struct bpf_reg_state *reg, *regs = state->regs;
1717     bool rw64;
1718 
1719     if (regno >= MAX_BPF_REG) {
1720         verbose(env, "R%d is invalid\n", regno);
1721         return -EINVAL;
1722     }
1723 
1724     reg = &regs[regno];
1725     rw64 = is_reg64(env, insn, regno, reg, t);
1726     if (t == SRC_OP) {
1727         /* check whether register used as source operand can be read */
1728         if (reg->type == NOT_INIT) {
1729             verbose(env, "R%d !read_ok\n", regno);
1730             return -EACCES;
1731         }
1732         /* We don't need to worry about FP liveness because it's read-only */
1733         if (regno == BPF_REG_FP) {
1734             return 0;
1735         }
1736 
1737         if (rw64) {
1738             mark_insn_zext(env, reg);
1739         }
1740 
1741         return mark_reg_read(env, reg, reg->parent, rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1742     } else {
1743         /* check whether register used as dest operand can be written to */
1744         if (regno == BPF_REG_FP) {
1745             verbose(env, "frame pointer is read only\n");
1746             return -EACCES;
1747         }
1748         reg->live |= REG_LIVE_WRITTEN;
1749         reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1750         if (t == DST_OP) {
1751             mark_reg_unknown(env, regs, regno);
1752         }
1753     }
1754     return 0;
1755 }
1756 
1757 /* for any branch, call, exit record the history of jmps in the given state */
1758 static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur)
1759 {
1760     u32 cnt = cur->jmp_history_cnt;
1761     struct bpf_idx_pair *p;
1762 
1763     cnt++;
1764     p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1765     if (!p) {
1766         return -ENOMEM;
1767     }
1768     p[cnt - 1].idx = env->insn_idx;
1769     p[cnt - 1].prev_idx = env->prev_insn_idx;
1770     cur->jmp_history = p;
1771     cur->jmp_history_cnt = cnt;
1772     return 0;
1773 }
1774 
1775 /* Backtrack one insn at a time. If idx is not at the top of recorded
1776  * history then previous instruction came from straight line execution.
1777  */
1778 static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, u32 *history)
1779 {
1780     u32 cnt = *history;
1781 
1782     if (cnt && st->jmp_history[cnt - 1].idx == i) {
1783         i = st->jmp_history[cnt - 1].prev_idx;
1784         (*history)--;
1785     } else {
1786         i--;
1787     }
1788     return i;
1789 }
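/* Illustration (hypothetical history): with jmp_history = [{idx=8,
 * prev_idx=3}], backtracking from insn 8 follows the recorded jump and
 * returns 3, while backtracking from insn 5 is straight-line execution and
 * simply returns 4.
 */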
1790 
1791 /* For given verifier state backtrack_insn() is called from the last insn to
1792  * the first insn. Its purpose is to compute a bitmask of registers and
1793  * stack slots that need precision in the parent verifier state.
1794  */
1795 static int backtrack_insn(struct bpf_verifier_env *env, int idx, u32 *reg_mask, u64 *stack_mask)
1796 {
1797     const struct bpf_insn_cbs cbs = {
1798         .cb_print = verbose,
1799         .private_data = env,
1800     };
1801     struct bpf_insn *insn = env->prog->insnsi + idx;
1802     u8 class = BPF_CLASS(insn->code);
1803     u8 opcode = BPF_OP(insn->code);
1804     u8 mode = BPF_MODE(insn->code);
1805     u32 dreg = 1u << insn->dst_reg;
1806     u32 sreg = 1u << insn->src_reg;
1807     u32 spi;
1808 
1809     if (insn->code == 0) {
1810         return 0;
1811     }
1812     if (env->log.level & BPF_LOG_LEVEL) {
1813         verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1814         verbose(env, "%d: ", idx);
1815         print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1816     }
1817 
1818     if (class == BPF_ALU || class == BPF_ALU64) {
1819         if (!(*reg_mask & dreg)) {
1820             return 0;
1821         }
1822         if (opcode == BPF_MOV) {
1823             if (BPF_SRC(insn->code) == BPF_X) {
1824                 /* dreg = sreg
1825                  * dreg needs precision after this insn
1826                  * sreg needs precision before this insn
1827                  */
1828                 *reg_mask &= ~dreg;
1829                 *reg_mask |= sreg;
1830             } else {
1831                 /* dreg = K
1832                  * dreg needs precision after this insn.
1833                  * Corresponding register is already marked
1834                  * as precise=true in this verifier state.
1835                  * No further markings in parent are necessary
1836                  */
1837                 *reg_mask &= ~dreg;
1838             }
1839         } else {
1840             if (BPF_SRC(insn->code) == BPF_X) {
1841                 /* dreg += sreg
1842                  * both dreg and sreg need precision
1843                  * before this insn
1844                  */
1845                 *reg_mask |= sreg;
1846             }
1847             /* else dreg += K
1848              * dreg still needs precision before this insn
1849              */
1850         }
1851     } else if (class == BPF_LDX) {
1852         if (!(*reg_mask & dreg)) {
1853             return 0;
1854         }
1855         *reg_mask &= ~dreg;
1856 
1857         /* scalars can only be spilled into stack w/o losing precision.
1858          * Load from any other memory can be zero extended.
1859          * The desire to keep that precision is already indicated
1860          * by 'precise' mark in corresponding register of this state.
1861          * No further tracking necessary.
1862          */
1863         if (insn->src_reg != BPF_REG_FP) {
1864             return 0;
1865         }
1866         if (BPF_SIZE(insn->code) != BPF_DW) {
1867             return 0;
1868         }
1869 
1870         /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1871          * that [fp - off] slot contains scalar that needs to be
1872          * tracked with precision
1873          */
1874         spi = (-insn->off - 1) / BPF_REG_SIZE;
1875         if (spi >= VERIFIER_SIXTYFOUR) {
1876             verbose(env, "BUG spi %d\n", spi);
1877             WARN_ONCE(1, "verifier backtracking bug");
1878             return -EFAULT;
1879         }
1880         *stack_mask |= 1ull << spi;
1881     } else if (class == BPF_STX || class == BPF_ST) {
1882         if (*reg_mask & dreg) {
1883             /* stx & st shouldn't be using _scalar_ dst_reg
1884              * to access memory. It means backtracking
1885              * encountered a case of pointer subtraction.
1886              */
1887             return -ENOTSUPP;
1888         }
1889         /* scalars can only be spilled into stack */
1890         if (insn->dst_reg != BPF_REG_FP) {
1891             return 0;
1892         }
1893         if (BPF_SIZE(insn->code) != BPF_DW) {
1894             return 0;
1895         }
1896         spi = (-insn->off - 1) / BPF_REG_SIZE;
1897         if (spi >= VERIFIER_SIXTYFOUR) {
1898             verbose(env, "BUG spi %d\n", spi);
1899             WARN_ONCE(1, "verifier backtracking bug");
1900             return -EFAULT;
1901         }
1902         if (!(*stack_mask & (1ull << spi))) {
1903             return 0;
1904         }
1905         *stack_mask &= ~(1ull << spi);
1906         if (class == BPF_STX) {
1907             *reg_mask |= sreg;
1908         }
1909     } else if (class == BPF_JMP || class == BPF_JMP32) {
1910         if (opcode == BPF_CALL) {
1911             if (insn->src_reg == BPF_PSEUDO_CALL) {
1912                 return -ENOTSUPP;
1913             }
1914             /* regular helper call sets R0 */
1915             *reg_mask &= ~1;
1916             if (*reg_mask & 0x3f) {
1917                 /* if backtracing was looking for registers R1-R5
1918                  * they should have been found already.
1919                  */
1920                 verbose(env, "BUG regs %x\n", *reg_mask);
1921                 WARN_ONCE(1, "verifier backtracking bug");
1922                 return -EFAULT;
1923             }
1924         } else if (opcode == BPF_EXIT) {
1925             return -ENOTSUPP;
1926         }
1927     } else if (class == BPF_LD) {
1928         if (!(*reg_mask & dreg)) {
1929             return 0;
1930         }
1931         *reg_mask &= ~dreg;
1932         /* It's ld_imm64 or ld_abs or ld_ind.
1933          * For ld_imm64 no further tracking of precision
1934          * into parent is necessary
1935          */
1936         if (mode == BPF_IND || mode == BPF_ABS) {
1937             /* to be analyzed */
1938             return -ENOTSUPP;
1939         }
1940     }
1941     return 0;
1942 }
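/* Two hypothetical backtracking steps, for illustration:
 *   r2 = r7                 // BPF_MOV|BPF_X: if reg_mask contained r2, it
 *                           // is cleared and r7 is added instead
 *   *(u64 *)(r10 -8) = r6   // BPF_STX to fp-8 (spi 0): if stack_mask had
 *                           // bit 0 set, it is cleared and r6 is added to
 *                           // reg_mask
 */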
1943 
1944 /* the scalar precision tracking algorithm:
1945  * . at the start all registers have precise=false.
1946  * . scalar ranges are tracked as normal through alu and jmp insns.
1947  * . once precise value of the scalar register is used in:
1948  *   .  ptr + scalar alu
1949  *   . if (scalar cond K|scalar)
1950  *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
1951  *   backtrack through the verifier states and mark all registers and
1952  *   stack slots with spilled constants that these scalar registers
1953  *   should be precise.
1954  * . during state pruning two registers (or spilled stack slots)
1955  *   are equivalent if both are not precise.
1956  *
1957  * Note the verifier cannot simply walk register parentage chain,
1958  * since many different registers and stack slots could have been
1959  * used to compute single precise scalar.
1960  *
1961  * The approach of starting with precise=true for all registers and then
1962  * backtrack to mark a register as not precise when the verifier detects
1963  * that program doesn't care about specific value (e.g., when helper
1964  * takes register as ARG_ANYTHING parameter) is not safe.
1965  *
1966  * It's ok to walk single parentage chain of the verifier states.
1967  * It's possible that this backtracking will go all the way till 1st insn.
1968  * All other branches will be explored for needing precision later.
1969  *
1970  * The backtracking needs to deal with cases like:
1971  *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1972  * r9 -= r8
1973  * r5 = r9
1974  * if r5 > 0x79f goto pc+7
1975  *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1976  * r5 += 1
1977  * ...
1978  * call bpf_perf_event_output#25
1979  *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1980  *
1981  * and this case:
1982  * r6 = 1
1983  * call foo // uses callee's r6 inside to compute r0
1984  * r0 += r6
1985  * if r0 == 0 goto
1986  *
1987  * to track above reg_mask/stack_mask needs to be independent for each frame.
1988  *
1989  * Also if parent's curframe > frame where backtracking started,
1990  * the verifier needs to mark registers in both frames, otherwise callees
1991  * may incorrectly prune callers. This is similar to
1992  * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1993  *
1994  * For now backtracking falls back into conservative marking.
1995  */
1996 static void mark_all_scalars_precise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1997 {
1998     struct bpf_func_state *func;
1999     struct bpf_reg_state *reg;
2000     int i, j;
2001 
2002     /* big hammer: mark all scalars precise in this path.
2003      * pop_stack may still get !precise scalars.
2004      */
2005     for (; st; st = st->parent) {
2006         for (i = 0; i <= st->curframe; i++) {
2007             func = st->frame[i];
2008             for (j = 0; j < BPF_REG_FP; j++) {
2009                 reg = &func->regs[j];
2010                 if (reg->type != SCALAR_VALUE) {
2011                     continue;
2012                 }
2013                 reg->precise = true;
2014             }
2015             for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2016                 if (func->stack[j].slot_type[0] != STACK_SPILL) {
2017                     continue;
2018                 }
2019                 reg = &func->stack[j].spilled_ptr;
2020                 if (reg->type != SCALAR_VALUE) {
2021                     continue;
2022                 }
2023                 reg->precise = true;
2024             }
2025         }
2026     }
2027 }
2028 
2029 static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, int spi)
2030 {
2031     struct bpf_verifier_state *st = env->cur_state;
2032     int first_idx = st->first_insn_idx;
2033     int last_idx = env->insn_idx;
2034     struct bpf_func_state *func;
2035     struct bpf_reg_state *reg;
2036     u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2037     u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2038     bool skip_first = true;
2039     bool new_marks = false;
2040     int i, err;
2041 
2042     if (!env->bpf_capable) {
2043         return 0;
2044     }
2045 
2046     func = st->frame[st->curframe];
2047     if (regno >= 0) {
2048         reg = &func->regs[regno];
2049         if (reg->type != SCALAR_VALUE) {
2050             WARN_ONCE(1, "backtracing misuse");
2051             return -EFAULT;
2052         }
2053         if (!reg->precise) {
2054             new_marks = true;
2055         } else {
2056             reg_mask = 0;
2057         }
2058         reg->precise = true;
2059     }
2060 
2061     while (spi >= 0) {
2062         if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2063             stack_mask = 0;
2064             break;
2065         }
2066         reg = &func->stack[spi].spilled_ptr;
2067         if (reg->type != SCALAR_VALUE) {
2068             stack_mask = 0;
2069             break;
2070         }
2071         if (!reg->precise) {
2072             new_marks = true;
2073         } else {
2074             stack_mask = 0;
2075         }
2076         reg->precise = true;
2077         break;
2078     }
2079 
2080     if (!new_marks) {
2081         return 0;
2082     }
2083     if (!reg_mask && !stack_mask) {
2084         return 0;
2085     }
2086     for (;;) {
2087         DECLARE_BITMAP(mask, VERIFIER_SIXTYFOUR);
2088         u32 history = st->jmp_history_cnt;
2089 
2090         if (env->log.level & BPF_LOG_LEVEL) {
2091             verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2092         }
2093         for (i = last_idx;;) {
2094             if (skip_first) {
2095                 err = 0;
2096                 skip_first = false;
2097             } else {
2098                 err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2099             }
2100             if (err == -ENOTSUPP) {
2101                 mark_all_scalars_precise(env, st);
2102                 return 0;
2103             } else if (err) {
2104                 return err;
2105             }
2106             if (!reg_mask && !stack_mask) {
2107                 /* Found assignment(s) into tracked register in this state.
2108                  * Since this state is already marked, just return.
2109                  * Nothing to be tracked further in the parent state.
2110                  */
2111                 return 0;
2112             }
2113             if (i == first_idx) {
2114                 break;
2115             }
2116             i = get_prev_insn_idx(st, i, &history);
2117             if (i >= env->prog->len) {
2118                 /* This can happen if backtracking reached insn 0
2119                  * and there are still reg_mask or stack_mask
2120                  * to backtrack.
2121                  * It means the backtracking missed the spot where
2122                  * particular register was initialized with a constant.
2123                  */
2124                 verbose(env, "BUG backtracking idx %d\n", i);
2125                 WARN_ONCE(1, "verifier backtracking bug");
2126                 return -EFAULT;
2127             }
2128         }
2129         st = st->parent;
2130         if (!st) {
2131             break;
2132         }
2133 
2134         new_marks = false;
2135         func = st->frame[st->curframe];
2136         bitmap_from_u64(mask, reg_mask);
2137         for_each_set_bit(i, mask, 0x20)
2138         {
2139             reg = &func->regs[i];
2140             if (reg->type != SCALAR_VALUE) {
2141                 reg_mask &= ~(1u << i);
2142                 continue;
2143             }
2144             if (!reg->precise) {
2145                 new_marks = true;
2146             }
2147             reg->precise = true;
2148         }
2149 
2150         bitmap_from_u64(mask, stack_mask);
2151         for_each_set_bit(i, mask, VERIFIER_SIXTYFOUR)
2152         {
2153             if (i >= func->allocated_stack / BPF_REG_SIZE) {
2154                 /* the sequence of instructions:
2155                  * 2: (bf) r3 = r10
2156                  * 3: (7b) *(u64 *)(r3 -8) = r0
2157                  * 4: (79) r4 = *(u64 *)(r10 -8)
2158                  * doesn't contain jmps. It's backtracked
2159                  * as a single block.
2160                  * During backtracking insn 3 is not recognized as
2161                  * stack access, so at the end of backtracking
2162                  * stack slot fp-8 is still marked in stack_mask.
2163                  * However the parent state may not have accessed
2164                  * fp-8 and it's "unallocated" stack space.
2165                  * In such case fallback to conservative.
2166                  */
2167                 mark_all_scalars_precise(env, st);
2168                 return 0;
2169             }
2170 
2171             if (func->stack[i].slot_type[0] != STACK_SPILL) {
2172                 stack_mask &= ~(1ull << i);
2173                 continue;
2174             }
2175             reg = &func->stack[i].spilled_ptr;
2176             if (reg->type != SCALAR_VALUE) {
2177                 stack_mask &= ~(1ull << i);
2178                 continue;
2179             }
2180             if (!reg->precise) {
2181                 new_marks = true;
2182             }
2183             reg->precise = true;
2184         }
2185         if (env->log.level & BPF_LOG_LEVEL) {
2186             print_verifier_state(env, func);
2187             verbose(env, "parent %s regs=%x stack=%llx marks\n", new_marks ? "didn't have" : "already had", reg_mask,
2188                     stack_mask);
2189         }
2190 
2191         if (!reg_mask && !stack_mask) {
2192             break;
2193         }
2194         if (!new_marks) {
2195             break;
2196         }
2197 
2198         last_idx = st->last_insn_idx;
2199         first_idx = st->first_insn_idx;
2200     }
2201     return 0;
2202 }
2203 
2204 static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2205 {
2206     return __mark_chain_precision(env, regno, -1);
2207 }
2208 
2209 static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2210 {
2211     return __mark_chain_precision(env, -1, spi);
2212 }
2213 
2214 static bool is_spillable_regtype(enum bpf_reg_type type)
2215 {
2216     switch (base_type(type)) {
2217         case PTR_TO_MAP_VALUE:
2218         case PTR_TO_STACK:
2219         case PTR_TO_CTX:
2220         case PTR_TO_PACKET:
2221         case PTR_TO_PACKET_META:
2222         case PTR_TO_PACKET_END:
2223         case PTR_TO_FLOW_KEYS:
2224         case CONST_PTR_TO_MAP:
2225         case PTR_TO_SOCKET:
2226         case PTR_TO_SOCK_COMMON:
2227         case PTR_TO_TCP_SOCK:
2228         case PTR_TO_XDP_SOCK:
2229         case PTR_TO_BTF_ID:
2230         case PTR_TO_BUF:
2231         case PTR_TO_PERCPU_BTF_ID:
2232         case PTR_TO_MEM:
2233             return true;
2234         default:
2235             return false;
2236     }
2237 }
2238 
2239 /* Does this register contain a constant zero? */
2240 static bool register_is_null(struct bpf_reg_state *reg)
2241 {
2242     return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2243 }
2244 
2245 static bool register_is_const(struct bpf_reg_state *reg)
2246 {
2247     return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2248 }
2249 
2250 static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2251 {
2252     return tnum_is_unknown(reg->var_off) && reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2253            reg->umin_value == 0 && reg->umax_value == U64_MAX && reg->s32_min_value == S32_MIN &&
2254            reg->s32_max_value == S32_MAX && reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2255 }
2256 
2257 static bool register_is_bounded(struct bpf_reg_state *reg)
2258 {
2259     return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2260 }
2261 
2262 static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg)
2263 {
2264     if (allow_ptr_leaks) {
2265         return false;
2266     }
2267 
2268     return reg->type != SCALAR_VALUE;
2269 }
2270 
2271 static void save_register_state(struct bpf_func_state *state, int spi, struct bpf_reg_state *reg)
2272 {
2273     int i;
2274 
2275     state->stack[spi].spilled_ptr = *reg;
2276     state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2277 
2278     for (i = 0; i < BPF_REG_SIZE; i++) {
2279         state->stack[spi].slot_type[i] = STACK_SPILL;
2280     }
2281 }
2282 
2283 /* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2284  * stack boundary and alignment are checked in check_mem_access()
2285  */
2286 static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2287                                        /* stack frame we're writing to */
2288                                        struct bpf_func_state *state, int off, int size, int value_regno, int insn_idx)
2289 {
2290     struct bpf_func_state *cur; /* state of the current function */
2291     int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2292     u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2293     struct bpf_reg_state *reg = NULL;
2294 
2295     err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), state->acquired_refs, true);
2296     if (err) {
2297         return err;
2298     }
2299     /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2300      * so it's aligned access and [off, off + size) are within stack limits
2301      */
2302     if (!env->allow_ptr_leaks && state->stack[spi].slot_type[0] == STACK_SPILL && size != BPF_REG_SIZE) {
2303         verbose(env, "attempt to corrupt spilled pointer on stack\n");
2304         return -EACCES;
2305     }
2306 
2307     cur = env->cur_state->frame[env->cur_state->curframe];
2308     if (value_regno >= 0) {
2309         reg = &cur->regs[value_regno];
2310     }
2311     if (!env->bypass_spec_v4) {
2312         bool sanitize = reg && is_spillable_regtype(reg->type);
2313 
2314         for (i = 0; i < size; i++) {
2315             if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2316                 sanitize = true;
2317                 break;
2318             }
2319         }
2320 
2321         if (sanitize) {
2322             env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2323         }
2324     }
2325 
2326     if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) {
2327         if (dst_reg != BPF_REG_FP) {
2328             /* The backtracking logic can only recognize explicit
2329              * stack slot address like [fp - 8]. Other spill of
2330              * scalar via a different register has to be conservative.
2331              * Backtrack from here and mark all registers as precise
2332              * that contributed into 'reg' being a constant.
2333              */
2334             err = mark_chain_precision(env, value_regno);
2335             if (err) {
2336                 return err;
2337             }
2338         }
2339         save_register_state(state, spi, reg);
2340     } else if (reg && is_spillable_regtype(reg->type)) {
2341         /* register containing pointer is being spilled into stack */
2342         if (size != BPF_REG_SIZE) {
2343             verbose_linfo(env, insn_idx, "; ");
2344             verbose(env, "invalid size of register spill\n");
2345             return -EACCES;
2346         }
2347         if (state != cur && reg->type == PTR_TO_STACK) {
2348             verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2349             return -EINVAL;
2350         }
2351         save_register_state(state, spi, reg);
2352     } else {
2353         u8 type = STACK_MISC;
2354 
2355         /* regular write of data into stack destroys any spilled ptr */
2356         state->stack[spi].spilled_ptr.type = NOT_INIT;
2357         /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2358         if (state->stack[spi].slot_type[0] == STACK_SPILL) {
2359             for (i = 0; i < BPF_REG_SIZE; i++) {
2360                 state->stack[spi].slot_type[i] = STACK_MISC;
2361             }
2362         }
2363 
2364         /* only mark the slot as written if all 8 bytes were written
2365          * otherwise read propagation may incorrectly stop too soon
2366          * when stack slots are partially written.
2367          * This heuristic means that read propagation will be
2368          * conservative, since it will add reg_live_read marks
2369          * to stack slots all the way to the first state when a program
2370          * writes+reads less than 8 bytes.
2371          */
2372         if (size == BPF_REG_SIZE) {
2373             state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2374         }
2375 
2376         /* when we zero initialize stack slots mark them as such */
2377         if (reg && register_is_null(reg)) {
2378             /* backtracking doesn't work for STACK_ZERO yet. */
2379             err = mark_chain_precision(env, value_regno);
2380             if (err) {
2381                 return err;
2382             }
2383             type = STACK_ZERO;
2384         }
2385 
2386         /* Mark slots affected by this stack write. */
2387         for (i = 0; i < size; i++) {
2388             state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
2389         }
2390     }
2391     return 0;
2392 }
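/* Illustrative fixed-offset writes (hypothetical register types):
 *   *(u64 *)(r10 -8) = r1   // r1 = PTR_TO_CTX: full-width spill, all eight
 *                           // slot_type bytes of fp-8 become STACK_SPILL
 *   *(u32 *)(r10 -16) = r2  // r2 = scalar: partial write, the four written
 *                           // bytes become STACK_MISC (STACK_ZERO if r2 is
 *                           // known to be zero)
 */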
2393 
2394 /* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2395  * known to contain a variable offset.
2396  * This function checks whether the write is permitted and conservatively
2397  * tracks the effects of the write, considering that each stack slot in the
2398  * dynamic range is potentially written to.
2399  *
2400  * 'off' includes 'regno->off'.
2401  * 'value_regno' can be -1, meaning that an unknown value is being written to
2402  * the stack.
2403  *
2404  * Spilled pointers in range are not marked as written because we don't know
2405  * what's going to be actually written. This means that read propagation for
2406  * future reads cannot be terminated by this write.
2407  *
2408  * For privileged programs, uninitialized stack slots are considered
2409  * initialized by this write (even though we don't know exactly what offsets
2410  * are going to be written to). The idea is that we don't want the verifier to
2411  * reject future reads that access slots written to through variable offsets.
2412  */
2413 static int check_stack_write_var_off(struct bpf_verifier_env *env,
2414                                      /* func where register points to */
2415                                      struct bpf_func_state *state, int ptr_regno, int off, int size, int value_regno,
2416                                      int insn_idx)
2417 {
2418     struct bpf_func_state *cur; /* state of the current function */
2419     int min_off, max_off;
2420     int i, err;
2421     struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2422     bool writing_zero = false;
2423     /* set if the fact that we're writing a zero is used to let any
2424      * stack slots remain STACK_ZERO
2425      */
2426     bool zero_used = false;
2427 
2428     cur = env->cur_state->frame[env->cur_state->curframe];
2429     ptr_reg = &cur->regs[ptr_regno];
2430     min_off = ptr_reg->smin_value + off;
2431     max_off = ptr_reg->smax_value + off + size;
2432     if (value_regno >= 0) {
2433         value_reg = &cur->regs[value_regno];
2434     }
2435     if (value_reg && register_is_null(value_reg)) {
2436         writing_zero = true;
2437     }
2438 
2439     err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), state->acquired_refs, true);
2440     if (err) {
2441         return err;
2442     }
2443 
2444     /* Variable offset writes destroy any spilled pointers in range. */
2445     for (i = min_off; i < max_off; i++) {
2446         u8 new_type, *stype;
2447         int slot, spi;
2448 
2449         slot = -i - 1;
2450         spi = slot / BPF_REG_SIZE;
2451         stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2452 
2453         if (!env->allow_ptr_leaks && *stype != NOT_INIT && *stype != SCALAR_VALUE) {
2454             /* Reject the write if there are spilled pointers in
2455              * range. If we didn't reject here, the ptr status
2456              * would be erased below (even though not all slots are
2457              * actually overwritten), possibly opening the door to
2458              * leaks.
2459              */
2460             verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", insn_idx, i);
2461             return -EINVAL;
2462         }
2463 
2464         /* Erase all spilled pointers. */
2465         state->stack[spi].spilled_ptr.type = NOT_INIT;
2466 
2467         /* Update the slot type. */
2468         new_type = STACK_MISC;
2469         if (writing_zero && *stype == STACK_ZERO) {
2470             new_type = STACK_ZERO;
2471             zero_used = true;
2472         }
2473         /* If the slot is STACK_INVALID, we check whether it's OK to
2474          * pretend that it will be initialized by this write. The slot
2475          * might not actually be written to, and so if we mark it as
2476          * initialized future reads might leak uninitialized memory.
2477          * For privileged programs, we will accept such reads to slots
2478          * that may or may not be written because, if we rejected
2479          * them, the error would be too confusing.
2480          */
2481         if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2482             verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", insn_idx,
2483                     i);
2484             return -EINVAL;
2485         }
2486         *stype = new_type;
2487     }
2488     if (zero_used) {
2489         /* backtracking doesn't work for STACK_ZERO yet. */
2490         err = mark_chain_precision(env, value_regno);
2491         if (err) {
2492             return err;
2493         }
2494     }
2495     return 0;
2496 }
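/* Hypothetical variable-offset write, to illustrate the window above: with
 * ptr_reg = fp-16 plus a scalar in [0, 8] and a 4-byte write, min_off = -16
 * and max_off = -4, so every slot in [fp-16, fp-4) is checked: a spilled
 * pointer anywhere in that window rejects the program for unprivileged
 * users, otherwise the touched bytes are downgraded to STACK_MISC (a slot
 * stays STACK_ZERO only if it already was and a zero is being written).
 */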
2497 
2498 /* When register 'dst_regno' is assigned some values from stack[min_off,
2499  * max_off), we set the register's type according to the types of the
2500  * respective stack slots. If all the stack values are known to be zeros, then
2501  * so is the destination reg. Otherwise, the register is considered to be
2502  * SCALAR. This function does not deal with register filling; the caller must
2503  * ensure that all spilled registers in the stack range have been marked as
2504  * read.
2505  */
2506 static void mark_reg_stack_read(struct bpf_verifier_env *env,
2507                                 /* func where src register points to */
2508                                 struct bpf_func_state *ptr_state, int min_off, int max_off, int dst_regno)
2509 {
2510     struct bpf_verifier_state *vstate = env->cur_state;
2511     struct bpf_func_state *state = vstate->frame[vstate->curframe];
2512     int i, slot, spi;
2513     u8 *stype;
2514     int zeros = 0;
2515 
2516     for (i = min_off; i < max_off; i++) {
2517         slot = -i - 1;
2518         spi = slot / BPF_REG_SIZE;
2519         stype = ptr_state->stack[spi].slot_type;
2520         if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) {
2521             break;
2522         }
2523         zeros++;
2524     }
2525     if (zeros == max_off - min_off) {
2526         /* any access_size read into register is zero extended,
2527          * so the whole register == const_zero
2528          */
2529         verifier_mark_reg_const_zero(&state->regs[dst_regno]);
2530         /* backtracking doesn't support STACK_ZERO yet,
2531          * so mark it precise here, so that later
2532          * backtracking can stop here.
2533          * Backtracking may not need this if this register
2534          * doesn't participate in pointer adjustment.
2535          * Forward propagation of precise flag is not
2536          * necessary either. This mark is only to stop
2537          * backtracking. Any register that contributed
2538          * to const 0 was marked precise before spill.
2539          */
2540         state->regs[dst_regno].precise = true;
2541     } else {
2542         /* have read misc data from the stack */
2543         mark_reg_unknown(env, state->regs, dst_regno);
2544     }
2545     state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2546 }
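/* For illustration: a 4-byte read whose four slot_type bytes are all
 * STACK_ZERO yields a known-zero destination register (stack reads are
 * zero-extended), while any STACK_MISC byte in the range makes the
 * destination an unknown SCALAR_VALUE.
 */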
2547 
2548 /* Read the stack at 'off' and put the results into the register indicated by
2549  * 'dst_regno'. It handles reg filling if the addressed stack slot is a
2550  * spilled reg.
2551  *
2552  * 'dst_regno' can be -1, meaning that the read value is not going to a
2553  * register.
2554  *
2555  * The access is assumed to be within the current stack bounds.
2556  */
2557 static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
2558                                       /* func where src register points to */
2559                                       struct bpf_func_state *reg_state, int off, int size, int dst_regno)
2560 {
2561     struct bpf_verifier_state *vstate = env->cur_state;
2562     struct bpf_func_state *state = vstate->frame[vstate->curframe];
2563     int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2564     struct bpf_reg_state *reg;
2565     u8 *stype;
2566 
2567     stype = reg_state->stack[spi].slot_type;
2568     reg = &reg_state->stack[spi].spilled_ptr;
2569 
2570     if (stype[0] == STACK_SPILL) {
2571         if (size != BPF_REG_SIZE) {
2572             if (reg->type != SCALAR_VALUE) {
2573                 verbose_linfo(env, env->insn_idx, "; ");
2574                 verbose(env, "invalid size of register fill\n");
2575                 return -EACCES;
2576             }
2577             if (dst_regno >= 0) {
2578                 mark_reg_unknown(env, state->regs, dst_regno);
2579                 state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2580             }
2581             mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2582             return 0;
2583         }
2584         for (i = 1; i < BPF_REG_SIZE; i++) {
2585             if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2586                 verbose(env, "corrupted spill memory\n");
2587                 return -EACCES;
2588             }
2589         }
2590 
2591         if (dst_regno >= 0) {
2592             /* restore register state from stack */
2593             state->regs[dst_regno] = *reg;
2594             /* mark reg as written since spilled pointer state likely
2595              * has its liveness marks cleared by is_state_visited()
2596              * which resets stack/reg liveness for state transitions
2597              */
2598             state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2599         } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2600             /* If dst_regno==-1, the caller is asking us whether
2601              * it is acceptable to use this value as a SCALAR_VALUE
2602              * (e.g. for XADD).
2603              * We must not allow unprivileged callers to do that
2604              * with spilled pointers.
2605              */
2606             verbose(env, "leaking pointer from stack off %d\n", off);
2607             return -EACCES;
2608         }
2609         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2610     } else {
2611         u8 type;
2612 
2613         for (i = 0; i < size; i++) {
2614             type = stype[(slot - i) % BPF_REG_SIZE];
2615             if (type == STACK_MISC) {
2616                 continue;
2617             }
2618             if (type == STACK_ZERO) {
2619                 continue;
2620             }
2621             verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size);
2622             return -EACCES;
2623         }
2624         mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2625         if (dst_regno >= 0) {
2626             mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
2627         }
2628     }
2629     return 0;
2630 }
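/* Illustrative fills, assuming fp-8 holds a spilled PTR_TO_MAP_VALUE:
 *   r2 = *(u64 *)(r10 -8)   // full-width fill: r2 gets the spilled
 *                           // register state back
 *   r2 = *(u32 *)(r10 -8)   // partial fill of a spilled pointer is
 *                           // rejected ("invalid size of register fill")
 */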
2631 
2632 enum stack_access_src {
2633     ACCESS_DIRECT = 1, /* the access is performed by an instruction */
2634     ACCESS_HELPER = 2, /* the access is performed by a helper */
2635 };
2636 
2637 static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size,
2638                                          bool zero_size_allowed, enum stack_access_src type,
2639                                          struct bpf_call_arg_meta *meta);
2640 
2641 static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2642 {
2643     return cur_regs(env) + regno;
2644 }
2645 
2646 /* Read the stack at 'ptr_regno + off' and put the result into the register
2647  * 'dst_regno'.
2648  * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
2649  * but not its variable offset.
2650  * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
2651  *
2652  * As opposed to check_stack_read_fixed_off, this function doesn't deal with
2653  * filling registers (i.e. reads of spilled register cannot be detected when
2654  * the offset is not fixed). We conservatively mark 'dst_regno' as containing
2655  * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
2656  * offset; for a fixed offset check_stack_read_fixed_off should be used
2657  * instead.
2658  */
2659 static int check_stack_read_var_off(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int dst_regno)
2660 {
2661     /* The state of the source register. */
2662     struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2663     struct bpf_func_state *ptr_state = func(env, reg);
2664     int err;
2665     int min_off, max_off;
2666 
2667     /* Note that we pass a NULL meta, so raw access will not be permitted.
2668      */
2669     err = check_stack_range_initialized(env, ptr_regno, off, size, false, ACCESS_DIRECT, NULL);
2670     if (err) {
2671         return err;
2672     }
2673 
2674     min_off = reg->smin_value + off;
2675     max_off = reg->smax_value + off;
2676     mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
2677     return 0;
2678 }
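
/* Illustrative sketch, not part of the verifier: a read that takes this path.
 * Assume R1 already holds a scalar bounded to [0, 7] by earlier conditional
 * jumps (the concrete bounds are only an assumption for the example):
 *
 *   BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),          // R2 = fp, PTR_TO_STACK
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),         // R2 = fp-8, fixed off
 *   BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),  // R2 now has variable off
 *   BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_2, 0),   // 1-byte stack read
 *
 * check_stack_read_var_off() only verifies that every byte in [fp-8, fp-1]
 * is initialized and then marks R3 as SCALAR_VALUE; unlike the fixed-offset
 * path it never fills R3 from a spilled register. Variable-offset stack
 * pointers are rejected earlier for unprivileged programs, see
 * check_stack_read() below.
 */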
2679 
2680 /* check_stack_read dispatches to check_stack_read_fixed_off or
2681  * check_stack_read_var_off.
2682  *
2683  * The caller must ensure that the offset falls within the allocated stack
2684  * bounds.
2685  *
2686  * 'dst_regno' is a register which will receive the value from the stack. It
2687  * can be -1, meaning that the read value is not going to a register.
2688  */
2689 static int check_stack_read(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int dst_regno)
2690 {
2691     struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2692     struct bpf_func_state *state = func(env, reg);
2693     int err;
2694     /* Some accesses are only permitted with a static offset. */
2695     bool var_off = !tnum_is_const(reg->var_off);
2696     /* The offset is required to be static when reads don't go to a
2697      * register, in order to not leak pointers (see
2698      * check_stack_read_fixed_off).
2699      */
2700     if (dst_regno < 0 && var_off) {
2701         char tn_buf[48];
2702 
2703         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2704         verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
2705                 tn_buf, off, size);
2706         return -EACCES;
2707     }
2708     /* Variable offset is prohibited for unprivileged mode for simplicity
2709      * since it requires corresponding support in Spectre masking for stack
2710      * ALU. See also retrieve_ptr_limit().
2711      */
2712     if (!env->bypass_spec_v1 && var_off) {
2713         char tn_buf[48];
2714 
2715         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2716         verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", ptr_regno, tn_buf);
2717         return -EACCES;
2718     }
2719 
2720     if (!var_off) {
2721         off += reg->var_off.value;
2722         err = check_stack_read_fixed_off(env, state, off, size, dst_regno);
2723     } else {
2724         /* Variable offset stack reads need more conservative handling
2725          * than fixed offset ones. Note that dst_regno >= 0 on this
2726          * branch.
2727          */
2728         err = check_stack_read_var_off(env, ptr_regno, off, size, dst_regno);
2729     }
2730     return err;
2731 }
2732 
2733 /* check_stack_write dispatches to check_stack_write_fixed_off or
2734  * check_stack_write_var_off.
2735  *
2736  * 'ptr_regno' is the register used as a pointer into the stack.
2737  * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
2738  * 'value_regno' is the register whose value we're writing to the stack. It can
2739  * be -1, meaning that we're not writing from a register.
2740  *
2741  * The caller must ensure that the offset falls within the maximum stack size.
2742  */
2743 static int check_stack_write(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int value_regno,
2744                              int insn_idx)
2745 {
2746     struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2747     struct bpf_func_state *state = func(env, reg);
2748     int err;
2749 
2750     if (tnum_is_const(reg->var_off)) {
2751         off += reg->var_off.value;
2752         err = check_stack_write_fixed_off(env, state, off, size, value_regno, insn_idx);
2753     } else {
2754         /* Variable offset stack writes need more conservative handling
2755          * than fixed offset ones.
2756          */
2757         err = check_stack_write_var_off(env, state, ptr_regno, off, size, value_regno, insn_idx);
2758     }
2759     return err;
2760 }
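
/* Illustrative sketch, not part of the verifier: how the two write paths are
 * picked. A store through R10 has a known-constant variable offset, so
 *
 *   BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
 *
 * goes through check_stack_write_fixed_off() and can be tracked as a precise
 * spill of R1 into fp[-8]. If the destination were instead "r2 = r10;
 * r2 += r3" with a bounded scalar r3, the same store would take
 * check_stack_write_var_off(), which conservatively downgrades the whole
 * [min_off, max_off + size) range (typically to STACK_MISC) rather than
 * tracking a spill.
 */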
2761 
2762 static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, int off, int size, enum bpf_access_type type)
2763 {
2764     struct bpf_reg_state *regs = cur_regs(env);
2765     struct bpf_map *map = regs[regno].map_ptr;
2766     u32 cap = bpf_map_flags_to_cap(map);
2767     if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2768         verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", map->value_size, off, size);
2769         return -EACCES;
2770     }
2771     if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2772         verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", map->value_size, off, size);
2773         return -EACCES;
2774     }
2775 
2776     return 0;
2777 }
2778 
2779 /* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
2780 static int __check_mem_access(struct bpf_verifier_env *env, int regno, int off, int size, u32 mem_size,
2781                               bool zero_size_allowed)
2782 {
2783     bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
2784     struct bpf_reg_state *reg;
2785 
2786     if (off >= 0 && size_ok && (u64)off + size <= mem_size) {
2787         return 0;
2788     }
2789 
2790     reg = &cur_regs(env)[regno];
2791     switch (reg->type) {
2792         case PTR_TO_MAP_VALUE:
2793             verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size);
2794             break;
2795         case PTR_TO_PACKET:
2796         case PTR_TO_PACKET_META:
2797         case PTR_TO_PACKET_END:
2798             verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno,
2799                     reg->id, off, mem_size);
2800             break;
2801         case PTR_TO_MEM:
2802         default:
2803             verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", mem_size, off, size);
2804     }
2805 
2806     return -EACCES;
2807 }
2808 
2809 /* check read/write into a memory region with possible variable offset */
2810 static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, int off, int size, u32 mem_size,
2811                                    bool zero_size_allowed)
2812 {
2813     struct bpf_verifier_state *vstate = env->cur_state;
2814     struct bpf_func_state *state = vstate->frame[vstate->curframe];
2815     struct bpf_reg_state *reg = &state->regs[regno];
2816     int err;
2817 
2818     /* We may have adjusted the register pointing to memory region, so we
2819      * need to try adding each of min_value and max_value to off
2820      * to make sure our theoretical access will be safe.
2821      */
2822     if (env->log.level & BPF_LOG_LEVEL) {
2823         print_verifier_state(env, state);
2824     }
2825 
2826     /* The minimum value is only important with signed
2827      * comparisons where we can't assume the floor of a
2828      * value is 0.  If we are using signed variables for our
2829      * indexes we need to make sure that whatever we use
2830      * will have a set floor within our range.
2831      */
2832     if (reg->smin_value < 0 &&
2833         (reg->smin_value == S64_MIN || (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2834          reg->smin_value + off < 0)) {
2835         verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
2836         return -EACCES;
2837     }
2838     err = __check_mem_access(env, regno, reg->smin_value + off, size, mem_size, zero_size_allowed);
2839     if (err) {
2840         verbose(env, "R%d min value is outside of the allowed memory range\n", regno);
2841         return err;
2842     }
2843 
2844     /* If we haven't set a max value then we need to bail since we can't be
2845      * sure we won't do bad things.
2846      * If reg->umax_value + off could overflow, treat that as unbounded too.
2847      */
2848     if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2849         verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n", regno);
2850         return -EACCES;
2851     }
2852     err = __check_mem_access(env, regno, reg->umax_value + off, size, mem_size, zero_size_allowed);
2853     if (err) {
2854         verbose(env, "R%d max value is outside of the allowed memory range\n", regno);
2855         return err;
2856     }
2857 
2858     return 0;
2859 }
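
/* Worked example with hypothetical numbers: for mem_size = 48, off = 16,
 * size = 4 and a register whose value is known to lie in [0, 40]:
 *   min side: 16 + 0  = 16, 16 + 4 <= 48 -> ok
 *   max side: 16 + 40 = 56, 56 + 4 >  48 -> rejected
 * so the access fails with "max value is outside of the allowed memory
 * range" even though its minimum offset would have been in bounds.
 */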
2860 
2861 /* check read/write into a map element with possible variable offset */
2862 static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed)
2863 {
2864     struct bpf_verifier_state *vstate = env->cur_state;
2865     struct bpf_func_state *state = vstate->frame[vstate->curframe];
2866     struct bpf_reg_state *reg = &state->regs[regno];
2867     struct bpf_map *map = reg->map_ptr;
2868     int err;
2869 
2870     err = check_mem_region_access(env, regno, off, size, map->value_size, zero_size_allowed);
2871     if (err) {
2872         return err;
2873     }
2874 
2875     if (map_value_has_spin_lock(map)) {
2876         u32 lock = map->spin_lock_off;
2877 
2878         /* if any part of struct bpf_spin_lock can be touched by
2879          * load/store reject this program.
2880          * To check that [x1, x2) overlaps with [y1, y2)
2881          * it is sufficient to check x1 < y2 && y1 < x2.
2882          */
2883         if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && lock < reg->umax_value + off + size) {
2884             verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2885             return -EACCES;
2886         }
2887     }
2888     return err;
2889 }
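
/* Worked overlap example with a hypothetical layout: spin_lock_off = 8 and
 * sizeof(struct bpf_spin_lock) == 4. A fixed access [off, off + size) =
 * [4, 12) gives
 *   x1 < y2:  4 < 8 + 4  -> true
 *   y1 < x2:  8 < 12     -> true
 * so the ranges overlap and the load/store is rejected. An access at
 * [12, 16) fails the first comparison (12 < 12 is false) and is allowed.
 */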
2890 
2891 #define MAX_PACKET_OFF 0xffff
2892 
2893 static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
2894 {
2895     return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
2896 }
2897 
2898 static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta,
2899                                        enum bpf_access_type t)
2900 {
2901     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
2902 
2903     switch (prog_type) {
2904         /* Program types only with direct read access go here! */
2905         case BPF_PROG_TYPE_LWT_IN:
2906         case BPF_PROG_TYPE_LWT_OUT:
2907         case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2908         case BPF_PROG_TYPE_SK_REUSEPORT:
2909         case BPF_PROG_TYPE_FLOW_DISSECTOR:
2910         case BPF_PROG_TYPE_CGROUP_SKB:
2911             if (t == BPF_WRITE) {
2912                 return false;
2913             }
2914             fallthrough;
2915 
2916         /* Program types with direct read + write access go here! */
2917         case BPF_PROG_TYPE_SCHED_CLS:
2918         case BPF_PROG_TYPE_SCHED_ACT:
2919         case BPF_PROG_TYPE_XDP:
2920         case BPF_PROG_TYPE_LWT_XMIT:
2921         case BPF_PROG_TYPE_SK_SKB:
2922         case BPF_PROG_TYPE_SK_MSG:
2923             if (meta) {
2924                 return meta->pkt_access;
2925             }
2926 
2927             env->seen_direct_write = true;
2928             return true;
2929 
2930         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2931             if (t == BPF_WRITE) {
2932                 env->seen_direct_write = true;
2933             }
2934 
2935             return true;
2936 
2937         default:
2938             return false;
2939     }
2940 }
2941 
2942 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed)
2943 {
2944     struct bpf_reg_state *regs = cur_regs(env);
2945     struct bpf_reg_state *reg = &regs[regno];
2946     int err;
2947 
2948     /* We may have added a variable offset to the packet pointer; but any
2949      * reg->range we have comes after that.  We are only checking the fixed
2950      * offset.
2951      */
2952 
2953     /* We don't allow negative numbers, because we aren't tracking enough
2954      * detail to prove they're safe.
2955      */
2956     if (reg->smin_value < 0) {
2957         verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
2958         return -EACCES;
2959     }
2960     err = __check_mem_access(env, regno, off, size, reg->range, zero_size_allowed);
2961     if (err) {
2962         verbose(env, "R%d offset is outside of the packet\n", regno);
2963         return err;
2964     }
2965 
2966     /* __check_mem_access has made sure "off + size - 1" is within u16.
2967      * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
2968      * otherwise find_good_pkt_pointers would have refused to set range info
2969      * and __check_mem_access would have rejected this pkt access.
2970      * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
2971      */
2972     env->prog->aux->max_pkt_offset = max_t(u32, env->prog->aux->max_pkt_offset, off + reg->umax_value + size - 1);
2973 
2974     return err;
2975 }
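
/* Worked example with hypothetical bounds: a packet pointer whose variable
 * part has umax_value = 60, accessed with off = 4 and size = 2, raises
 * max_pkt_offset to at least 4 + 60 + 2 - 1 = 65. Hardware-offload JITs can
 * use this bound to know how much packet data the program may touch.
 */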
2976 
2977 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
2978 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t,
2979                             enum bpf_reg_type *reg_type, u32 *btf_id)
2980 {
2981     struct bpf_insn_access_aux info = {
2982         .reg_type = *reg_type,
2983         .log = &env->log,
2984     };
2985 
2986     if (env->ops->is_valid_access && env->ops->is_valid_access(off, size, t, env->prog, &info)) {
2987         /* A non zero info.ctx_field_size indicates that this field is a
2988          * candidate for later verifier transformation to load the whole
2989          * field and then apply a mask when accessed with a narrower
2990          * access than actual ctx access size. A zero info.ctx_field_size
2991          * will only allow for whole field access and rejects any other
2992          * type of narrower access.
2993          */
2994         *reg_type = info.reg_type;
2995 
2996         if (base_type(*reg_type) == PTR_TO_BTF_ID) {
2997             *btf_id = info.btf_id;
2998         } else {
2999             env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3000         }
3001         /* remember the offset of last byte accessed in ctx */
3002         if (env->prog->aux->max_ctx_offset < off + size) {
3003             env->prog->aux->max_ctx_offset = off + size;
3004         }
3005         return 0;
3006     }
3007 
3008     verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3009     return -EACCES;
3010 }
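
/* Illustrative sketch, not part of the verifier: a narrow context load. For
 * a sched_cls program, __sk_buff::mark is a 4-byte field, so a 1-byte read
 *
 *   BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, mark)),
 *
 * should be accepted here and handled by the later context-conversion pass,
 * which loads the full underlying field and masks it down to one byte.
 * Fields that do not permit narrower access reject anything but the
 * full-width load.
 */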
3011 
3012 static int check_flow_keys_access(struct bpf_verifier_env *env, int off, int size)
3013 {
3014     if (size < 0 || off < 0 || (u64)off + size > sizeof(struct bpf_flow_keys)) {
3015         verbose(env, "invalid access to flow keys off=%d size=%d\n", off, size);
3016         return -EACCES;
3017     }
3018     return 0;
3019 }
3020 
3021 static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int size,
3022                              enum bpf_access_type t)
3023 {
3024     struct bpf_reg_state *regs = cur_regs(env);
3025     struct bpf_reg_state *reg = &regs[regno];
3026     struct bpf_insn_access_aux info = {};
3027     bool valid;
3028 
3029     if (reg->smin_value < 0) {
3030         verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
3031         return -EACCES;
3032     }
3033 
3034     switch (reg->type) {
3035         case PTR_TO_SOCK_COMMON:
3036             valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3037             break;
3038         case PTR_TO_SOCKET:
3039             valid = bpf_sock_is_valid_access(off, size, t, &info);
3040             break;
3041         case PTR_TO_TCP_SOCK:
3042             valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3043             break;
3044         case PTR_TO_XDP_SOCK:
3045             valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3046             break;
3047         default:
3048             valid = false;
3049     }
3050 
3051     if (valid) {
3052         env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3053         return 0;
3054     }
3055 
3056     verbose(env, "R%d invalid %s access off=%d size=%d\n", regno, reg_type_str(env, reg->type), off, size);
3057 
3058     return -EACCES;
3059 }
3060 
3061 static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3062 {
3063     return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3064 }
3065 
3066 static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3067 {
3068     const struct bpf_reg_state *reg = reg_state(env, regno);
3069 
3070     return reg->type == PTR_TO_CTX;
3071 }
3072 
3073 static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3074 {
3075     const struct bpf_reg_state *reg = reg_state(env, regno);
3076 
3077     return type_is_sk_pointer(reg->type);
3078 }
3079 
3080 static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3081 {
3082     const struct bpf_reg_state *reg = reg_state(env, regno);
3083 
3084     return type_is_pkt_pointer(reg->type);
3085 }
3086 
3087 static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3088 {
3089     const struct bpf_reg_state *reg = reg_state(env, regno);
3090 
3091     /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3092     return reg->type == PTR_TO_FLOW_KEYS;
3093 }
3094 
3095 static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size,
3096                                    bool strict)
3097 {
3098     struct tnum reg_off;
3099     int ip_align;
3100 
3101     /* Byte size accesses are always allowed. */
3102     if (!strict || size == 1) {
3103         return 0;
3104     }
3105 
3106     /* For platforms that do not have a Kconfig enabling
3107      * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3108      * NET_IP_ALIGN is universally set to '2'.  And on platforms
3109      * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3110      * to this code only in strict mode where we want to emulate
3111      * the NET_IP_ALIGN==2 checking.  Therefore use an
3112      * unconditional IP align value of '2'.
3113      */
3114     ip_align = 2;
3115 
3116     reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3117     if (!tnum_is_aligned(reg_off, size)) {
3118         char tn_buf[48];
3119 
3120         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3121         verbose(env, "misaligned packet access off %d+%s+%d+%d size %d\n", ip_align, tn_buf, reg->off, off, size);
3122         return -EACCES;
3123     }
3124 
3125     return 0;
3126 }
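
/* Worked example with hypothetical offsets, strict mode, var_off known zero:
 * with reg->off = 14 (pointer advanced past an Ethernet header), off = 0 and
 * size = 4, the checked value is 2 + 14 + 0 = 16, which is 4-byte aligned,
 * so the access passes. With off = 1 the value 17 is misaligned and the
 * access is rejected as a "misaligned packet access".
 */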
3127 
3128 static int check_generic_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg,
3129                                        const char *pointer_desc, int off, int size, bool strict)
3130 {
3131     struct tnum reg_off;
3132 
3133     /* Byte size accesses are always allowed. */
3134     if (!strict || size == 1) {
3135         return 0;
3136     }
3137 
3138     reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3139     if (!tnum_is_aligned(reg_off, size)) {
3140         char tn_buf[48];
3141 
3142         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3143         verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", pointer_desc, tn_buf, reg->off, off, size);
3144         return -EACCES;
3145     }
3146 
3147     return 0;
3148 }
3149 
3150 static int check_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size,
3151                                bool strict_alignment_once)
3152 {
3153     bool strict = env->strict_alignment || strict_alignment_once;
3154     const char *pointer_desc = "";
3155 
3156     switch (reg->type) {
3157         case PTR_TO_PACKET:
3158         case PTR_TO_PACKET_META:
3159             /* Special case, because of NET_IP_ALIGN. Given metadata sits
3160              * right in front, treat it the very same way.
3161              */
3162             return check_pkt_ptr_alignment(env, reg, off, size, strict);
3163         case PTR_TO_FLOW_KEYS:
3164             pointer_desc = "flow keys ";
3165             break;
3166         case PTR_TO_MAP_VALUE:
3167             pointer_desc = "value ";
3168             break;
3169         case PTR_TO_CTX:
3170             pointer_desc = "context ";
3171             break;
3172         case PTR_TO_STACK:
3173             pointer_desc = "stack ";
3174             /* The stack spill tracking logic in check_stack_write_fixed_off()
3175              * and check_stack_read_fixed_off() relies on stack accesses being
3176              * aligned.
3177              */
3178             strict = true;
3179             break;
3180         case PTR_TO_SOCKET:
3181             pointer_desc = "sock ";
3182             break;
3183         case PTR_TO_SOCK_COMMON:
3184             pointer_desc = "sock_common ";
3185             break;
3186         case PTR_TO_TCP_SOCK:
3187             pointer_desc = "tcp_sock ";
3188             break;
3189         case PTR_TO_XDP_SOCK:
3190             pointer_desc = "xdp_sock ";
3191             break;
3192         default:
3193             break;
3194     }
3195     return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, strict);
3196 }
3197 
3198 static int update_stack_depth(struct bpf_verifier_env *env, const struct bpf_func_state *func, int off)
3199 {
3200     u16 stack = env->subprog_info[func->subprogno].stack_depth;
3201 
3202     if (stack >= -off) {
3203         return 0;
3204     }
3205 
3206     /* update known max for given subprogram */
3207     env->subprog_info[func->subprogno].stack_depth = -off;
3208     return 0;
3209 }
3210 
3211 /* starting from main bpf function walk all instructions of the function
3212  * and recursively walk all callees that given function can call.
3213  * Ignore jump and exit insns.
3214  * Since recursion is prevented by check_cfg() this algorithm
3215  * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3216  */
3217 static int check_max_stack_depth(struct bpf_verifier_env *env)
3218 {
3219     int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3220     struct bpf_subprog_info *subprog = env->subprog_info;
3221     struct bpf_insn *insn = env->prog->insnsi;
3222     bool tail_call_reachable = false;
3223     int ret_insn[MAX_CALL_FRAMES];
3224     int ret_prog[MAX_CALL_FRAMES];
3225     int j;
3226     int process_flag = 0;
3227     int continue_flag = 0;
3228 
3229     while (1) {
3230         if (process_flag == 0 && continue_flag == 0) {
3231             /* protect against potential stack overflow that might happen when
3232              * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3233              * depth for such case down to 256 so that the worst case scenario
3234              * would result in 8k stack size (32 which is tailcall limit * 256 =
3235              * 8k).
3236              *
3237              * To get the idea what might happen, see an example:
3238              * func1 -> sub rsp, 128
3239              *  subfunc1 -> sub rsp, 256
3240              *  tailcall1 -> add rsp, 256
3241              *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3242              *   subfunc2 -> sub rsp, 64
3243              *   subfunc22 -> sub rsp, 128
3244              *   tailcall2 -> add rsp, 128
3245              *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3246              *
3247              * tailcall will unwind the current stack frame but it will not get rid
3248              * of caller's stack as shown on the example above.
3249              */
3250             if (idx && subprog[idx].has_tail_call && depth >= VERIFIER_TWOHUNDREDFIFTYSIX) {
3251                 verbose(env, "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3252                         depth);
3253                 return -EACCES;
3254             }
3255             /* round up to 32-bytes, since this is granularity
3256              * of interpreter stack size
3257              */
3258             depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), VERIFIER_THIRTYTWO);
3259             if (depth > MAX_BPF_STACK) {
3260                 verbose(env, "combined stack size of %d calls is %d. Too large\n", frame + 1, depth);
3261                 return -EACCES;
3262             }
3263         }
3264         while (1) {
3265             continue_flag = 0;
3266             subprog_end = subprog[idx + 1].start;
3267             for (; i < subprog_end; i++) {
3268                 if (insn[i].code != (BPF_JMP | BPF_CALL)) {
3269                     continue;
3270                 }
3271                 if (insn[i].src_reg != BPF_PSEUDO_CALL) {
3272                     continue;
3273                 }
3274                 /* remember insn and function to return to */
3275                 ret_insn[frame] = i + 1;
3276                 ret_prog[frame] = idx;
3277 
3278                 /* find the callee */
3279                 i = i + insn[i].imm + 1;
3280                 idx = find_subprog(env, i);
3281                 if (idx < 0) {
3282                     WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", i);
3283                     return -EFAULT;
3284                 }
3285 
3286                 if (subprog[idx].has_tail_call) {
3287                     tail_call_reachable = true;
3288                 }
3289 
3290                 frame++;
3291                 if (frame >= MAX_CALL_FRAMES) {
3292                     verbose(env, "the call stack of %d frames is too deep !\n", frame);
3293                     return -E2BIG;
3294                 }
3295                 process_flag = 1;
3296                 break;
3297             }
3298             /* leave the scan loop in either case: a pseudo call was
3299              * found (process_flag is set), or the end of the subprog
3300              * was reached and the frame is popped below */
            break;
3301         }
3302         if (process_flag == 1) {
3303             process_flag = 0;
3304             continue;
3305         }
3306         /* if tail call got detected across bpf2bpf calls then mark each of the
3307          * currently present subprog frames as tail call reachable subprogs;
3308          * this info will be utilized by JIT so that we will be preserving the
3309          * tail call counter throughout bpf2bpf calls combined with tailcalls
3310          */
3311         if (tail_call_reachable) {
3312             for (j = 0; j < frame; j++) {
3313                 subprog[ret_prog[j]].tail_call_reachable = true;
3314             }
3315         }
3316         if (subprog[0].tail_call_reachable) {
3317             env->prog->aux->tail_call_reachable = true;
3318         }
3319 
3320         /* end of for() loop means the last insn of the 'subprog'
3321          * was reached. Doesn't matter whether it was JA or EXIT
3322          */
3323         if (frame == 0) {
3324             return 0;
3325         }
3326         depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), VERIFIER_THIRTYTWO);
3327         frame--;
3328         i = ret_insn[frame];
3329         idx = ret_prog[frame];
3330         continue_flag = 1;
3331         continue;
3332     }
3333 }
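
/* Worked example with hypothetical frame sizes: main uses 40 bytes of stack
 * and calls subfunc1 which uses 100 bytes:
 *   depth = round_up(40, 32) + round_up(100, 32) = 64 + 128 = 192 <= 512
 * so the program passes. The extra tail-call cap only looks at the depth
 * accumulated by the callers: here it compares 64 against 256 when entering
 * subfunc1, so a single shallow caller is fine, while a deep chain of
 * bpf2bpf callers in front of a tail call can trip it.
 */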
3334 
3335 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
3336 static int get_callee_stack_depth(struct bpf_verifier_env *env, const struct bpf_insn *insn, int idx)
3337 {
3338     int start = idx + insn->imm + 1, subprog;
3339 
3340     subprog = find_subprog(env, start);
3341     if (subprog < 0) {
3342         WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", start);
3343         return -EFAULT;
3344     }
3345     return env->subprog_info[subprog].stack_depth;
3346 }
3347 #endif
3348 
3349 static int __check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno,
3350                                bool fixed_off_ok)
3351 {
3352     /* Access to this pointer-typed register or passing it to a helper
3353      * is only allowed in its original, unmodified form.
3354      */
3355 
3356     if (!fixed_off_ok && reg->off) {
3357         verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", reg_type_str(env, reg->type), regno,
3358                 reg->off);
3359         return -EACCES;
3360     }
3361 
3362     if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3363         char tn_buf[48];
3364 
3365         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3366         verbose(env, "variable %s access var_off=%s disallowed\n", reg_type_str(env, reg->type), tn_buf);
3367         return -EACCES;
3368     }
3369 
3370     return 0;
3371 }
3372 
3373 int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno)
3374 {
3375     return __check_ptr_off_reg(env, reg, regno, false);
3376 }
3377 
3378 static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg,
3379                                  int regno, int off, int size)
3380 {
3381     if (off < 0) {
3382         verbose(env, "R%d invalid %s buffer access: off=%d, size=%d\n", regno, buf_info, off, size);
3383         return -EACCES;
3384     }
3385     if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3386         char tn_buf[48];
3387 
3388         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3389         verbose(env, "R%d invalid variable buffer offset: off=%d, var_off=%s\n", regno, off, tn_buf);
3390         return -EACCES;
3391     }
3392 
3393     return 0;
3394 }
3395 
3396 static int check_tp_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, int off,
3397                                   int size)
3398 {
3399     int err;
3400 
3401     err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3402     if (err) {
3403         return err;
3404     }
3405 
3406     if (off + size > env->prog->aux->max_tp_access) {
3407         env->prog->aux->max_tp_access = off + size;
3408     }
3409 
3410     return 0;
3411 }
3412 
3413 static int check_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, int off,
3414                                int size, bool zero_size_allowed, const char *buf_info, u32 *max_access)
3415 {
3416     int err;
3417 
3418     err = __check_buffer_access(env, buf_info, reg, regno, off, size);
3419     if (err) {
3420         return err;
3421     }
3422 
3423     if (off + size > *max_access) {
3424         *max_access = off + size;
3425     }
3426 
3427     return 0;
3428 }
3429 
3430 /* BPF architecture zero extends alu32 ops into 64-bit registers */
3431 static void zext_32_to_64(struct bpf_reg_state *reg)
3432 {
3433     reg->var_off = tnum_subreg(reg->var_off);
3434     verifier_reg_assign_32_into_64(reg);
3435 }
3436 
3437 /* truncate register to smaller size (in bytes)
3438  * must be called with size < BPF_REG_SIZE
3439  */
3440 static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
3441 {
3442     u64 mask;
3443 
3444     /* clear high bits in bit representation */
3445     reg->var_off = tnum_cast(reg->var_off, size);
3446 
3447     /* fix arithmetic bounds */
3448     mask = ((u64)1 << (size * VERIFIER_EIGHT)) - 1;
3449     if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
3450         reg->umin_value &= mask;
3451         reg->umax_value &= mask;
3452     } else {
3453         reg->umin_value = 0;
3454         reg->umax_value = mask;
3455     }
3456     reg->smin_value = reg->umin_value;
3457     reg->smax_value = reg->umax_value;
3458 
3459     /* If size is smaller than 32bit register the 32bit register
3460      * values are also truncated so we push 64-bit bounds into
3461      * 32-bit bounds. Above were truncated < 32-bits already.
3462      */
3463     if (size >= VERIFIER_FOUR) {
3464         return;
3465     }
3466     __reg_combine_64_into_32(reg);
3467 }
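
/* Worked example: coercing to size = 1 gives mask = 0xff. A register known
 * to be in [0x120, 0x143] keeps its high bits equal under ~mask
 * (0x120 & ~0xff == 0x143 & ~0xff == 0x100), so the bounds tighten to
 * [0x20, 0x43]. For [0xff, 0x101] the high bits differ and the bounds
 * collapse to the conservative [0, 0xff].
 */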
3468 
3469 static bool bpf_map_is_rdonly(const struct bpf_map *map)
3470 {
3471     return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
3472 }
3473 
3474 static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3475 {
3476     void *ptr;
3477     u64 addr;
3478     int err;
3479 
3480     err = map->ops->map_direct_value_addr(map, &addr, off);
3481     if (err) {
3482         return err;
3483     }
3484     ptr = (void *)(long)addr + off;
3485 
3486     switch (size) {
3487         case sizeof(u8):
3488             *val = (u64)*(u8 *)ptr;
3489             break;
3490         case sizeof(u16):
3491             *val = (u64)*(u16 *)ptr;
3492             break;
3493         case sizeof(u32):
3494             *val = (u64)*(u32 *)ptr;
3495             break;
3496         case sizeof(u64):
3497             *val = *(u64 *)ptr;
3498             break;
3499         default:
3500             return -EINVAL;
3501     }
3502     return 0;
3503 }
3504 
3505 static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off,
3506                                    int size, enum bpf_access_type atype, int value_regno)
3507 {
3508     struct bpf_reg_state *reg = regs + regno;
3509     const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3510     const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3511     u32 btf_id;
3512     int ret;
3513 
3514     if (off < 0) {
3515         verbose(env, "R%d is ptr_%s invalid negative access: off=%d\n", regno, tname, off);
3516         return -EACCES;
3517     }
3518     if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3519         char tn_buf[48];
3520 
3521         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3522         verbose(env, "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", regno, tname, off, tn_buf);
3523         return -EACCES;
3524     }
3525 
3526     if (env->ops->btf_struct_access) {
3527         ret = env->ops->btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3528     } else {
3529         if (atype != BPF_READ) {
3530             verbose(env, "only read is supported\n");
3531             return -EACCES;
3532         }
3533 
3534         ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3535     }
3536 
3537     if (ret < 0) {
3538         return ret;
3539     }
3540 
3541     if (atype == BPF_READ && value_regno >= 0) {
3542         mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3543     }
3544 
3545     return 0;
3546 }
3547 
3548 static int check_ptr_to_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off,
3549                                    int size, enum bpf_access_type atype, int value_regno)
3550 {
3551     struct bpf_reg_state *reg = regs + regno;
3552     struct bpf_map *map = reg->map_ptr;
3553     const struct btf_type *t;
3554     const char *tname;
3555     u32 btf_id;
3556     int ret;
3557 
3558     if (!btf_vmlinux) {
3559         verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
3560         return -ENOTSUPP;
3561     }
3562 
3563     if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
3564         verbose(env, "map_ptr access not supported for map type %d\n", map->map_type);
3565         return -ENOTSUPP;
3566     }
3567 
3568     t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
3569     tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3570 
3571     if (!env->allow_ptr_to_map_access) {
3572         verbose(env, "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", tname);
3573         return -EPERM;
3574     }
3575 
3576     if (off < 0) {
3577         verbose(env, "R%d is %s invalid negative access: off=%d\n", regno, tname, off);
3578         return -EACCES;
3579     }
3580 
3581     if (atype != BPF_READ) {
3582         verbose(env, "only read from %s is supported\n", tname);
3583         return -EACCES;
3584     }
3585 
3586     ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3587     if (ret < 0) {
3588         return ret;
3589     }
3590 
3591     if (value_regno >= 0) {
3592         mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3593     }
3594 
3595     return 0;
3596 }
3597 
3598 /* Check that the stack access at the given offset is within bounds. The
3599  * maximum valid offset is -1.
3600  *
3601  * The minimum valid offset is -MAX_BPF_STACK for writes, and
3602  * -state->allocated_stack for reads.
3603  */
3604 static int check_stack_slot_within_bounds(int off, struct bpf_func_state *state, enum bpf_access_type t)
3605 {
3606     int min_valid_off;
3607 
3608     if (t == BPF_WRITE) {
3609         min_valid_off = -MAX_BPF_STACK;
3610     } else {
3611         min_valid_off = -state->allocated_stack;
3612     }
3613 
3614     if (off < min_valid_off || off > -1) {
3615         return -EACCES;
3616     }
3617     return 0;
3618 }
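
/* Worked example: with MAX_BPF_STACK = 512, a write at off = -520 is below
 * -MAX_BPF_STACK and is rejected, while off = -8 is fine. Reads are floored
 * by what has actually been allocated so far: with allocated_stack = 8, a
 * read at off = -16 is rejected even though it is within the 512-byte limit.
 */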
3619 
3620 /* Check that the stack access at 'regno + off' falls within the maximum stack
3621  * bounds.
3622  *
3623  * 'off' includes `regno->offset`, but not its dynamic part (if any).
3624  */
3625 static int check_stack_access_within_bounds(struct bpf_verifier_env *env, int regno, int off, int access_size,
3626                                             enum stack_access_src src, enum bpf_access_type type)
3627 {
3628     struct bpf_reg_state *regs = cur_regs(env);
3629     struct bpf_reg_state *reg = regs + regno;
3630     struct bpf_func_state *state = func(env, reg);
3631     int min_off, max_off;
3632     int err;
3633     char *err_extra;
3634 
3635     if (src == ACCESS_HELPER) {
3636         /* We don't know if helpers are reading or writing (or both). */
3637         err_extra = " indirect access to";
3638     } else if (type == BPF_READ) {
3639         err_extra = " read from";
3640     } else {
3641         err_extra = " write to";
3642     }
3643 
3644     if (tnum_is_const(reg->var_off)) {
3645         min_off = reg->var_off.value + off;
3646         if (access_size > 0) {
3647             max_off = min_off + access_size - 1;
3648         } else {
3649             max_off = min_off;
3650         }
3651     } else {
3652         if (reg->smax_value >= BPF_MAX_VAR_OFF || reg->smin_value <= -BPF_MAX_VAR_OFF) {
3653             verbose(env, "invalid unbounded variable-offset%s stack R%d\n", err_extra, regno);
3654             return -EACCES;
3655         }
3656         min_off = reg->smin_value + off;
3657         if (access_size > 0) {
3658             max_off = reg->smax_value + off + access_size - 1;
3659         } else {
3660             max_off = min_off;
3661         }
3662     }
3663 
3664     err = check_stack_slot_within_bounds(min_off, state, type);
3665     if (!err) {
3666         err = check_stack_slot_within_bounds(max_off, state, type);
3667     }
3668 
3669     if (err) {
3670         if (tnum_is_const(reg->var_off)) {
3671             verbose(env, "invalid%s stack R%d off=%d size=%d\n", err_extra, regno, off, access_size);
3672         } else {
3673             char tn_buf[48];
3674 
3675             tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3676             verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n", err_extra, regno, tn_buf,
3677                     access_size);
3678         }
3679     }
3680     return err;
3681 }
3682 
3683 /* check whether memory at (regno + off) is accessible for t = (read | write)
3684  * if t==write, value_regno is a register which value is stored into memory
3685  * if t==read, value_regno is a register which will receive the value from memory
3686  * if t==write && value_regno==-1, some unknown value is stored into memory
3687  * if t==read && value_regno==-1, don't care what we read from memory
3688  */
3689 static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size,
3690                             enum bpf_access_type t, int value_regno, bool strict_alignment_once)
3691 {
3692     struct bpf_reg_state *regs = cur_regs(env);
3693     struct bpf_reg_state *reg = regs + regno;
3694     struct bpf_func_state *state;
3695     int size, err = 0;
3696 
3697     size = bpf_size_to_bytes(bpf_size);
3698     if (size < 0) {
3699         return size;
3700     }
3701 
3702     /* alignment checks will add in reg->off themselves */
3703     err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
3704     if (err) {
3705         return err;
3706     }
3707 
3708     /* for access checks, reg->off is just part of off */
3709     off += reg->off;
3710 
3711     if (reg->type == PTR_TO_MAP_VALUE) {
3712         if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3713             verbose(env, "R%d leaks addr into map\n", value_regno);
3714             return -EACCES;
3715         }
3716         err = check_map_access_type(env, regno, off, size, t);
3717         if (err) {
3718             return err;
3719         }
3720         err = check_map_access(env, regno, off, size, false);
3721         if (!err && t == BPF_READ && value_regno >= 0) {
3722             struct bpf_map *map = reg->map_ptr;
3723 
3724             /* if map is read-only, track its contents as scalars */
3725             if (tnum_is_const(reg->var_off) && bpf_map_is_rdonly(map) && map->ops->map_direct_value_addr) {
3726                 int map_off = off + reg->var_off.value;
3727                 u64 val = 0;
3728 
3729                 err = bpf_map_direct_read(map, map_off, size, &val);
3730                 if (err) {
3731                     return err;
3732                 }
3733 
3734                 regs[value_regno].type = SCALAR_VALUE;
3735                 verifier_mark_reg_known(&regs[value_regno], val);
3736             } else {
3737                 mark_reg_unknown(env, regs, value_regno);
3738             }
3739         }
3740     } else if (base_type(reg->type) == PTR_TO_MEM) {
3741         bool rdonly_mem = type_is_rdonly_mem(reg->type);
3742 
3743         if (type_may_be_null(reg->type)) {
3744             verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str(env, reg->type));
3745             return -EACCES;
3746         }
3747 
3748         if (t == BPF_WRITE && rdonly_mem) {
3749             verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3750             return -EACCES;
3751         }
3752 
3753         if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3754             verbose(env, "R%d leaks addr into mem\n", value_regno);
3755             return -EACCES;
3756         }
3757 
3758         err = check_mem_region_access(env, regno, off, size, reg->mem_size, false);
3759         if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) {
3760             mark_reg_unknown(env, regs, value_regno);
3761         }
3762     } else if (reg->type == PTR_TO_CTX) {
3763         enum bpf_reg_type reg_type = SCALAR_VALUE;
3764         u32 btf_id = 0;
3765 
3766         if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3767             verbose(env, "R%d leaks addr into ctx\n", value_regno);
3768             return -EACCES;
3769         }
3770 
3771         err = check_ptr_off_reg(env, reg, regno);
3772         if (err < 0) {
3773             return err;
3774         }
3775 
3776         err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3777         if (err) {
3778             verbose_linfo(env, insn_idx, "; ");
3779         }
3780         if (!err && t == BPF_READ && value_regno >= 0) {
3781             /* ctx access returns either a scalar, or a
3782              * PTR_TO_PACKET[_META,_END]. In the latter
3783              * case, we know the offset is zero.
3784              */
3785             if (reg_type == SCALAR_VALUE) {
3786                 mark_reg_unknown(env, regs, value_regno);
3787             } else {
3788                 mark_reg_known_zero(env, regs, value_regno);
3789                 if (type_may_be_null(reg_type)) {
3790                     regs[value_regno].id = ++env->id_gen;
3791                 }
3792                 /* A load of ctx field could have different
3793                  * actual load size with the one encoded in the
3794                  * insn. When the dst is PTR, it is for sure not
3795                  * a sub-register.
3796                  */
3797                 regs[value_regno].subreg_def = DEF_NOT_SUBREG;
3798                 if (base_type(reg_type) == PTR_TO_BTF_ID) {
3799                     regs[value_regno].btf_id = btf_id;
3800                 }
3801             }
3802             regs[value_regno].type = reg_type;
3803         }
3804     } else if (reg->type == PTR_TO_STACK) {
3805         /* Basic bounds checks. */
3806         err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
3807         if (err) {
3808             return err;
3809         }
3810         state = func(env, reg);
3811         err = update_stack_depth(env, state, off);
3812         if (err) {
3813             return err;
3814         }
3815 
3816         if (t == BPF_READ) {
3817             err = check_stack_read(env, regno, off, size, value_regno);
3818         } else {
3819             err = check_stack_write(env, regno, off, size, value_regno, insn_idx);
3820         }
3821     } else if (reg_is_pkt_pointer(reg)) {
3822         if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
3823             verbose(env, "cannot write into packet\n");
3824             return -EACCES;
3825         }
3826         if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3827             verbose(env, "R%d leaks addr into packet\n", value_regno);
3828             return -EACCES;
3829         }
3830         err = check_packet_access(env, regno, off, size, false);
3831         if (!err && t == BPF_READ && value_regno >= 0) {
3832             mark_reg_unknown(env, regs, value_regno);
3833         }
3834     } else if (reg->type == PTR_TO_FLOW_KEYS) {
3835         if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3836             verbose(env, "R%d leaks addr into flow keys\n", value_regno);
3837             return -EACCES;
3838         }
3839 
3840         err = check_flow_keys_access(env, off, size);
3841         if (!err && t == BPF_READ && value_regno >= 0) {
3842             mark_reg_unknown(env, regs, value_regno);
3843         }
3844     } else if (type_is_sk_pointer(reg->type)) {
3845         if (t == BPF_WRITE) {
3846             verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3847             return -EACCES;
3848         }
3849         err = check_sock_access(env, insn_idx, regno, off, size, t);
3850         if (!err && value_regno >= 0) {
3851             mark_reg_unknown(env, regs, value_regno);
3852         }
3853     } else if (reg->type == PTR_TO_TP_BUFFER) {
3854         err = check_tp_buffer_access(env, reg, regno, off, size);
3855         if (!err && t == BPF_READ && value_regno >= 0) {
3856             mark_reg_unknown(env, regs, value_regno);
3857         }
3858     } else if (reg->type == PTR_TO_BTF_ID) {
3859         err = check_ptr_to_btf_access(env, regs, regno, off, size, t, value_regno);
3860     } else if (reg->type == CONST_PTR_TO_MAP) {
3861         err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno);
3862     } else if (base_type(reg->type) == PTR_TO_BUF) {
3863         bool rdonly_mem = type_is_rdonly_mem(reg->type);
3864         const char *buf_info;
3865         u32 *max_access;
3866 
3867         if (rdonly_mem) {
3868             if (t == BPF_WRITE) {
3869                 verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3870                 return -EACCES;
3871             }
3872             buf_info = "rdonly";
3873             max_access = &env->prog->aux->max_rdonly_access;
3874         } else {
3875             buf_info = "rdwr";
3876             max_access = &env->prog->aux->max_rdwr_access;
3877         }
3878 
3879         err = check_buffer_access(env, reg, regno, off, size, false, buf_info, max_access);
3880         if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) {
3881             mark_reg_unknown(env, regs, value_regno);
3882         }
3883     } else {
3884         verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str(env, reg->type));
3885         return -EACCES;
3886     }
3887 
3888     if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && regs[value_regno].type == SCALAR_VALUE) {
3889         /* b/h/w load zero-extends, mark upper bits as known 0 */
3890         coerce_reg_to_size(&regs[value_regno], size);
3891     }
3892     return err;
3893 }
3894 
3895 static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
3896 {
3897     int err;
3898 
3899     if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || insn->imm != 0) {
3900         verbose(env, "BPF_XADD uses reserved fields\n");
3901         return -EINVAL;
3902     }
3903 
3904     /* check src1 operand */
3905     err = check_reg_arg(env, insn->src_reg, SRC_OP);
3906     if (err) {
3907         return err;
3908     }
3909 
3910     /* check src2 operand */
3911     err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3912     if (err) {
3913         return err;
3914     }
3915 
3916     if (is_pointer_value(env, insn->src_reg)) {
3917         verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
3918         return -EACCES;
3919     }
3920 
3921     if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) ||
3922         is_sk_reg(env, insn->dst_reg)) {
3923         verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", insn->dst_reg,
3924                 reg_type_str(env, reg_state(env, insn->dst_reg)->type));
3925         return -EACCES;
3926     }
3927 
3928     /* check whether atomic_add can read the memory */
3929     err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1, true);
3930     if (err) {
3931         return err;
3932     }
3933 
3934     /* check whether atomic_add can write into the same memory */
3935     return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3936 }
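
/* Illustrative sketch, not part of the verifier: an XADD that passes the
 * checks above, using the insn macros from filter.h:
 *
 *   BPF_MOV64_IMM(BPF_REG_0, 1),
 *   BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),            // init the slot first
 *   BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),  // fp[-8] += r0
 *
 * The same XADD with a ctx, packet, flow-keys or socket pointer as the
 * destination is rejected above, and a pointer value in the source register
 * is refused for unprivileged loaders before the two memory accesses are
 * verified.
 */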
3937 
3938 /* When register 'regno' is used to read the stack (either directly or through
3939  * a helper function) make sure that it's within stack boundary and, depending
3940  * on the access type, that all elements of the stack are initialized.
3941  *
3942  * 'off' includes 'regno->off', but not its dynamic part (if any).
3943  *
3944  * All registers that have been spilled on the stack in the slots within the
3945  * read offsets are marked as read.
3946  */
3947 static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size,
3948                                          bool zero_size_allowed, enum stack_access_src type,
3949                                          struct bpf_call_arg_meta *meta)
3950 {
3951     struct bpf_reg_state *reg = reg_state(env, regno);
3952     struct bpf_func_state *state = func(env, reg);
3953     int err, min_off, max_off, i, j, slot, spi;
3954     char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
3955     enum bpf_access_type bounds_check_type;
3956     /* Some accesses can write anything into the stack, others are
3957      * read-only.
3958      */
3959     bool clobber = false;
3960 
3961     if (access_size == 0 && !zero_size_allowed) {
3962         verbose(env, "invalid zero-sized read\n");
3963         return -EACCES;
3964     }
3965 
3966     if (type == ACCESS_HELPER) {
3967         /* The bounds checks for writes are more permissive than for
3968          * reads. However, if raw_mode is not set, we'll do extra
3969          * checks below.
3970          */
3971         bounds_check_type = BPF_WRITE;
3972         clobber = true;
3973     } else {
3974         bounds_check_type = BPF_READ;
3975     }
3976     err = check_stack_access_within_bounds(env, regno, off, access_size, type, bounds_check_type);
3977     if (err) {
3978         return err;
3979     }
3980 
3981     if (tnum_is_const(reg->var_off)) {
3982         min_off = max_off = reg->var_off.value + off;
3983     } else {
3984         /* Variable offset is prohibited for unprivileged mode for
3985          * simplicity since it requires corresponding support in
3986          * Spectre masking for stack ALU.
3987          * See also retrieve_ptr_limit().
3988          */
3989         if (!env->bypass_spec_v1) {
3990             char tn_buf[48];
3991 
3992             tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3993             verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n", regno, err_extra,
3994                     tn_buf);
3995             return -EACCES;
3996         }
3997         /* Only an initialized buffer on the stack may be accessed with a
3998          * variable offset. With an uninitialized buffer it's hard to
3999          * guarantee that the whole memory region is marked as initialized
4000          * on helper return, since the specific bounds are unknown, which
4001          * could leak uninitialized stack memory.
4002          */
4003         if (meta && meta->raw_mode) {
4004             meta = NULL;
4005         }
4006 
4007         min_off = reg->smin_value + off;
4008         max_off = reg->smax_value + off;
4009     }
4010 
4011     if (meta && meta->raw_mode) {
4012         meta->access_size = access_size;
4013         meta->regno = regno;
4014         return 0;
4015     }
4016 
4017     for (i = min_off; i < max_off + access_size; i++) {
4018         u8 *stype;
4019 
4020         slot = -i - 1;
4021         spi = slot / BPF_REG_SIZE;
4022         if (state->allocated_stack <= slot) {
4023             goto err;
4024         }
4025         stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4026         if (*stype == STACK_MISC) {
4027             goto mark;
4028         }
4029         if (*stype == STACK_ZERO) {
4030             if (clobber) {
4031                 /* helper can write anything into the stack */
4032                 *stype = STACK_MISC;
4033             }
4034             goto mark;
4035         }
4036 
4037         if (state->stack[spi].slot_type[0] == STACK_SPILL && state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) {
4038             goto mark;
4039         }
4040 
4041         if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4042             (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || env->allow_ptr_leaks)) {
4043             if (clobber) {
4044                 __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4045                 for (j = 0; j < BPF_REG_SIZE; j++) {
4046                     state->stack[spi].slot_type[j] = STACK_MISC;
4047                 }
4048             }
4049             goto mark;
4050         }
4051 
4052     err:
4053         if (tnum_is_const(reg->var_off)) {
4054             verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", err_extra, regno, min_off, i - min_off,
4055                     access_size);
4056         } else {
4057             char tn_buf[48];
4058 
4059             tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4060             verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n", err_extra, regno, tn_buf, i - min_off,
4061                     access_size);
4062         }
4063         return -EACCES;
4064     mark:
4065         /* reading any byte out of 8-byte 'spill_slot' will cause
4066          * the whole slot to be marked as 'read'
4067          */
4068         mark_reg_read(env, &state->stack[spi].spilled_ptr, state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
4069     }
4070     return update_stack_depth(env, state, min_off);
4071 }
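
/* Illustrative example (hypothetical map and variable names): a helper call
 * that reads a stack buffer, e.g. passing a stack slot as a map key,
 *
 *     u32 key = 1;                               // written to fp-4 first
 *     val = bpf_map_lookup_elem(&my_map, &key);
 *
 * reaches this function via check_helper_mem_access() with off = -4 and
 * access_size = 4. Each byte in [fp-4, fp) must be STACK_MISC, STACK_ZERO or
 * part of a spill; if 'key' were never written, the walk above reports
 * "invalid indirect read from stack".
 */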
4072 
4073 static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed,
4074                                    struct bpf_call_arg_meta *meta)
4075 {
4076     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4077     const char *buf_info;
4078     u32 *max_access;
4079 
4080     switch (base_type(reg->type)) {
4081         case PTR_TO_PACKET:
4082         case PTR_TO_PACKET_META:
4083             return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed);
4084         case PTR_TO_MAP_VALUE:
4085             if (check_map_access_type(env, regno, reg->off, access_size,
4086                                       meta && meta->raw_mode ? BPF_WRITE : BPF_READ)) {
4087                 return -EACCES;
4088             }
4089             return check_map_access(env, regno, reg->off, access_size, zero_size_allowed);
4090         case PTR_TO_MEM:
4091             return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed);
4092         case PTR_TO_BUF:
4093             if (type_is_rdonly_mem(reg->type)) {
4094                 if (meta && meta->raw_mode) {
4095                     return -EACCES;
4096                 }
4097 
4098                 buf_info = "rdonly";
4099                 max_access = &env->prog->aux->max_rdonly_access;
4100             } else {
4101                 buf_info = "rdwr";
4102                 max_access = &env->prog->aux->max_rdwr_access;
4103             }
4104             return check_buffer_access(env, reg, regno, reg->off, access_size, zero_size_allowed, buf_info, max_access);
4105         case PTR_TO_STACK:
4106             return check_stack_range_initialized(env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER,
4107                                                  meta);
4108         default: /* scalar_value or invalid ptr */
4109             /* Allow zero-byte read from NULL, regardless of pointer type */
4110             if (zero_size_allowed && access_size == 0 && register_is_null(reg)) {
4111                 return 0;
4112             }
4113 
4114             verbose(env, "R%d type=%s ", regno, reg_type_str(env, reg->type));
4115             verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
4116             return -EACCES;
4117     }
4118 }
4119 
4120 /* Implementation details:
4121  * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4122  * Two bpf_map_lookups (even with the same key) will have different reg->id.
4123  * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4124  * value_or_null->value transition, since the verifier only cares about
4125  * the range of access to valid map value pointer and doesn't care about actual
4126  * address of the map element.
4127  * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4128  * reg->id > 0 after value_or_null->value transition. By doing so
4129  * two bpf_map_lookups will be considered two different pointers that
4130  * point to different bpf_spin_locks.
4131  * The verifier allows taking only one bpf_spin_lock at a time to avoid
4132  * dead-locks.
4133  * Since only one bpf_spin_lock is allowed the checks are simpler than
4134  * reg_is_refcounted() logic. The verifier needs to remember only
4135  * one spin_lock instead of array of acquired_refs.
4136  * cur_state->active_spin_lock remembers which map value element got locked
4137  * and clears it after bpf_spin_unlock.
4138  */
4139 static int process_spin_lock(struct bpf_verifier_env *env, int regno, bool is_lock)
4140 {
4141     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4142     struct bpf_verifier_state *cur = env->cur_state;
4143     bool is_const = tnum_is_const(reg->var_off);
4144     struct bpf_map *map = reg->map_ptr;
4145     u64 val = reg->var_off.value;
4146 
4147     if (!is_const) {
4148         verbose(env, "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", regno);
4149         return -EINVAL;
4150     }
4151     if (!map->btf) {
4152         verbose(env, "map '%s' has to have BTF in order to use bpf_spin_lock\n", map->name);
4153         return -EINVAL;
4154     }
4155     if (!map_value_has_spin_lock(map)) {
4156         if (map->spin_lock_off == -E2BIG) {
4157             verbose(env, "map '%s' has more than one 'struct bpf_spin_lock'\n", map->name);
4158         } else if (map->spin_lock_off == -ENOENT) {
4159             verbose(env, "map '%s' doesn't have 'struct bpf_spin_lock'\n", map->name);
4160         } else {
4161             verbose(env, "map '%s' is not a struct type or bpf_spin_lock is mangled\n", map->name);
4162         }
4163         return -EINVAL;
4164     }
4165     if (map->spin_lock_off != val + reg->off) {
4166         verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", val + reg->off);
4167         return -EINVAL;
4168     }
4169     if (is_lock) {
4170         if (cur->active_spin_lock) {
4171             verbose(env, "Locking two bpf_spin_locks is not allowed\n");
4172             return -EINVAL;
4173         }
4174         cur->active_spin_lock = reg->id;
4175     } else {
4176         if (!cur->active_spin_lock) {
4177             verbose(env, "bpf_spin_unlock without taking a lock\n");
4178             return -EINVAL;
4179         }
4180         if (cur->active_spin_lock != reg->id) {
4181             verbose(env, "bpf_spin_unlock of different lock\n");
4182             return -EINVAL;
4183         }
4184         cur->active_spin_lock = 0;
4185     }
4186     return 0;
4187 }
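
/* Illustrative usage from the program side (hypothetical map and struct
 * names), matching the rules enforced above: the lock must live in a
 * BTF-described map value, and lock/unlock must pair up on the same element:
 *
 *     struct elem {
 *         int counter;
 *         struct bpf_spin_lock lock;
 *     };
 *
 *     struct elem *val = bpf_map_lookup_elem(&my_map, &key);
 *     if (val) {
 *         bpf_spin_lock(&val->lock);
 *         val->counter++;
 *         bpf_spin_unlock(&val->lock);
 *     }
 */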
4188 
4189 static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4190 {
4191     return base_type(type) == ARG_PTR_TO_MEM || base_type(type) == ARG_PTR_TO_UNINIT_MEM;
4192 }
4193 
4194 static bool arg_type_is_mem_size(enum bpf_arg_type type)
4195 {
4196     return type == ARG_CONST_SIZE || type == ARG_CONST_SIZE_OR_ZERO;
4197 }
4198 
4199 static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4200 {
4201     return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4202 }
4203 
4204 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4205 {
4206     return type == ARG_PTR_TO_INT || type == ARG_PTR_TO_LONG;
4207 }
4208 
4209 static int int_ptr_type_to_size(enum bpf_arg_type type)
4210 {
4211     if (type == ARG_PTR_TO_INT) {
4212         return sizeof(u32);
4213     } else if (type == ARG_PTR_TO_LONG) {
4214         return sizeof(u64);
4215     }
4216 
4217     return -EINVAL;
4218 }
4219 
4220 static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta,
4221                                 enum bpf_arg_type *arg_type)
4222 {
4223     if (!meta->map_ptr) {
4224         /* kernel subsystem misconfigured verifier */
4225         verbose(env, "invalid map_ptr to access map->type\n");
4226         return -EACCES;
4227     }
4228 
4229     switch (meta->map_ptr->map_type) {
4230         case BPF_MAP_TYPE_SOCKMAP:
4231         case BPF_MAP_TYPE_SOCKHASH:
4232             if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4233                 *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4234             } else {
4235                 verbose(env, "invalid arg_type for sockmap/sockhash\n");
4236                 return -EINVAL;
4237             }
4238             break;
4239 
4240         default:
4241             break;
4242     }
4243     return 0;
4244 }
4245 
4246 struct bpf_reg_types {
4247     const enum bpf_reg_type types[10];
4248     u32 *btf_id;
4249 };
4250 
4251 static const struct bpf_reg_types map_key_value_types = {
4252     .types =
4253         {
4254             PTR_TO_STACK,
4255             PTR_TO_PACKET,
4256             PTR_TO_PACKET_META,
4257             PTR_TO_MAP_VALUE,
4258         },
4259 };
4260 
4261 static const struct bpf_reg_types sock_types = {
4262     .types =
4263         {
4264             PTR_TO_SOCK_COMMON,
4265             PTR_TO_SOCKET,
4266             PTR_TO_TCP_SOCK,
4267             PTR_TO_XDP_SOCK,
4268         },
4269 };
4270 
4271 #ifdef CONFIG_NET
4272 static const struct bpf_reg_types btf_id_sock_common_types = {
4273     .types =
4274         {
4275             PTR_TO_SOCK_COMMON,
4276             PTR_TO_SOCKET,
4277             PTR_TO_TCP_SOCK,
4278             PTR_TO_XDP_SOCK,
4279             PTR_TO_BTF_ID,
4280         },
4281     .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4282 };
4283 #endif
4284 
4285 static const struct bpf_reg_types mem_types = {
4286     .types =
4287         {
4288             PTR_TO_STACK,
4289             PTR_TO_PACKET,
4290             PTR_TO_PACKET_META,
4291             PTR_TO_MAP_VALUE,
4292             PTR_TO_MEM,
4293             PTR_TO_MEM | MEM_ALLOC,
4294             PTR_TO_BUF,
4295         },
4296 };
4297 
4298 static const struct bpf_reg_types int_ptr_types = {
4299     .types =
4300         {
4301             PTR_TO_STACK,
4302             PTR_TO_PACKET,
4303             PTR_TO_PACKET_META,
4304             PTR_TO_MAP_VALUE,
4305         },
4306 };
4307 
4308 static const struct bpf_reg_types fullsock_types = {.types = {PTR_TO_SOCKET}};
4309 static const struct bpf_reg_types scalar_types = {.types = {SCALAR_VALUE}};
4310 static const struct bpf_reg_types context_types = {.types = {PTR_TO_CTX}};
4311 static const struct bpf_reg_types alloc_mem_types = {.types = {PTR_TO_MEM | MEM_ALLOC}};
4312 static const struct bpf_reg_types const_map_ptr_types = {.types = {CONST_PTR_TO_MAP}};
4313 static const struct bpf_reg_types btf_ptr_types = {.types = {PTR_TO_BTF_ID}};
4314 static const struct bpf_reg_types spin_lock_types = {.types = {PTR_TO_MAP_VALUE}};
4315 static const struct bpf_reg_types percpu_btf_ptr_types = {.types = {PTR_TO_PERCPU_BTF_ID}};
4316 
4317 static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
4318     [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
4319     [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
4320     [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
4321     [ARG_CONST_SIZE] = &scalar_types,
4322     [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
4323     [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
4324     [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
4325     [ARG_PTR_TO_CTX] = &context_types,
4326     [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
4327 #ifdef CONFIG_NET
4328     [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
4329 #endif
4330     [ARG_PTR_TO_SOCKET] = &fullsock_types,
4331     [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
4332     [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
4333     [ARG_PTR_TO_MEM] = &mem_types,
4334     [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
4335     [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
4336     [ARG_PTR_TO_INT] = &int_ptr_types,
4337     [ARG_PTR_TO_LONG] = &int_ptr_types,
4338     [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
4339 };
4340 
4341 static int check_reg_type(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, const u32 *arg_btf_id)
4342 {
4343     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4344     enum bpf_reg_type expected, type = reg->type;
4345     const struct bpf_reg_types *compatible;
4346     int i, j;
4347 
4348     compatible = compatible_reg_types[base_type(arg_type)];
4349     if (!compatible) {
4350         verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
4351         return -EFAULT;
4352     }
4353 
4354     /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
4355      * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
4356      *
4357      * Same for MAYBE_NULL:
4358      *
4359      * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
4360      * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
4361      *
4362      * Therefore we fold these flags depending on the arg_type before comparison.
4363      */
4364     if (arg_type & MEM_RDONLY) {
4365         type &= ~MEM_RDONLY;
4366     }
4367     if (arg_type & PTR_MAYBE_NULL) {
4368         type &= ~PTR_MAYBE_NULL;
4369     }
4370 
4371     for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
4372         expected = compatible->types[i];
4373         if (expected == NOT_INIT) {
4374             break;
4375         }
4376 
4377         if (type == expected) {
4378             goto found;
4379         }
4380     }
4381 
4382     verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
4383     for (j = 0; j + 1 < i; j++) {
4384         verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
4385     }
4386     verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
4387     return -EACCES;
4388 
4389 found:
4390     if (reg->type == PTR_TO_BTF_ID) {
4391         if (!arg_btf_id) {
4392             if (!compatible->btf_id) {
4393                 verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
4394                 return -EFAULT;
4395             }
4396             arg_btf_id = compatible->btf_id;
4397         }
4398 
4399         if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *arg_btf_id)) {
4400             verbose(env, "R%d is of type %s but %s is expected\n", regno, kernel_type_name(reg->btf_id),
4401                     kernel_type_name(*arg_btf_id));
4402             return -EACCES;
4403         }
4404     }
4405 
4406     return 0;
4407 }
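
/* Example of the flag folding above (illustrative): a helper argument
 * declared as ARG_PTR_TO_MEM | MEM_RDONLY accepts a register of type
 * PTR_TO_MEM as well as PTR_TO_MEM | MEM_RDONLY, while a plain
 * ARG_PTR_TO_MEM argument only accepts PTR_TO_MEM, because the helper may
 * write through the pointer. PTR_MAYBE_NULL is folded the same way.
 */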
4408 
4409 static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta,
4410                           const struct bpf_func_proto *fn)
4411 {
4412     u32 regno = BPF_REG_1 + arg;
4413     struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4414     enum bpf_arg_type arg_type = fn->arg_type[arg];
4415     enum bpf_reg_type type = reg->type;
4416     int err = 0;
4417 
4418     if (arg_type == ARG_DONTCARE) {
4419         return 0;
4420     }
4421 
4422     err = check_reg_arg(env, regno, SRC_OP);
4423     if (err) {
4424         return err;
4425     }
4426 
4427     if (arg_type == ARG_ANYTHING) {
4428         if (is_pointer_value(env, regno)) {
4429             verbose(env, "R%d leaks addr into helper function\n", regno);
4430             return -EACCES;
4431         }
4432         return 0;
4433     }
4434 
4435     if (type_is_pkt_pointer(type) && !may_access_direct_pkt_data(env, meta, BPF_READ)) {
4436         verbose(env, "helper access to the packet is not allowed\n");
4437         return -EACCES;
4438     }
4439 
4440     if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4441         err = resolve_map_arg_type(env, meta, &arg_type);
4442         if (err) {
4443             return err;
4444         }
4445     }
4446 
4447     if (register_is_null(reg) && type_may_be_null(arg_type)) {
4448         /* A NULL register has a SCALAR_VALUE type, so skip
4449          * type checking.
4450          */
4451         goto skip_type_check;
4452     }
4453 
4454     err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
4455     if (err) {
4456         return err;
4457     }
4458 
4459     switch ((u32)type) {
4460         case SCALAR_VALUE:
4461         /* Pointer types where reg offset is explicitly allowed: */
4462         case PTR_TO_PACKET:
4463         case PTR_TO_PACKET_META:
4464         case PTR_TO_MAP_VALUE:
4465         case PTR_TO_MEM:
4466         case PTR_TO_MEM | MEM_RDONLY:
4467         case PTR_TO_MEM | MEM_ALLOC:
4468         case PTR_TO_BUF:
4469         case PTR_TO_BUF | MEM_RDONLY:
4470         case PTR_TO_STACK:
4471             /* Some of the argument types nevertheless require a
4472              * zero register offset.
4473              */
4474             if (arg_type == ARG_PTR_TO_ALLOC_MEM) {
4475                 goto force_off_check;
4476             }
4477             break;
4478         /* All the rest must be rejected: */
4479         default:
4480             force_off_check:
4481             err = __check_ptr_off_reg(env, reg, regno, type == PTR_TO_BTF_ID);
4482             if (err < 0) {
4483                 return err;
4484             }
4485             break;
4486     }
4487 
4488 skip_type_check:
4489     if (reg->ref_obj_id) {
4490         if (meta->ref_obj_id) {
4491             verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno,
4492                     reg->ref_obj_id, meta->ref_obj_id);
4493             return -EFAULT;
4494         }
4495         meta->ref_obj_id = reg->ref_obj_id;
4496     }
4497 
4498     if (arg_type == ARG_CONST_MAP_PTR) {
4499         /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
4500         meta->map_ptr = reg->map_ptr;
4501     } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
4502         /* bpf_map_xxx(..., map_ptr, ..., key) call:
4503          * check that [key, key + map->key_size) are within
4504          * stack limits and initialized
4505          */
4506         if (!meta->map_ptr) {
4507             /* in function declaration map_ptr must come before
4508              * map_key, so that it's verified and known before
4509              * we have to check map_key here. Otherwise it means
4510              * that kernel subsystem misconfigured verifier
4511              */
4512             verbose(env, "invalid map_ptr to access map->key\n");
4513             return -EACCES;
4514         }
4515         err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL);
4516     } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4517         if (type_may_be_null(arg_type) && register_is_null(reg)) {
4518             return 0;
4519         }
4520 
4521         /* bpf_map_xxx(..., map_ptr, ..., value) call:
4522          * check [value, value + map->value_size) validity
4523          */
4524         if (!meta->map_ptr) {
4525             /* kernel subsystem misconfigured verifier */
4526             verbose(env, "invalid map_ptr to access map->value\n");
4527             return -EACCES;
4528         }
4529         meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
4530         err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta);
4531     } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4532         if (!reg->btf_id) {
4533             verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4534             return -EACCES;
4535         }
4536         meta->ret_btf_id = reg->btf_id;
4537     } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
4538         if (meta->func_id == BPF_FUNC_spin_lock) {
4539             if (process_spin_lock(env, regno, true)) {
4540                 return -EACCES;
4541             }
4542         } else if (meta->func_id == BPF_FUNC_spin_unlock) {
4543             if (process_spin_lock(env, regno, false)) {
4544                 return -EACCES;
4545             }
4546         } else {
4547             verbose(env, "verifier internal error\n");
4548             return -EFAULT;
4549         }
4550     } else if (arg_type_is_mem_ptr(arg_type)) {
4551         /* The access to this pointer is only checked when we hit the
4552          * next is_mem_size argument below.
4553          */
4554         meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
4555     } else if (arg_type_is_mem_size(arg_type)) {
4556         bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
4557 
4558         /* This is used to refine r0 return value bounds for helpers
4559          * that enforce this value as an upper bound on return values.
4560          * See do_refine_retval_range() for helpers that can refine
4561          * the return value. The size argument's C type is u32, so we pull
4562          * the register bound from umax_value; if it is negative, the
4563          * verifier errors out. Only upper bounds can be learned because
4564          * retval is an int type and negative retvals are allowed.
4565          */
4566         meta->msize_max_value = reg->umax_value;
4567 
4568         /* The register is SCALAR_VALUE; the access check
4569          * happens using its boundaries.
4570          */
4571         if (!tnum_is_const(reg->var_off)) {
4572             /* For unprivileged variable accesses, disable raw
4573              * mode so that the program is required to
4574              * initialize all the memory that the helper could
4575              * just partially fill up.
4576              */
4577             meta = NULL;
4578         }
4579 
4580         if (reg->smin_value < 0) {
4581             verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", regno);
4582             return -EACCES;
4583         }
4584 
4585         if (reg->umin_value == 0) {
4586             err = check_helper_mem_access(env, regno - 1, 0, zero_size_allowed, meta);
4587             if (err) {
4588                 return err;
4589             }
4590         }
4591 
4592         if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4593             verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", regno);
4594             return -EACCES;
4595         }
4596         err = check_helper_mem_access(env, regno - 1, reg->umax_value, zero_size_allowed, meta);
4597         if (!err) {
4598             err = mark_chain_precision(env, regno);
4599         }
4600     } else if (arg_type_is_alloc_size(arg_type)) {
4601         if (!tnum_is_const(reg->var_off)) {
4602             verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n", regno);
4603             return -EACCES;
4604         }
4605         meta->mem_size = reg->var_off.value;
4606     } else if (arg_type_is_int_ptr(arg_type)) {
4607         int size = int_ptr_type_to_size(arg_type);
4608 
4609         err = check_helper_mem_access(env, regno, size, false, meta);
4610         if (err) {
4611             return err;
4612         }
4613         err = check_ptr_alignment(env, reg, 0, size, true);
4614     }
4615 
4616     return err;
4617 }
4618 
4619 static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
4620 {
4621     enum bpf_attach_type eatype = env->prog->expected_attach_type;
4622     enum bpf_prog_type type = resolve_prog_type(env->prog);
4623 
4624     if (func_id != BPF_FUNC_map_update_elem) {
4625         return false;
4626     }
4627 
4628     /* It's not possible to get access to a locked struct sock in these
4629      * contexts, so updating is safe.
4630      */
4631     switch (type) {
4632         case BPF_PROG_TYPE_TRACING:
4633             if (eatype == BPF_TRACE_ITER) {
4634                 return true;
4635             }
4636             break;
4637         case BPF_PROG_TYPE_SOCKET_FILTER:
4638         case BPF_PROG_TYPE_SCHED_CLS:
4639         case BPF_PROG_TYPE_SCHED_ACT:
4640         case BPF_PROG_TYPE_XDP:
4641         case BPF_PROG_TYPE_SK_REUSEPORT:
4642         case BPF_PROG_TYPE_FLOW_DISSECTOR:
4643         case BPF_PROG_TYPE_SK_LOOKUP:
4644             return true;
4645         default:
4646             break;
4647     }
4648 
4649     verbose(env, "cannot update sockmap in this context\n");
4650     return false;
4651 }
4652 
4653 static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
4654 {
4655     return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
4656 }
4657 
4658 static int check_map_func_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, int func_id)
4659 {
4660     if (!map) {
4661         return 0;
4662     }
4663 
4664     /* We need a two way check, first is from map perspective ... */
4665     switch (map->map_type) {
4666         case BPF_MAP_TYPE_PROG_ARRAY:
4667             if (func_id != BPF_FUNC_tail_call) {
4668                 goto error;
4669             }
4670             break;
4671         case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4672             if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output &&
4673                 func_id != BPF_FUNC_skb_output && func_id != BPF_FUNC_perf_event_read_value &&
4674                 func_id != BPF_FUNC_xdp_output) {
4675                 goto error;
4676             }
4677             break;
4678         case BPF_MAP_TYPE_RINGBUF:
4679             if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve &&
4680                 func_id != BPF_FUNC_ringbuf_query) {
4681                 goto error;
4682             }
4683             break;
4684         case BPF_MAP_TYPE_STACK_TRACE:
4685             if (func_id != BPF_FUNC_get_stackid) {
4686                 goto error;
4687             }
4688             break;
4689         case BPF_MAP_TYPE_CGROUP_ARRAY:
4690             if (func_id != BPF_FUNC_skb_under_cgroup && func_id != BPF_FUNC_current_task_under_cgroup) {
4691                 goto error;
4692             }
4693             break;
4694         case BPF_MAP_TYPE_CGROUP_STORAGE:
4695         case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
4696             if (func_id != BPF_FUNC_get_local_storage) {
4697                 goto error;
4698             }
4699             break;
4700         case BPF_MAP_TYPE_DEVMAP:
4701         case BPF_MAP_TYPE_DEVMAP_HASH:
4702             if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) {
4703                 goto error;
4704             }
4705             break;
4706         /* Restrict bpf side of cpumap and xskmap, open when use-cases
4707          * appear.
4708          */
4709         case BPF_MAP_TYPE_CPUMAP:
4710             if (func_id != BPF_FUNC_redirect_map) {
4711                 goto error;
4712             }
4713             break;
4714         case BPF_MAP_TYPE_XSKMAP:
4715             if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) {
4716                 goto error;
4717             }
4718             break;
4719         case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4720         case BPF_MAP_TYPE_HASH_OF_MAPS:
4721             if (func_id != BPF_FUNC_map_lookup_elem) {
4722                 goto error;
4723             }
4724             break;
4725         case BPF_MAP_TYPE_SOCKMAP:
4726             if (func_id != BPF_FUNC_sk_redirect_map && func_id != BPF_FUNC_sock_map_update &&
4727                 func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_map &&
4728                 func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem &&
4729                 !may_update_sockmap(env, func_id)) {
4730                 goto error;
4731             }
4732             break;
4733         case BPF_MAP_TYPE_SOCKHASH:
4734             if (func_id != BPF_FUNC_sk_redirect_hash && func_id != BPF_FUNC_sock_hash_update &&
4735                 func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_hash &&
4736                 func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem &&
4737                 !may_update_sockmap(env, func_id)) {
4738                 goto error;
4739             }
4740             break;
4741         case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
4742             if (func_id != BPF_FUNC_sk_select_reuseport) {
4743                 goto error;
4744             }
4745             break;
4746         case BPF_MAP_TYPE_QUEUE:
4747         case BPF_MAP_TYPE_STACK:
4748             if (func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_map_pop_elem &&
4749                 func_id != BPF_FUNC_map_push_elem) {
4750                 goto error;
4751             }
4752             break;
4753         case BPF_MAP_TYPE_SK_STORAGE:
4754             if (func_id != BPF_FUNC_sk_storage_get && func_id != BPF_FUNC_sk_storage_delete) {
4755                 goto error;
4756             }
4757             break;
4758         case BPF_MAP_TYPE_INODE_STORAGE:
4759             if (func_id != BPF_FUNC_inode_storage_get && func_id != BPF_FUNC_inode_storage_delete) {
4760                 goto error;
4761             }
4762             break;
4763         default:
4764             break;
4765     }
4766 
4767     /* ... and second from the function itself. */
4768     switch (func_id) {
4769         case BPF_FUNC_tail_call:
4770             if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
4771                 goto error;
4772             }
4773             if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
4774                 verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
4775                 return -EINVAL;
4776             }
4777             break;
4778         case BPF_FUNC_perf_event_read:
4779         case BPF_FUNC_perf_event_output:
4780         case BPF_FUNC_perf_event_read_value:
4781         case BPF_FUNC_skb_output:
4782         case BPF_FUNC_xdp_output:
4783             if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
4784                 goto error;
4785             }
4786             break;
4787         case BPF_FUNC_ringbuf_output:
4788         case BPF_FUNC_ringbuf_reserve:
4789         case BPF_FUNC_ringbuf_query:
4790             if (map->map_type != BPF_MAP_TYPE_RINGBUF) {
4791                 goto error;
4792             }
4793             break;
4794         case BPF_FUNC_get_stackid:
4795             if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) {
4796                 goto error;
4797             }
4798             break;
4799         case BPF_FUNC_current_task_under_cgroup:
4800         case BPF_FUNC_skb_under_cgroup:
4801             if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) {
4802                 goto error;
4803             }
4804             break;
4805         case BPF_FUNC_redirect_map:
4806             if (map->map_type != BPF_MAP_TYPE_DEVMAP && map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
4807                 map->map_type != BPF_MAP_TYPE_CPUMAP && map->map_type != BPF_MAP_TYPE_XSKMAP) {
4808                 goto error;
4809             }
4810             break;
4811         case BPF_FUNC_sk_redirect_map:
4812         case BPF_FUNC_msg_redirect_map:
4813         case BPF_FUNC_sock_map_update:
4814             if (map->map_type != BPF_MAP_TYPE_SOCKMAP) {
4815                 goto error;
4816             }
4817             break;
4818         case BPF_FUNC_sk_redirect_hash:
4819         case BPF_FUNC_msg_redirect_hash:
4820         case BPF_FUNC_sock_hash_update:
4821             if (map->map_type != BPF_MAP_TYPE_SOCKHASH) {
4822                 goto error;
4823             }
4824             break;
4825         case BPF_FUNC_get_local_storage:
4826             if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
4827                 goto error;
4828             }
4829             break;
4830         case BPF_FUNC_sk_select_reuseport:
4831             if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4832                 map->map_type != BPF_MAP_TYPE_SOCKHASH) {
4833                 goto error;
4834             }
4835             break;
4836         case BPF_FUNC_map_peek_elem:
4837         case BPF_FUNC_map_pop_elem:
4838         case BPF_FUNC_map_push_elem:
4839             if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) {
4840                 goto error;
4841             }
4842             break;
4843         case BPF_FUNC_sk_storage_get:
4844         case BPF_FUNC_sk_storage_delete:
4845             if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
4846                 goto error;
4847             }
4848             break;
4849         case BPF_FUNC_inode_storage_get:
4850         case BPF_FUNC_inode_storage_delete:
4851             if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) {
4852                 goto error;
4853             }
4854             break;
4855         default:
4856             break;
4857     }
4858 
4859     return 0;
4860 error:
4861     verbose(env, "cannot pass map_type %d into func %s#%d\n", map->map_type, func_id_name(func_id), func_id);
4862     return -EINVAL;
4863 }
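
/* Illustrative example: passing a plain BPF_MAP_TYPE_HASH map to
 * bpf_tail_call() passes the first (map-side) switch, since hash maps are
 * not tied to any particular helper, but the second (helper-side) switch
 * rejects it because bpf_tail_call() only accepts BPF_MAP_TYPE_PROG_ARRAY.
 * The log then reads roughly:
 *
 *     cannot pass map_type 1 into func bpf_tail_call#12
 */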
4864 
4865 static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
4866 {
4867     int count = 0;
4868 
4869     if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) {
4870         count++;
4871     }
4872     if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) {
4873         count++;
4874     }
4875     if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) {
4876         count++;
4877     }
4878     if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) {
4879         count++;
4880     }
4881     if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) {
4882         count++;
4883     }
4884 
4885     /* We only support one arg being in raw mode at the moment,
4886      * which is sufficient for the helper functions we have
4887      * right now.
4888      */
4889     return count <= 1;
4890 }
4891 
4892 static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, enum bpf_arg_type arg_next)
4893 {
4894     return (arg_type_is_mem_ptr(arg_curr) && !arg_type_is_mem_size(arg_next)) ||
4895            (!arg_type_is_mem_ptr(arg_curr) && arg_type_is_mem_size(arg_next));
4896 }
4897 
4898 static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
4899 {
4900     /* bpf_xxx(..., buf, len) call will access 'len'
4901      * bytes from memory 'buf'. Both arg types need
4902      * to be paired, so make sure there's no buggy
4903      * helper function specification.
4904      */
4905     if (arg_type_is_mem_size(fn->arg1_type) || arg_type_is_mem_ptr(fn->arg5_type) ||
4906         check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
4907         check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
4908         check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
4909         check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) {
4910         return false;
4911     }
4912 
4913     return true;
4914 }
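
/* Example of a correctly paired prototype (for illustration, this is how
 * bpf_probe_read_kernel() declares its arguments):
 *
 *     .arg1_type = ARG_PTR_TO_UNINIT_MEM,   // buffer the helper fills
 *     .arg2_type = ARG_CONST_SIZE_OR_ZERO,  // byte count, follows the buffer
 *     .arg3_type = ARG_ANYTHING,            // unsafe source address
 *
 * A proto with a mem pointer in arg5, or with a size argument that does not
 * immediately follow its buffer, is rejected here.
 */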
4915 
4916 static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
4917 {
4918     int count = 0;
4919 
4920     if (arg_type_may_be_refcounted(fn->arg1_type)) {
4921         count++;
4922     }
4923     if (arg_type_may_be_refcounted(fn->arg2_type)) {
4924         count++;
4925     }
4926     if (arg_type_may_be_refcounted(fn->arg3_type)) {
4927         count++;
4928     }
4929     if (arg_type_may_be_refcounted(fn->arg4_type)) {
4930         count++;
4931     }
4932     if (arg_type_may_be_refcounted(fn->arg5_type)) {
4933         count++;
4934     }
4935 
4936     /* A reference acquiring function cannot acquire
4937      * another refcounted ptr.
4938      */
4939     if (may_be_acquire_function(func_id) && count) {
4940         return false;
4941     }
4942 
4943     /* We only support one arg being unreferenced at the moment,
4944      * which is sufficient for the helper functions we have right now.
4945      */
4946     return count <= 1;
4947 }
4948 
4949 static bool check_btf_id_ok(const struct bpf_func_proto *fn)
4950 {
4951     int i;
4952 
4953     for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4954         if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) {
4955             return false;
4956         }
4957 
4958         if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) {
4959             return false;
4960         }
4961     }
4962 
4963     return true;
4964 }
4965 
4966 static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
4967 {
4968     return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && check_btf_id_ok(fn) && check_refcount_ok(fn, func_id)
4969                ? 0
4970                : -EINVAL;
4971 }
4972 
4973 /* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
4974  * are now invalid, so turn them into unknown SCALAR_VALUE.
4975  */
4976 static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, struct bpf_func_state *state)
4977 {
4978     struct bpf_reg_state *regs = state->regs, *reg;
4979     int i;
4980 
4981     for (i = 0; i < MAX_BPF_REG; i++) {
4982         if (reg_is_pkt_pointer_any(&regs[i])) {
4983             mark_reg_unknown(env, regs, i);
4984         }
4985     }
4986 
4987     bpf_for_each_spilled_reg(i, state, reg)
4988     {
4989         if (!reg) {
4990             continue;
4991         }
4992         if (reg_is_pkt_pointer_any(reg)) {
4993             __mark_reg_unknown(env, reg);
4994         }
4995     }
4996 }
4997 
4998 static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
4999 {
5000     struct bpf_verifier_state *vstate = env->cur_state;
5001     int i;
5002 
5003     for (i = 0; i <= vstate->curframe; i++) {
5004         __clear_all_pkt_pointers(env, vstate->frame[i]);
5005     }
5006 }
5007 
5008 static void release_reg_references(struct bpf_verifier_env *env, struct bpf_func_state *state, int ref_obj_id)
5009 {
5010     struct bpf_reg_state *regs = state->regs, *reg;
5011     int i;
5012 
5013     for (i = 0; i < MAX_BPF_REG; i++) {
5014         if (regs[i].ref_obj_id == ref_obj_id) {
5015             mark_reg_unknown(env, regs, i);
5016         }
5017     }
5018 
5019     bpf_for_each_spilled_reg(i, state, reg)
5020     {
5021         if (!reg) {
5022             continue;
5023         }
5024         if (reg->ref_obj_id == ref_obj_id) {
5025             __mark_reg_unknown(env, reg);
5026         }
5027     }
5028 }
5029 
5030 /* The pointer with the specified id has released its reference to kernel
5031  * resources. Identify all copies of the same pointer and clear the reference.
5032  */
5033 static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
5034 {
5035     struct bpf_verifier_state *vstate = env->cur_state;
5036     int err;
5037     int i;
5038 
5039     err = release_reference_state(cur_func(env), ref_obj_id);
5040     if (err) {
5041         return err;
5042     }
5043 
5044     for (i = 0; i <= vstate->curframe; i++) {
5045         release_reg_references(env, vstate->frame[i], ref_obj_id);
5046     }
5047 
5048     return 0;
5049 }
5050 
5051 static void clear_caller_saved_regs(struct bpf_verifier_env *env, struct bpf_reg_state *regs)
5052 {
5053     int i;
5054 
5055     /* after the call registers r0 - r5 were scratched */
5056     for (i = 0; i < CALLER_SAVED_REGS; i++) {
5057         mark_reg_not_init(env, regs, caller_saved[i]);
5058         check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5059     }
5060 }
5061 
5062 static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx)
5063 {
5064     struct bpf_verifier_state *state = env->cur_state;
5065     struct bpf_func_info_aux *func_info_aux;
5066     struct bpf_func_state *caller, *callee;
5067     int i, err, subprog, target_insn;
5068     bool is_global = false;
5069 
5070     if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5071         verbose(env, "the call stack of %d frames is too deep\n", state->curframe + 2);
5072         return -E2BIG;
5073     }
5074 
5075     target_insn = *insn_idx + insn->imm;
5076     subprog = find_subprog(env, target_insn + 1);
5077     if (subprog < 0) {
5078         verbose(env, "verifier bug. No program starts at insn %d\n", target_insn + 1);
5079         return -EFAULT;
5080     }
5081 
5082     caller = state->frame[state->curframe];
5083     if (state->frame[state->curframe + 1]) {
5084         verbose(env, "verifier bug. Frame %d already allocated\n", state->curframe + 1);
5085         return -EFAULT;
5086     }
5087 
5088     func_info_aux = env->prog->aux->func_info_aux;
5089     if (func_info_aux) {
5090         is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5091     }
5092     err = btf_check_func_arg_match(env, subprog, caller->regs);
5093     if (err == -EFAULT) {
5094         return err;
5095     }
5096     if (is_global) {
5097         if (err) {
5098             verbose(env, "Caller passes invalid args into func#%d\n", subprog);
5099             return err;
5100         } else {
5101             if (env->log.level & BPF_LOG_LEVEL) {
5102                 verbose(env, "Func#%d is global and valid. Skipping.\n", subprog);
5103             }
5104             clear_caller_saved_regs(env, caller->regs);
5105 
5106             /* All global functions return a 64-bit SCALAR_VALUE */
5107             mark_reg_unknown(env, caller->regs, BPF_REG_0);
5108             caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5109 
5110             /* continue with next insn after call */
5111             return 0;
5112         }
5113     }
5114 
5115     callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5116     if (!callee) {
5117         return -ENOMEM;
5118     }
5119     state->frame[state->curframe + 1] = callee;
5120 
5121     /* callee cannot access r0, r6 - r9 for reading and has to write
5122      * into its own stack before reading from it.
5123      * callee can read/write into caller's stack
5124      */
5125     init_func_state(env, callee,
5126                     /* remember the callsite, it will be used by bpf_exit */
5127                     *insn_idx /* callsite */, state->curframe + 1 /* frameno within this callchain */,
5128                     subprog /* subprog number within this prog */);
5129 
5130     /* Transfer references to the callee */
5131     err = transfer_reference_state(callee, caller);
5132     if (err) {
5133         return err;
5134     }
5135 
5136     /* copy r1 - r5 args that callee can access.  The copy includes parent
5137      * pointers, which connects us up to the liveness chain
5138      */
5139     for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
5140         callee->regs[i] = caller->regs[i];
5141     }
5142 
5143     clear_caller_saved_regs(env, caller->regs);
5144 
5145     /* only increment it after check_reg_arg() finished */
5146     state->curframe++;
5147 
5148     /* and go analyze first insn of the callee */
5149     *insn_idx = target_insn;
5150 
5151     if (env->log.level & BPF_LOG_LEVEL) {
5152         verbose(env, "caller:\n");
5153         print_verifier_state(env, caller);
5154         verbose(env, "callee:\n");
5155         print_verifier_state(env, callee);
5156     }
5157     return 0;
5158 }
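
/* Illustrative example (macro form, hypothetical program): a bpf-to-bpf call
 * is encoded as BPF_JMP | BPF_CALL with src_reg == BPF_PSEUDO_CALL and a
 * relative imm; the callee starts at call_idx + imm + 1:
 *
 *     0: BPF_MOV64_IMM(BPF_REG_1, 5),
 *     1: BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, 0, 1),
 *     2: BPF_EXIT_INSN(),
 *     3: BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),    // subprog: return its arg
 *     4: BPF_EXIT_INSN(),
 *
 * check_func_call() allocates frame 1 for insns 3-4, copies R1-R5 into it
 * and scratches the caller's R0-R5.
 */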
5159 
5160 static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
5161 {
5162     struct bpf_verifier_state *state = env->cur_state;
5163     struct bpf_func_state *caller, *callee;
5164     struct bpf_reg_state *r0;
5165     int err;
5166 
5167     callee = state->frame[state->curframe];
5168     r0 = &callee->regs[BPF_REG_0];
5169     if (r0->type == PTR_TO_STACK) {
5170         /* technically it's ok to return caller's stack pointer
5171          * (or caller's caller's pointer) back to the caller,
5172          * since these pointers are valid. Only current stack
5173          * pointer will be invalid as soon as function exits,
5174          * but let's be conservative
5175          */
5176         verbose(env, "cannot return stack pointer to the caller\n");
5177         return -EINVAL;
5178     }
5179 
5180     state->curframe--;
5181     caller = state->frame[state->curframe];
5182     /* return to the caller whatever r0 had in the callee */
5183     caller->regs[BPF_REG_0] = *r0;
5184 
5185     /* Transfer references to the caller */
5186     err = transfer_reference_state(caller, callee);
5187     if (err) {
5188         return err;
5189     }
5190 
5191     *insn_idx = callee->callsite + 1;
5192     if (env->log.level & BPF_LOG_LEVEL) {
5193         verbose(env, "returning from callee:\n");
5194         print_verifier_state(env, callee);
5195         verbose(env, "to caller at %d:\n", *insn_idx);
5196         print_verifier_state(env, caller);
5197     }
5198     /* clear everything in the callee */
5199     free_func_state(callee);
5200     state->frame[state->curframe + 1] = NULL;
5201     return 0;
5202 }
5203 
5204 static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, int func_id,
5205                                    struct bpf_call_arg_meta *meta)
5206 {
5207     struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
5208 
5209     if (ret_type != RET_INTEGER ||
5210         (func_id != BPF_FUNC_get_stack && func_id != BPF_FUNC_probe_read_str &&
5211          func_id != BPF_FUNC_probe_read_kernel_str && func_id != BPF_FUNC_probe_read_user_str)) {
5212         return;
5213     }
5214 
5215     ret_reg->smax_value = meta->msize_max_value;
5216     ret_reg->s32_max_value = meta->msize_max_value;
5217     ret_reg->smin_value = -MAX_ERRNO;
5218     ret_reg->s32_min_value = -MAX_ERRNO;
5219     reg_bounds_sync(ret_reg);
5220 }
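
/* Illustrative example: for bpf_probe_read_kernel_str(buf, 64, src) the size
 * argument's umax_value (64) was stashed in meta->msize_max_value by
 * check_func_arg(), so the code above clamps r0 to [-MAX_ERRNO, 64], which
 * lets later bounds checks use r0 as a safe length.
 */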
5221 
5222 static int record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, int func_id, int insn_idx)
5223 {
5224     struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5225     struct bpf_map *map = meta->map_ptr;
5226 
5227     if (func_id != BPF_FUNC_tail_call && func_id != BPF_FUNC_map_lookup_elem && func_id != BPF_FUNC_map_update_elem &&
5228         func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem &&
5229         func_id != BPF_FUNC_map_peek_elem) {
5230         return 0;
5231     }
5232 
5233     if (map == NULL) {
5234         verbose(env, "kernel subsystem misconfigured verifier\n");
5235         return -EINVAL;
5236     }
5237 
5238     /* In case of read-only, some additional restrictions
5239      * need to be applied in order to prevent altering the
5240      * state of the map from program side.
5241      */
5242     if ((map->map_flags & BPF_F_RDONLY_PROG) &&
5243         (func_id == BPF_FUNC_map_delete_elem || func_id == BPF_FUNC_map_update_elem ||
5244          func_id == BPF_FUNC_map_push_elem || func_id == BPF_FUNC_map_pop_elem)) {
5245         verbose(env, "write into map forbidden\n");
5246         return -EACCES;
5247     }
5248 
5249     if (!BPF_MAP_PTR(aux->map_ptr_state)) {
5250         bpf_map_ptr_store(aux, meta->map_ptr, !meta->map_ptr->bypass_spec_v1);
5251     } else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) {
5252         bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, !meta->map_ptr->bypass_spec_v1);
5253     }
5254     return 0;
5255 }
5256 
5257 static int record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, int func_id, int insn_idx)
5258 {
5259     struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5260     struct bpf_reg_state *regs = cur_regs(env), *reg;
5261     struct bpf_map *map = meta->map_ptr;
5262     u64 val, max;
5263     int err;
5264 
5265     if (func_id != BPF_FUNC_tail_call) {
5266         return 0;
5267     }
5268     if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
5269         verbose(env, "kernel subsystem misconfigured verifier\n");
5270         return -EINVAL;
5271     }
5272 
5273     reg = &regs[BPF_REG_3];
5274     val = reg->var_off.value;
5275     max = map->max_entries;
5276 
5277     if (!(register_is_const(reg) && val < max)) {
5278         bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5279         return 0;
5280     }
5281 
5282     err = mark_chain_precision(env, BPF_REG_3);
5283     if (err) {
5284         return err;
5285     }
5286 
5287     if (bpf_map_key_unseen(aux)) {
5288         bpf_map_key_store(aux, val);
5289     } else if (!bpf_map_key_poisoned(aux) && bpf_map_key_immediate(aux) != val) {
5290         bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5291     }
5292     return 0;
5293 }
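
/* Illustrative example: with bpf_tail_call(ctx, &prog_array, 3) the index in
 * R3 is the constant 3, so the key is recorded here and the JIT may later
 * turn the tail call into a direct jump to that program. If the index is not
 * a known constant (or is out of range), BPF_MAP_KEY_POISON is stored and
 * the generic, bounded tail call path is used instead.
 */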
5294 
5295 static int check_reference_leak(struct bpf_verifier_env *env)
5296 {
5297     struct bpf_func_state *state = cur_func(env);
5298     int i;
5299 
5300     for (i = 0; i < state->acquired_refs; i++) {
5301         verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx);
5302     }
5303     return state->acquired_refs ? -EINVAL : 0;
5304 }
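
/* Illustrative example (hypothetical tuple variable): a program that acquires
 * a socket reference,
 *
 *     struct bpf_sock *sk = bpf_sk_lookup_tcp(ctx, &tuple, sizeof(tuple),
 *                                             BPF_F_CURRENT_NETNS, 0);
 *
 * and then reaches bpf_tail_call() or program exit without calling
 * bpf_sk_release(sk) still holds the reference in state->refs[] and is
 * rejected with "Unreleased reference id=... alloc_insn=...".
 */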
5305 
5306 static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
5307 {
5308     const struct bpf_func_proto *fn = NULL;
5309     enum bpf_return_type ret_type;
5310     enum bpf_type_flag ret_flag;
5311     struct bpf_reg_state *regs;
5312     struct bpf_call_arg_meta meta;
5313     bool changes_data;
5314     int i, err;
5315 
5316     /* find function prototype */
5317     if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
5318         verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
5319         return -EINVAL;
5320     }
5321 
5322     if (env->ops->get_func_proto) {
5323         fn = env->ops->get_func_proto(func_id, env->prog);
5324     }
5325     if (!fn) {
5326         verbose(env, "unknown func %s#%d\n", func_id_name(func_id), func_id);
5327         return -EINVAL;
5328     }
5329 
5330     /* eBPF programs must be GPL compatible to use GPL-ed functions */
5331     if (!env->prog->gpl_compatible && fn->gpl_only) {
5332         verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
5333         return -EINVAL;
5334     }
5335 
5336     if (fn->allowed && !fn->allowed(env->prog)) {
5337         verbose(env, "helper call is not allowed in probe\n");
5338         return -EINVAL;
5339     }
5340 
5341     /* With LD_ABS/IND some JITs save/restore skb from r1. */
5342     changes_data = bpf_helper_changes_pkt_data(fn->func);
5343     if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
5344         verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", func_id_name(func_id), func_id);
5345         return -EINVAL;
5346     }
5347 
5348     memset(&meta, 0, sizeof(meta));
5349     meta.pkt_access = fn->pkt_access;
5350 
5351     err = check_func_proto(fn, func_id);
5352     if (err) {
5353         verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id);
5354         return err;
5355     }
5356 
5357     meta.func_id = func_id;
5358     /* check args */
5359     for (i = 0; i < 5; i++) {
5360         err = check_func_arg(env, i, &meta, fn);
5361         if (err) {
5362             return err;
5363         }
5364     }
5365 
5366     err = record_func_map(env, &meta, func_id, insn_idx);
5367     if (err) {
5368         return err;
5369     }
5370 
5371     err = record_func_key(env, &meta, func_id, insn_idx);
5372     if (err) {
5373         return err;
5374     }
5375 
5376     /* Mark slots with STACK_MISC in case of raw mode, stack offset
5377      * is inferred from register state.
5378      */
5379     for (i = 0; i < meta.access_size; i++) {
5380         err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1, false);
5381         if (err) {
5382             return err;
5383         }
5384     }
5385 
5386     if (func_id == BPF_FUNC_tail_call) {
5387         err = check_reference_leak(env);
5388         if (err) {
5389             verbose(env, "tail_call would lead to reference leak\n");
5390             return err;
5391         }
5392     } else if (is_release_function(func_id)) {
5393         err = release_reference(env, meta.ref_obj_id);
5394         if (err) {
5395             verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id);
5396             return err;
5397         }
5398     }
5399 
5400     regs = cur_regs(env);
5401     /* check that flags argument in get_local_storage(map, flags) is 0,
5402      * this is required because get_local_storage() can't return an error.
5403      */
5404     if (func_id == BPF_FUNC_get_local_storage && !register_is_null(&regs[BPF_REG_2])) {
5405         verbose(env, "get_local_storage() doesn't support non-zero flags\n");
5406         return -EINVAL;
5407     }
5408 
5409     /* reset caller saved regs */
5410     for (i = 0; i < CALLER_SAVED_REGS; i++) {
5411         mark_reg_not_init(env, regs, caller_saved[i]);
5412         check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5413     }
5414 
5415     /* helper call returns 64-bit value. */
5416     regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5417 
5418     /* update return register (already marked as written above) */
5419     ret_type = fn->ret_type;
5420     ret_flag = type_flag(fn->ret_type);
5421     if (ret_type == RET_INTEGER) {
5422         /* sets type to SCALAR_VALUE */
5423         mark_reg_unknown(env, regs, BPF_REG_0);
5424     } else if (ret_type == RET_VOID) {
5425         regs[BPF_REG_0].type = NOT_INIT;
5426     } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
5427         /* There is no offset yet applied, variable or fixed */
5428         mark_reg_known_zero(env, regs, BPF_REG_0);
5429         /* remember map_ptr, so that check_map_access()
5430          * can check 'value_size' boundary of memory access
5431          * to map element returned from bpf_map_lookup_elem()
5432          */
5433         if (meta.map_ptr == NULL) {
5434             verbose(env, "kernel subsystem misconfigured verifier\n");
5435             return -EINVAL;
5436         }
5437         regs[BPF_REG_0].map_ptr = meta.map_ptr;
5438         regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
5439         if (!type_may_be_null(ret_type) && map_value_has_spin_lock(meta.map_ptr)) {
5440             regs[BPF_REG_0].id = ++env->id_gen;
5441         }
5442     } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
5443         mark_reg_known_zero(env, regs, BPF_REG_0);
5444         regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
5445     } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
5446         mark_reg_known_zero(env, regs, BPF_REG_0);
5447         regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
5448     } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
5449         mark_reg_known_zero(env, regs, BPF_REG_0);
5450         regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
5451     } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
5452         mark_reg_known_zero(env, regs, BPF_REG_0);
5453         regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
5454         regs[BPF_REG_0].mem_size = meta.mem_size;
5455     } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
5456         const struct btf_type *t;
5457 
5458         mark_reg_known_zero(env, regs, BPF_REG_0);
5459         t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5460         if (!btf_type_is_struct(t)) {
5461             u32 tsize;
5462             const struct btf_type *ret;
5463             const char *tname;
5464 
5465             /* resolve the type size of ksym. */
5466             ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5467             if (IS_ERR(ret)) {
5468                 tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5469                 verbose(env, "unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret));
5470                 return -EINVAL;
5471             }
5472             regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
5473             regs[BPF_REG_0].mem_size = tsize;
5474         } else {
5475             /* MEM_RDONLY may be carried from ret_flag, but it
5476              * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
5477              * it will confuse the check of PTR_TO_BTF_ID in
5478              * check_mem_access().
5479              */
5480             ret_flag &= ~MEM_RDONLY;
5481 
5482             regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
5483             regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5484         }
5485     } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
5486         int ret_btf_id;
5487 
5488         mark_reg_known_zero(env, regs, BPF_REG_0);
5489         regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
5490         ret_btf_id = *fn->ret_btf_id;
5491         if (ret_btf_id == 0) {
5492             verbose(env, "invalid return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id);
5493             return -EINVAL;
5494         }
5495         regs[BPF_REG_0].btf_id = ret_btf_id;
5496     } else {
5497         verbose(env, "unknown return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id);
5498         return -EINVAL;
5499     }
5500 
5501     if (type_may_be_null(regs[BPF_REG_0].type)) {
5502         regs[BPF_REG_0].id = ++env->id_gen;
5503     }
5504 
5505     if (is_ptr_cast_function(func_id)) {
5506         /* For release_reference() */
5507         regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
5508     } else if (is_acquire_function(func_id, meta.map_ptr)) {
5509         int id = acquire_reference_state(env, insn_idx);
5510         if (id < 0) {
5511             return id;
5512         }
5513         /* For mark_ptr_or_null_reg() */
5514         regs[BPF_REG_0].id = id;
5515         /* For release_reference() */
5516         regs[BPF_REG_0].ref_obj_id = id;
5517     }
5518 
5519     do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
5520 
5521     err = check_map_func_compatibility(env, meta.map_ptr, func_id);
5522     if (err) {
5523         return err;
5524     }
5525 
5526     if ((func_id == BPF_FUNC_get_stack || func_id == BPF_FUNC_get_task_stack) && !env->prog->has_callchain_buf) {
5527         const char *err_str;
5528 
5529 #ifdef CONFIG_PERF_EVENTS
5530         err = get_callchain_buffers(sysctl_perf_event_max_stack);
5531         err_str = "cannot get callchain buffer for func %s#%d\n";
5532 #else
5533         err = -ENOTSUPP;
5534         err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
5535 #endif
5536         if (err) {
5537             verbose(env, err_str, func_id_name(func_id), func_id);
5538             return err;
5539         }
5540 
5541         env->prog->has_callchain_buf = true;
5542     }
5543 
5544     if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) {
5545         env->prog->call_get_stack = true;
5546     }
5547 
5548     if (changes_data) {
5549         clear_all_pkt_pointers(env);
5550     }
5551     return 0;
5552 }
5553 
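/* Worked example (illustrative) for the overflow helpers below: with
 * a = S64_MAX and b = 1, the addition is performed in u64 where wrap-around
 * is well-defined, giving res = S64_MIN. Since b >= 0 and res < a, the
 * wrap-around is reported as a signed overflow. The 32-bit and subtraction
 * variants apply the same check with the widths and signs adjusted.
 */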
5554 static bool signed_add_overflows(s64 a, s64 b)
5555 {
5556     /* Do the add in u64, where overflow is well-defined */
5557     s64 res = (s64)((u64)a + (u64)b);
5558 
5559     if (b < 0) {
5560         return res > a;
5561     }
5562     return res < a;
5563 }
5564 
5565 static bool signed_add32_overflows(s32 a, s32 b)
5566 {
5567     /* Do the add in u32, where overflow is well-defined */
5568     s32 res = (s32)((u32)a + (u32)b);
5569 
5570     if (b < 0) {
5571         return res > a;
5572     }
5573     return res < a;
5574 }
5575 
5576 static bool signed_sub_overflows(s64 a, s64 b)
5577 {
5578     /* Do the sub in u64, where overflow is well-defined */
5579     s64 res = (s64)((u64)a - (u64)b);
5580 
5581     if (b < 0) {
5582         return res < a;
5583     }
5584     return res > a;
5585 }
5586 
5587 static bool signed_sub32_overflows(s32 a, s32 b)
5588 {
5589     /* Do the sub in u32, where overflow is well-defined */
5590     s32 res = (s32)((u32)a - (u32)b);
5591 
5592     if (b < 0) {
5593         return res < a;
5594     }
5595     return res > a;
5596 }
5597 
5598 static bool check_reg_sane_offset(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, enum bpf_reg_type type)
5599 {
5600     bool known = tnum_is_const(reg->var_off);
5601     s64 val = reg->var_off.value;
5602     s64 smin = reg->smin_value;
5603 
5604     if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
5605         verbose(env, "math between %s pointer and %lld is not allowed\n", reg_type_str(env, type), val);
5606         return false;
5607     }
5608 
5609     if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
5610         verbose(env, "%s pointer offset %d is not allowed\n", reg_type_str(env, type), reg->off);
5611         return false;
5612     }
5613 
5614     if (smin == S64_MIN) {
5615         verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
5616                 reg_type_str(env, type));
5617         return false;
5618     }
5619 
5620     if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
5621         verbose(env, "value %lld makes %s pointer be out of bounds\n", smin, reg_type_str(env, type));
5622         return false;
5623     }
5624 
5625     return true;
5626 }
5627 
5628 static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
5629 {
5630     return &env->insn_aux_data[env->insn_idx];
5631 }
5632 
5633 enum {
5634     REASON_BOUNDS = -1,
5635     REASON_TYPE = -2,
5636     REASON_PATHS = -3,
5637     REASON_LIMIT = -4,
5638     REASON_STACK = -5,
5639 };
5640 
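/* Example (illustrative) for retrieve_ptr_limit() below: for a PTR_TO_STACK
 * register with a constant var_off of 0 and off == -8, the limit becomes
 * -(0 + (-8)) = 8; the runtime masking emitted later by the fixup pass keeps
 * the scalar operand within that limit under (speculative) execution. max is
 * MAX_BPF_STACK (plus one when masking to the left). For PTR_TO_MAP_VALUE
 * the limit is derived from the map's value_size instead.
 */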
5641 static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, u32 *alu_limit, bool mask_to_left)
5642 {
5643     u32 max = 0, ptr_limit = 0;
5644 
5645     switch (ptr_reg->type) {
5646         case PTR_TO_STACK:
5647             /* Offset 0 is out-of-bounds, but acceptable start for the
5648              * left direction, see BPF_REG_FP. Also, unknown scalar
5649              * offset where we would need to deal with min/max bounds is
5650              * currently prohibited for unprivileged.
5651              */
5652             max = MAX_BPF_STACK + mask_to_left;
5653             ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
5654             break;
5655         case PTR_TO_MAP_VALUE:
5656             max = ptr_reg->map_ptr->value_size;
5657             ptr_limit = (mask_to_left ? ptr_reg->smin_value : ptr_reg->umax_value) + ptr_reg->off;
5658             break;
5659         default:
5660             return REASON_TYPE;
5661     }
5662 
5663     if (ptr_limit >= max) {
5664         return REASON_LIMIT;
5665     }
5666     *alu_limit = ptr_limit;
5667     return 0;
5668 }
5669 
5670 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, const struct bpf_insn *insn)
5671 {
5672     return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
5673 }
5674 
5675 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, u32 alu_state, u32 alu_limit)
5676 {
5677     /* If we arrived here from different branches with different
5678      * state or limits to sanitize, then this won't work.
5679      */
5680     if (aux->alu_state && (aux->alu_state != alu_state || aux->alu_limit != alu_limit)) {
5681         return REASON_PATHS;
5682     }
5683 
5684     /* Corresponding fixup done in fixup_bpf_calls(). */
5685     aux->alu_state = alu_state;
5686     aux->alu_limit = alu_limit;
5687     return 0;
5688 }
5689 
5690 static int sanitize_val_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
5691 {
5692     struct bpf_insn_aux_data *aux = cur_aux(env);
5693 
5694     if (can_skip_alu_sanitation(env, insn)) {
5695         return 0;
5696     }
5697 
5698     return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
5699 }
5700 
5701 static bool sanitize_needed(u8 opcode)
5702 {
5703     return opcode == BPF_ADD || opcode == BPF_SUB;
5704 }
5705 
5706 struct bpf_sanitize_info {
5707     struct bpf_insn_aux_data aux;
5708     bool mask_to_left;
5709 };
5710 
5711 static struct bpf_verifier_state *sanitize_speculative_path(struct bpf_verifier_env *env, const struct bpf_insn *insn,
5712                                                             u32 next_idx, u32 curr_idx)
5713 {
5714     struct bpf_verifier_state *branch;
5715     struct bpf_reg_state *regs;
5716 
5717     branch = push_stack(env, next_idx, curr_idx, true);
5718     if (branch && insn) {
5719         regs = branch->frame[branch->curframe]->regs;
5720         if (BPF_SRC(insn->code) == BPF_K) {
5721             mark_reg_unknown(env, regs, insn->dst_reg);
5722         } else if (BPF_SRC(insn->code) == BPF_X) {
5723             mark_reg_unknown(env, regs, insn->dst_reg);
5724             mark_reg_unknown(env, regs, insn->src_reg);
5725         }
5726     }
5727     return branch;
5728 }
5729 
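/* Informal overview of the flow implemented by sanitize_ptr_alu() below: on
 * the first, non-commit pass the masking direction and alu_limit are computed
 * from off_reg and stored in info/aux, and a speculative copy of the state is
 * pushed via sanitize_speculative_path() so the truncated-to-zero outcome is
 * explored as well. On the commit pass, after the pointer arithmetic has been
 * simulated, the limit is narrowed to the actually observed pointer move and
 * written into the instruction's aux data for fixup_bpf_calls().
 */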
5730 static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg,
5731                             const struct bpf_reg_state *off_reg, struct bpf_reg_state *dst_reg,
5732                             struct bpf_sanitize_info *info, const bool commit_window)
5733 {
5734     struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
5735     struct bpf_verifier_state *vstate = env->cur_state;
5736     bool off_is_imm = tnum_is_const(off_reg->var_off);
5737     bool off_is_neg = off_reg->smin_value < 0;
5738     bool ptr_is_dst_reg = ptr_reg == dst_reg;
5739     u8 opcode = BPF_OP(insn->code);
5740     u32 alu_state, alu_limit;
5741     struct bpf_reg_state tmp;
5742     bool ret;
5743     int err;
5744 
5745     if (can_skip_alu_sanitation(env, insn)) {
5746         return 0;
5747     }
5748 
5749     /* We already marked aux for masking from non-speculative
5750      * paths, thus we got here in the first place. We only care
5751      * to explore bad access from here.
5752      */
5753     if (vstate->speculative) {
5754         goto do_sim;
5755     }
5756 
5757     if (!commit_window) {
5758         if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) {
5759             return REASON_BOUNDS;
5760         }
5761 
5762         info->mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg);
5763     }
5764 
5765     err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
5766     if (err < 0) {
5767         return err;
5768     }
5769 
5770     if (commit_window) {
5771         /* In commit phase we narrow the masking window based on
5772          * the observed pointer move after the simulated operation.
5773          */
5774         alu_state = info->aux.alu_state;
5775         alu_limit = abs(info->aux.alu_limit - alu_limit);
5776     } else {
5777         alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
5778         alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
5779         alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
5780 
5781         /* Limit pruning on unknown scalars to enable deep search for
5782          * potential masking differences from other program paths.
5783          */
5784         if (!off_is_imm) {
5785             env->explore_alu_limits = true;
5786         }
5787     }
5788 
5789     err = update_alu_sanitation_state(aux, alu_state, alu_limit);
5790     if (err < 0) {
5791         return err;
5792     }
5793 do_sim:
5794     /* If we're in commit phase, we're done here given we already
5795      * pushed the truncated dst_reg into the speculative verification
5796      * stack.
5797      *
5798      * Also, when register is a known constant, we rewrite register-based
5799      * operation to immediate-based, and thus do not need masking (and as
5800      * a consequence, do not need to simulate the zero-truncation either).
5801      */
5802     if (commit_window || off_is_imm) {
5803         return 0;
5804     }
5805 
5806     /* Simulate and find potential out-of-bounds access under
5807      * speculative execution from truncation as a result of
5808      * masking when off was not within expected range. If off
5809      * sits in dst, then we temporarily need to move ptr there
5810      * to simulate dst (== 0) +/-= ptr. Needed, for example,
5811      * for cases where we use K-based arithmetic in one direction
5812      * and truncated reg-based in the other in order to explore
5813      * bad access.
5814      */
5815     if (!ptr_is_dst_reg) {
5816         tmp = *dst_reg;
5817         *dst_reg = *ptr_reg;
5818     }
5819     ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
5820     if (!ptr_is_dst_reg && ret) {
5821         *dst_reg = tmp;
5822     }
5823     return !ret ? REASON_STACK : 0;
5824 }
5825 
5826 static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
5827 {
5828     struct bpf_verifier_state *vstate = env->cur_state;
5829 
5830     /* If we simulate paths under speculation, we don't update the
5831      * insn as 'seen' such that when we verify unreachable paths in
5832      * the non-speculative domain, sanitize_dead_code() can still
5833      * rewrite/sanitize them.
5834      */
5835     if (!vstate->speculative) {
5836         env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
5837     }
5838 }
5839 
5840 static int sanitize_err(struct bpf_verifier_env *env, const struct bpf_insn *insn, int reason,
5841                         const struct bpf_reg_state *off_reg, const struct bpf_reg_state *dst_reg)
5842 {
5843     static const char *err = "pointer arithmetic with it prohibited for !root";
5844     const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
5845     u32 dst = insn->dst_reg, src = insn->src_reg;
5846 
5847     switch (reason) {
5848         case REASON_BOUNDS:
5849             verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n", off_reg == dst_reg ? dst : src, err);
5850             break;
5851         case REASON_TYPE:
5852             verbose(env, "R%d has pointer with unsupported alu operation, %s\n", off_reg == dst_reg ? src : dst, err);
5853             break;
5854         case REASON_PATHS:
5855             verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n", dst, op, err);
5856             break;
5857         case REASON_LIMIT:
5858             verbose(env, "R%d tried to %s beyond pointer bounds, %s\n", dst, op, err);
5859             break;
5860         case REASON_STACK:
5861             verbose(env, "R%d could not be pushed for speculative verification, %s\n", dst, err);
5862             break;
5863         default:
5864             verbose(env, "verifier internal error: unknown reason (%d)\n", reason);
5865             break;
5866     }
5867 
5868     return -EACCES;
5869 }
5870 
5871 /* check that stack access falls within stack limits and that 'reg' doesn't
5872  * have a variable offset.
5873  *
5874  * Variable offset is prohibited for unprivileged mode for simplicity since it
5875  * requires corresponding support in Spectre masking for stack ALU.  See also
5876  * retrieve_ptr_limit().
5877  *
5878  *
5879  * 'off' includes 'reg->off'.
5880  */
5881 static int check_stack_access_for_ptr_arithmetic(struct bpf_verifier_env *env, int regno,
5882                                                  const struct bpf_reg_state *reg, int off)
5883 {
5884     if (!tnum_is_const(reg->var_off)) {
5885         char tn_buf[48];
5886 
5887         tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5888         verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", regno, tn_buf, off);
5889         return -EACCES;
5890     }
5891 
5892     if (off >= 0 || off < -MAX_BPF_STACK) {
5893         verbose(env,
5894                 "R%d stack pointer arithmetic goes out of range, "
5895                 "prohibited for !root; off=%d\n",
5896                 regno, off);
5897         return -EACCES;
5898     }
5899 
5900     return 0;
5901 }
5902 
5903 static int sanitize_check_bounds(struct bpf_verifier_env *env, const struct bpf_insn *insn,
5904                                  const struct bpf_reg_state *dst_reg)
5905 {
5906     u32 dst = insn->dst_reg;
5907 
5908     /* For unprivileged we require that resulting offset must be in bounds
5909      * in order to be able to sanitize access later on.
5910      */
5911     if (env->bypass_spec_v1) {
5912         return 0;
5913     }
5914 
5915     switch (dst_reg->type) {
5916         case PTR_TO_STACK:
5917             if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, dst_reg->off + dst_reg->var_off.value)) {
5918                 return -EACCES;
5919             }
5920             break;
5921         case PTR_TO_MAP_VALUE:
5922             if (check_map_access(env, dst, dst_reg->off, 1, false)) {
5923                 verbose(env,
5924                         "R%d pointer arithmetic of map value goes out of range, "
5925                         "prohibited for !root\n",
5926                         dst);
5927                 return -EACCES;
5928             }
5929             break;
5930         default:
5931             break;
5932     }
5933 
5934     return 0;
5935 }
5936 
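/* Example (illustrative) of the two BPF_ADD cases handled below: with a
 * PTR_TO_MAP_VALUE register and a known scalar 4, the constant is folded into
 * the fixed offset (dst_reg->off += 4) and the pointer's bounds are copied
 * unchanged. With an unknown scalar instead, the scalar's bounds are added to
 * the pointer's smin/smax/umin/umax (saturating to unbounded on overflow) and
 * its var_off is accumulated via tnum_add(), while dst_reg->off keeps the
 * pointer's original fixed offset.
 */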
5937 /* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
5938  * Caller should also handle BPF_MOV case separately.
5939  * If we return -EACCES, caller may want to try again treating pointer as a
5940  * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
5941  */
5942 static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn,
5943                                    const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg)
5944 {
5945     struct bpf_verifier_state *vstate = env->cur_state;
5946     struct bpf_func_state *state = vstate->frame[vstate->curframe];
5947     struct bpf_reg_state *regs = state->regs, *dst_reg;
5948     bool known = tnum_is_const(off_reg->var_off);
5949     s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, smin_ptr = ptr_reg->smin_value,
5950         smax_ptr = ptr_reg->smax_value;
5951     u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value,
5952         umax_ptr = ptr_reg->umax_value;
5953     struct bpf_sanitize_info info = {};
5954     u8 opcode = BPF_OP(insn->code);
5955     u32 dst = insn->dst_reg;
5956     int ret;
5957 
5958     dst_reg = &regs[dst];
5959 
5960     if ((known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val || umin_val > umax_val) {
5961         /* Taint dst register if offset had invalid bounds derived from
5962          * e.g. dead branches.
5963          */
5964         __mark_reg_unknown(env, dst_reg);
5965         return 0;
5966     }
5967 
5968     if (BPF_CLASS(insn->code) != BPF_ALU64) {
5969         /* 32-bit ALU ops on pointers produce (meaningless) scalars */
5970         if (opcode == BPF_SUB && env->allow_ptr_leaks) {
5971             __mark_reg_unknown(env, dst_reg);
5972             return 0;
5973         }
5974 
5975         verbose(env, "R%d 32-bit pointer arithmetic prohibited\n", dst);
5976         return -EACCES;
5977     }
5978 
5979     if (ptr_reg->type & PTR_MAYBE_NULL) {
5980         verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", dst,
5981                 reg_type_str(env, ptr_reg->type));
5982         return -EACCES;
5983     }
5984 
5985     switch (base_type(ptr_reg->type)) {
5986         case CONST_PTR_TO_MAP:
5987             /* smin_val represents the known value */
5988             if (known && smin_val == 0 && opcode == BPF_ADD) {
5989                 break;
5990             }
5991             fallthrough;
5992         case PTR_TO_PACKET_END:
5993         case PTR_TO_SOCKET:
5994         case PTR_TO_SOCK_COMMON:
5995         case PTR_TO_TCP_SOCK:
5996         case PTR_TO_XDP_SOCK:
5997             reject:
5998             verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str(env, ptr_reg->type));
5999             return -EACCES;
6000         default:
6001             if (type_may_be_null(ptr_reg->type)) {
6002                 goto reject;
6003             }
6004             break;
6005     }
6006 
6007     /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
6008      * The id may be overwritten later if we create a new variable offset.
6009      */
6010     dst_reg->type = ptr_reg->type;
6011     dst_reg->id = ptr_reg->id;
6012 
6013     if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) {
6014         return -EINVAL;
6015     }
6016 
6017     /* pointer types do not carry 32-bit bounds at the moment. */
6018     verifier_mark_reg32_unbounded(dst_reg);
6019 
6020     if (sanitize_needed(opcode)) {
6021         ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, &info, false);
6022         if (ret < 0) {
6023             return sanitize_err(env, insn, ret, off_reg, dst_reg);
6024         }
6025     }
6026 
6027     switch (opcode) {
6028         case BPF_ADD:
6029             /* We can take a fixed offset as long as it doesn't overflow
6030              * the s32 'off' field
6031              */
6032             if (known && (ptr_reg->off + smin_val == (s64)(s32)(ptr_reg->off + smin_val))) {
6033                 /* pointer += K.  Accumulate it into fixed offset */
6034                 dst_reg->smin_value = smin_ptr;
6035                 dst_reg->smax_value = smax_ptr;
6036                 dst_reg->umin_value = umin_ptr;
6037                 dst_reg->umax_value = umax_ptr;
6038                 dst_reg->var_off = ptr_reg->var_off;
6039                 dst_reg->off = ptr_reg->off + smin_val;
6040                 dst_reg->raw = ptr_reg->raw;
6041                 break;
6042             }
6043             /* A new variable offset is created.  Note that off_reg->off
6044              * == 0, since it's a scalar.
6045              * dst_reg gets the pointer type and since some positive
6046              * integer value was added to the pointer, give it a new 'id'
6047              * if it's a PTR_TO_PACKET.
6048              * this creates a new 'base' pointer, off_reg (variable) gets
6049              * added into the variable offset, and we copy the fixed offset
6050              * from ptr_reg.
6051              */
6052             if (signed_add_overflows(smin_ptr, smin_val) || signed_add_overflows(smax_ptr, smax_val)) {
6053                 dst_reg->smin_value = S64_MIN;
6054                 dst_reg->smax_value = S64_MAX;
6055             } else {
6056                 dst_reg->smin_value = smin_ptr + smin_val;
6057                 dst_reg->smax_value = smax_ptr + smax_val;
6058             }
6059             if (umin_ptr + umin_val < umin_ptr || umax_ptr + umax_val < umax_ptr) {
6060                 dst_reg->umin_value = 0;
6061                 dst_reg->umax_value = U64_MAX;
6062             } else {
6063                 dst_reg->umin_value = umin_ptr + umin_val;
6064                 dst_reg->umax_value = umax_ptr + umax_val;
6065             }
6066             dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
6067             dst_reg->off = ptr_reg->off;
6068             dst_reg->raw = ptr_reg->raw;
6069             if (reg_is_pkt_pointer(ptr_reg)) {
6070                 dst_reg->id = ++env->id_gen;
6071                 /* something was added to pkt_ptr, set range to zero */
6072                 dst_reg->raw = 0;
6073             }
6074             break;
6075         case BPF_SUB:
6076             if (dst_reg == off_reg) {
6077                 /* scalar -= pointer.  Creates an unknown scalar */
6078                 verbose(env, "R%d tried to subtract pointer from scalar\n", dst);
6079                 return -EACCES;
6080             }
6081             /* We don't allow subtraction from FP, because (according to
6082              * the test_verifier.c test "invalid fp arithmetic") JITs might
6083              * not be able to deal with it.
6084              */
6085             if (ptr_reg->type == PTR_TO_STACK) {
6086                 verbose(env, "R%d subtraction from stack pointer prohibited\n", dst);
6087                 return -EACCES;
6088             }
6089             if (known && (ptr_reg->off - smin_val == (s64)(s32)(ptr_reg->off - smin_val))) {
6090                 /* pointer -= K.  Subtract it from fixed offset */
6091                 dst_reg->smin_value = smin_ptr;
6092                 dst_reg->smax_value = smax_ptr;
6093                 dst_reg->umin_value = umin_ptr;
6094                 dst_reg->umax_value = umax_ptr;
6095                 dst_reg->var_off = ptr_reg->var_off;
6096                 dst_reg->id = ptr_reg->id;
6097                 dst_reg->off = ptr_reg->off - smin_val;
6098                 dst_reg->raw = ptr_reg->raw;
6099                 break;
6100             }
6101             /* A new variable offset is created.  If the subtrahend is known
6102              * nonnegative, then any reg->range we had before is still good.
6103              */
6104             if (signed_sub_overflows(smin_ptr, smax_val) || signed_sub_overflows(smax_ptr, smin_val)) {
6105                 /* Overflow possible, we know nothing */
6106                 dst_reg->smin_value = S64_MIN;
6107                 dst_reg->smax_value = S64_MAX;
6108             } else {
6109                 dst_reg->smin_value = smin_ptr - smax_val;
6110                 dst_reg->smax_value = smax_ptr - smin_val;
6111             }
6112             if (umin_ptr < umax_val) {
6113                 /* Overflow possible, we know nothing */
6114                 dst_reg->umin_value = 0;
6115                 dst_reg->umax_value = U64_MAX;
6116             } else {
6117                 /* Cannot overflow (as long as bounds are consistent) */
6118                 dst_reg->umin_value = umin_ptr - umax_val;
6119                 dst_reg->umax_value = umax_ptr - umin_val;
6120             }
6121             dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
6122             dst_reg->off = ptr_reg->off;
6123             dst_reg->raw = ptr_reg->raw;
6124             if (reg_is_pkt_pointer(ptr_reg)) {
6125                 dst_reg->id = ++env->id_gen;
6126                 /* something was added to pkt_ptr, set range to zero */
6127                 if (smin_val < 0) {
6128                     dst_reg->raw = 0;
6129                 }
6130             }
6131             break;
6132         case BPF_AND:
6133         case BPF_OR:
6134         case BPF_XOR:
6135             /* bitwise ops on pointers are troublesome, prohibit. */
6136             verbose(env, "R%d bitwise operator %s on pointer prohibited\n", dst, bpf_alu_string[opcode >> 0x4]);
6137             return -EACCES;
6138         default:
6139             /* other operators (e.g. MUL,LSH) produce non-pointer results */
6140             verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", dst, bpf_alu_string[opcode >> 0x4]);
6141             return -EACCES;
6142     }
6143 
6144     if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) {
6145         return -EINVAL;
6146     }
6147 
6148     reg_bounds_sync(dst_reg);
6149 
6150     if (sanitize_check_bounds(env, insn, dst_reg) < 0) {
6151         return -EACCES;
6152     }
6153     if (sanitize_needed(opcode)) {
6154         ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, &info, true);
6155         if (ret < 0) {
6156             return sanitize_err(env, insn, ret, off_reg, dst_reg);
6157         }
6158     }
6159 
6160     return 0;
6161 }
6162 
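/* Worked example (illustrative) for the add helpers below: if dst_reg is
 * known to be in [10, 20] and src_reg in [1, 5], the result is tracked as
 * [11, 25] in both the signed and unsigned domains. If either the signed or
 * the unsigned addition can wrap (e.g. smax_value + smax_val overflows s64),
 * the corresponding bounds are reset to the full range instead of risking an
 * unsound narrowing.
 */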
6163 static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6164 {
6165     s32 smin_val = src_reg->s32_min_value;
6166     s32 smax_val = src_reg->s32_max_value;
6167     u32 umin_val = src_reg->u32_min_value;
6168     u32 umax_val = src_reg->u32_max_value;
6169 
6170     if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
6171         signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
6172         dst_reg->s32_min_value = S32_MIN;
6173         dst_reg->s32_max_value = S32_MAX;
6174     } else {
6175         dst_reg->s32_min_value += smin_val;
6176         dst_reg->s32_max_value += smax_val;
6177     }
6178     if (dst_reg->u32_min_value + umin_val < umin_val || dst_reg->u32_max_value + umax_val < umax_val) {
6179         dst_reg->u32_min_value = 0;
6180         dst_reg->u32_max_value = U32_MAX;
6181     } else {
6182         dst_reg->u32_min_value += umin_val;
6183         dst_reg->u32_max_value += umax_val;
6184     }
6185 }
6186 
6187 static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6188 {
6189     s64 smin_val = src_reg->smin_value;
6190     s64 smax_val = src_reg->smax_value;
6191     u64 umin_val = src_reg->umin_value;
6192     u64 umax_val = src_reg->umax_value;
6193 
6194     if (signed_add_overflows(dst_reg->smin_value, smin_val) || signed_add_overflows(dst_reg->smax_value, smax_val)) {
6195         dst_reg->smin_value = S64_MIN;
6196         dst_reg->smax_value = S64_MAX;
6197     } else {
6198         dst_reg->smin_value += smin_val;
6199         dst_reg->smax_value += smax_val;
6200     }
6201     if (dst_reg->umin_value + umin_val < umin_val || dst_reg->umax_value + umax_val < umax_val) {
6202         dst_reg->umin_value = 0;
6203         dst_reg->umax_value = U64_MAX;
6204     } else {
6205         dst_reg->umin_value += umin_val;
6206         dst_reg->umax_value += umax_val;
6207     }
6208 }
6209 
6210 static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6211 {
6212     s32 smin_val = src_reg->s32_min_value;
6213     s32 smax_val = src_reg->s32_max_value;
6214     u32 umin_val = src_reg->u32_min_value;
6215     u32 umax_val = src_reg->u32_max_value;
6216 
6217     if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
6218         signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
6219         /* Overflow possible, we know nothing */
6220         dst_reg->s32_min_value = S32_MIN;
6221         dst_reg->s32_max_value = S32_MAX;
6222     } else {
6223         dst_reg->s32_min_value -= smax_val;
6224         dst_reg->s32_max_value -= smin_val;
6225     }
6226     if (dst_reg->u32_min_value < umax_val) {
6227         /* Overflow possible, we know nothing */
6228         dst_reg->u32_min_value = 0;
6229         dst_reg->u32_max_value = U32_MAX;
6230     } else {
6231         /* Cannot overflow (as long as bounds are consistent) */
6232         dst_reg->u32_min_value -= umax_val;
6233         dst_reg->u32_max_value -= umin_val;
6234     }
6235 }
6236 
6237 static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6238 {
6239     s64 smin_val = src_reg->smin_value;
6240     s64 smax_val = src_reg->smax_value;
6241     u64 umin_val = src_reg->umin_value;
6242     u64 umax_val = src_reg->umax_value;
6243 
6244     if (signed_sub_overflows(dst_reg->smin_value, smax_val) || signed_sub_overflows(dst_reg->smax_value, smin_val)) {
6245         /* Overflow possible, we know nothing */
6246         dst_reg->smin_value = S64_MIN;
6247         dst_reg->smax_value = S64_MAX;
6248     } else {
6249         dst_reg->smin_value -= smax_val;
6250         dst_reg->smax_value -= smin_val;
6251     }
6252     if (dst_reg->umin_value < umax_val) {
6253         /* Overflow possible, we know nothing */
6254         dst_reg->umin_value = 0;
6255         dst_reg->umax_value = U64_MAX;
6256     } else {
6257         /* Cannot overflow (as long as bounds are consistent) */
6258         dst_reg->umin_value -= umax_val;
6259         dst_reg->umax_value -= umin_val;
6260     }
6261 }
6262 
6263 static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6264 {
6265     s32 smin_val = src_reg->s32_min_value;
6266     u32 umin_val = src_reg->u32_min_value;
6267     u32 umax_val = src_reg->u32_max_value;
6268 
6269     if (smin_val < 0 || dst_reg->s32_min_value < 0) {
6270         /* Ain't nobody got time to multiply that sign */
6271         verifier_mark_reg32_unbounded(dst_reg);
6272         return;
6273     }
6274     /* Both values are positive, so we can work with unsigned and
6275      * copy the result to signed (unless it exceeds S32_MAX).
6276      */
6277     if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
6278         /* Potential overflow, we know nothing */
6279         verifier_mark_reg32_unbounded(dst_reg);
6280         return;
6281     }
6282     dst_reg->u32_min_value *= umin_val;
6283     dst_reg->u32_max_value *= umax_val;
6284     if (dst_reg->u32_max_value > S32_MAX) {
6285         /* Overflow possible, we know nothing */
6286         dst_reg->s32_min_value = S32_MIN;
6287         dst_reg->s32_max_value = S32_MAX;
6288     } else {
6289         dst_reg->s32_min_value = dst_reg->u32_min_value;
6290         dst_reg->s32_max_value = dst_reg->u32_max_value;
6291     }
6292 }
6293 
6294 static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6295 {
6296     s64 smin_val = src_reg->smin_value;
6297     u64 umin_val = src_reg->umin_value;
6298     u64 umax_val = src_reg->umax_value;
6299 
6300     if (smin_val < 0 || dst_reg->smin_value < 0) {
6301         /* Ain't nobody got time to multiply that sign */
6302         verifier_mark_reg64_unbounded(dst_reg);
6303         return;
6304     }
6305     /* Both values are positive, so we can work with unsigned and
6306      * copy the result to signed (unless it exceeds S64_MAX).
6307      */
6308     if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
6309         /* Potential overflow, we know nothing */
6310         verifier_mark_reg64_unbounded(dst_reg);
6311         return;
6312     }
6313     dst_reg->umin_value *= umin_val;
6314     dst_reg->umax_value *= umax_val;
6315     if (dst_reg->umax_value > S64_MAX) {
6316         /* Overflow possible, we know nothing */
6317         dst_reg->smin_value = S64_MIN;
6318         dst_reg->smax_value = S64_MAX;
6319     } else {
6320         dst_reg->smin_value = dst_reg->umin_value;
6321         dst_reg->smax_value = dst_reg->umax_value;
6322     }
6323 }
6324 
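/* Example (illustrative) for the AND helpers below: the known-one bits of
 * var_off give a lower bound (umin = var_off.value), and since AND can only
 * clear bits the result cannot exceed either operand, so umax becomes
 * min(dst umax, src umax). E.g. dst in [0, 0xff] ANDed with src in [0, 0x0f]
 * yields [0, 0x0f]. Signed bounds are kept only when both inputs are known
 * nonnegative.
 */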
6325 static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6326 {
6327     bool src_known = tnum_subreg_is_const(src_reg->var_off);
6328     bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6329     struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6330     s32 smin_val = src_reg->s32_min_value;
6331     u32 umax_val = src_reg->u32_max_value;
6332 
6333     if (src_known && dst_known) {
6334         verifier_mark_reg32_known(dst_reg, var32_off.value);
6335         return;
6336     }
6337 
6338     /* We get our minimum from the var_off, since that's inherently
6339      * bitwise.  Our maximum is the minimum of the operands' maxima.
6340      */
6341     dst_reg->u32_min_value = var32_off.value;
6342     dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
6343     if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6344         /* Lose signed bounds when ANDing negative numbers,
6345          * ain't nobody got time for that.
6346          */
6347         dst_reg->s32_min_value = S32_MIN;
6348         dst_reg->s32_max_value = S32_MAX;
6349     } else {
6350         /* ANDing two positives gives a positive, so safe to
6351          * cast result into s64.
6352          */
6353         dst_reg->s32_min_value = dst_reg->u32_min_value;
6354         dst_reg->s32_max_value = dst_reg->u32_max_value;
6355     }
6356 }
6357 
6358 static void scalar_min_max_and(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6359 {
6360     bool src_known = tnum_is_const(src_reg->var_off);
6361     bool dst_known = tnum_is_const(dst_reg->var_off);
6362     s64 smin_val = src_reg->smin_value;
6363     u64 umax_val = src_reg->umax_value;
6364 
6365     if (src_known && dst_known) {
6366         verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6367         return;
6368     }
6369 
6370     /* We get our minimum from the var_off, since that's inherently
6371      * bitwise.  Our maximum is the minimum of the operands' maxima.
6372      */
6373     dst_reg->umin_value = dst_reg->var_off.value;
6374     dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
6375     if (dst_reg->smin_value < 0 || smin_val < 0) {
6376         /* Lose signed bounds when ANDing negative numbers,
6377          * ain't nobody got time for that.
6378          */
6379         dst_reg->smin_value = S64_MIN;
6380         dst_reg->smax_value = S64_MAX;
6381     } else {
6382         /* ANDing two positives gives a positive, so safe to
6383          * cast result into s64.
6384          */
6385         dst_reg->smin_value = dst_reg->umin_value;
6386         dst_reg->smax_value = dst_reg->umax_value;
6387     }
6388     /* We may learn something more from the var_off */
6389     verifier_update_reg_bounds(dst_reg);
6390 }
6391 
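/* Example (illustrative) for the OR helpers below: ORing can only set bits,
 * so the minimum is the larger of the operands' minima and the maximum is
 * "every bit that could possibly be set", i.e. var_off.value | var_off.mask.
 * E.g. dst in [0x10, 0x1f] ORed with src in [0x01, 0x0f] has umin
 * max(0x10, 0x01) = 0x10. As with AND, signed bounds survive only when both
 * inputs are known nonnegative.
 */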
6392 static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6393 {
6394     bool src_known = tnum_subreg_is_const(src_reg->var_off);
6395     bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6396     struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6397     s32 smin_val = src_reg->s32_min_value;
6398     u32 umin_val = src_reg->u32_min_value;
6399 
6400     if (src_known && dst_known) {
6401         verifier_mark_reg32_known(dst_reg, var32_off.value);
6402         return;
6403     }
6404 
6405     /* We get our maximum from the var_off, and our minimum is the
6406      * maximum of the operands' minima
6407      */
6408     dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
6409     dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6410     if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6411         /* Lose signed bounds when ORing negative numbers,
6412          * ain't nobody got time for that.
6413          */
6414         dst_reg->s32_min_value = S32_MIN;
6415         dst_reg->s32_max_value = S32_MAX;
6416     } else {
6417         /* ORing two positives gives a positive, so safe to
6418          * cast result into s64.
6419          */
6420         dst_reg->s32_min_value = dst_reg->u32_min_value;
6421         dst_reg->s32_max_value = dst_reg->u32_max_value;
6422     }
6423 }
6424 
6425 static void scalar_min_max_or(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6426 {
6427     bool src_known = tnum_is_const(src_reg->var_off);
6428     bool dst_known = tnum_is_const(dst_reg->var_off);
6429     s64 smin_val = src_reg->smin_value;
6430     u64 umin_val = src_reg->umin_value;
6431 
6432     if (src_known && dst_known) {
6433         verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6434         return;
6435     }
6436 
6437     /* We get our maximum from the var_off, and our minimum is the
6438      * maximum of the operands' minima
6439      */
6440     dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
6441     dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6442     if (dst_reg->smin_value < 0 || smin_val < 0) {
6443         /* Lose signed bounds when ORing negative numbers,
6444          * ain't nobody got time for that.
6445          */
6446         dst_reg->smin_value = S64_MIN;
6447         dst_reg->smax_value = S64_MAX;
6448     } else {
6449         /* ORing two positives gives a positive, so safe to
6450          * cast result into s64.
6451          */
6452         dst_reg->smin_value = dst_reg->umin_value;
6453         dst_reg->smax_value = dst_reg->umax_value;
6454     }
6455     /* We may learn something more from the var_off */
6456     verifier_update_reg_bounds(dst_reg);
6457 }
6458 
6459 static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6460 {
6461     bool src_known = tnum_subreg_is_const(src_reg->var_off);
6462     bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6463     struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6464     s32 smin_val = src_reg->s32_min_value;
6465 
6466     if (src_known && dst_known) {
6467         verifier_mark_reg32_known(dst_reg, var32_off.value);
6468         return;
6469     }
6470 
6471     /* We get both minimum and maximum from the var32_off. */
6472     dst_reg->u32_min_value = var32_off.value;
6473     dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6474 
6475     if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
6476         /* XORing two positive sign numbers gives a positive,
6477          * so safe to cast u32 result into s32.
6478          */
6479         dst_reg->s32_min_value = dst_reg->u32_min_value;
6480         dst_reg->s32_max_value = dst_reg->u32_max_value;
6481     } else {
6482         dst_reg->s32_min_value = S32_MIN;
6483         dst_reg->s32_max_value = S32_MAX;
6484     }
6485 }
6486 
6487 static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6488 {
6489     bool src_known = tnum_is_const(src_reg->var_off);
6490     bool dst_known = tnum_is_const(dst_reg->var_off);
6491     s64 smin_val = src_reg->smin_value;
6492 
6493     if (src_known && dst_known) {
6494         /* dst_reg->var_off.value has been updated earlier */
6495         verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6496         return;
6497     }
6498 
6499     /* We get both minimum and maximum from the var_off. */
6500     dst_reg->umin_value = dst_reg->var_off.value;
6501     dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6502 
6503     if (dst_reg->smin_value >= 0 && smin_val >= 0) {
6504         /* XORing two positive sign numbers gives a positive,
6505          * so safe to cast u64 result into s64.
6506          */
6507         dst_reg->smin_value = dst_reg->umin_value;
6508         dst_reg->smax_value = dst_reg->umax_value;
6509     } else {
6510         dst_reg->smin_value = S64_MIN;
6511         dst_reg->smax_value = S64_MAX;
6512     }
6513 
6514     verifier_update_reg_bounds(dst_reg);
6515 }
6516 
6517 static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val)
6518 {
6519     /* We lose all sign bit information (except what we can pick
6520      * up from var_off)
6521      */
6522     dst_reg->s32_min_value = S32_MIN;
6523     dst_reg->s32_max_value = S32_MAX;
6524     /* If we might shift our top bit out, then we know nothing */
6525     if (umax_val > VERIFIER_THIRTYONE || dst_reg->u32_max_value > 1ULL << (VERIFIER_THIRTYONE - umax_val)) {
6526         dst_reg->u32_min_value = 0;
6527         dst_reg->u32_max_value = U32_MAX;
6528     } else {
6529         dst_reg->u32_min_value <<= umin_val;
6530         dst_reg->u32_max_value <<= umax_val;
6531     }
6532 }
6533 
6534 static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6535 {
6536     u32 umax_val = src_reg->u32_max_value;
6537     u32 umin_val = src_reg->u32_min_value;
6538     /* u32 alu operation will zext upper bits */
6539     struct tnum subreg = tnum_subreg(dst_reg->var_off);
6540 
6541     __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6542     dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
6543     /* Not required but being careful mark reg64 bounds as unknown so
6544      * that we are forced to pick them up from tnum and zext later and
6545      * if some path skips this step we are still safe.
6546      */
6547     verifier_mark_reg64_unbounded(dst_reg);
6548     verifier_update_reg32_bounds(dst_reg);
6549 }
6550 
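/* Example (illustrative) for the 64-bit left-shift helper below: the common
 * sign-extension idiom "reg <<= 32; reg s>>= 32" is special-cased. If the
 * 32-bit bounds are, say, [0, 100] and the shift amount is exactly 32, the
 * 64-bit signed bounds become [0, 100 << 32] instead of being thrown away;
 * for any other shift the signed bounds are reset and only the unsigned
 * bounds and var_off are propagated.
 */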
6551 static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val)
6552 {
6553     /* Special case <<32 because it is a common compiler pattern to sign
6554      * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
6555      * positive we know this shift will also be positive so we can track
6556      * bounds correctly. Otherwise we lose all sign bit information except
6557      * what we can pick up from var_off. Perhaps we can generalize this
6558      * later to shifts of any length.
6559      */
6560     if (umin_val == 0x20 && umax_val == 0x20 && dst_reg->s32_max_value >= 0) {
6561         dst_reg->smax_value = (s64)dst_reg->s32_max_value << 0x20;
6562     } else {
6563         dst_reg->smax_value = S64_MAX;
6564     }
6565 
6566     if (umin_val == 0x20 && umax_val == 0x20 && dst_reg->s32_min_value >= 0) {
6567         dst_reg->smin_value = (s64)dst_reg->s32_min_value << 0x20;
6568     } else {
6569         dst_reg->smin_value = S64_MIN;
6570     }
6571 
6572     /* If we might shift our top bit out, then we know nothing */
6573     if (dst_reg->umax_value > 1ULL << (0x3f - umax_val)) {
6574         dst_reg->umin_value = 0;
6575         dst_reg->umax_value = U64_MAX;
6576     } else {
6577         dst_reg->umin_value <<= umin_val;
6578         dst_reg->umax_value <<= umax_val;
6579     }
6580 }
6581 
6582 static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6583 {
6584     u64 umax_val = src_reg->umax_value;
6585     u64 umin_val = src_reg->umin_value;
6586 
6587     /* scalar64 calc uses 32bit unshifted bounds so must be called first */
6588     __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
6589     __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6590 
6591     dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
6592     /* We may learn something more from the var_off */
6593     verifier_update_reg_bounds(dst_reg);
6594 }
6595 
6596 static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6597 {
6598     struct tnum subreg = tnum_subreg(dst_reg->var_off);
6599     u32 umax_val = src_reg->u32_max_value;
6600     u32 umin_val = src_reg->u32_min_value;
6601 
6602     /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6603      * be negative, then either:
6604      * 1) src_reg might be zero, so the sign bit of the result is
6605      *    unknown, so we lose our signed bounds
6606      * 2) it's known negative, thus the unsigned bounds capture the
6607      *    signed bounds
6608      * 3) the signed bounds cross zero, so they tell us nothing
6609      *    about the result
6610      * If the value in dst_reg is known nonnegative, then again the
6611      *    unsigned bounds capture the signed bounds.
6612      * Thus, in all cases it suffices to blow away our signed bounds
6613      * and rely on inferring new ones from the unsigned bounds and
6614      * var_off of the result.
6615      */
6616     dst_reg->s32_min_value = S32_MIN;
6617     dst_reg->s32_max_value = S32_MAX;
6618 
6619     dst_reg->var_off = tnum_rshift(subreg, umin_val);
6620     dst_reg->u32_min_value >>= umax_val;
6621     dst_reg->u32_max_value >>= umin_val;
6622 
6623     verifier_mark_reg64_unbounded(dst_reg);
6624     verifier_update_reg32_bounds(dst_reg);
6625 }
6626 
6627 static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6628 {
6629     u64 umax_val = src_reg->umax_value;
6630     u64 umin_val = src_reg->umin_value;
6631 
6632     /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6633      * be negative, then either:
6634      * 1) src_reg might be zero, so the sign bit of the result is
6635      *    unknown, so we lose our signed bounds
6636      * 2) it's known negative, thus the unsigned bounds capture the
6637      *    signed bounds
6638      * 3) the signed bounds cross zero, so they tell us nothing
6639      *    about the result
6640      * If the value in dst_reg is known nonnegative, then again the
6641      *    unsigned bounds capture the signed bounds.
6642      * Thus, in all cases it suffices to blow away our signed bounds
6643      * and rely on inferring new ones from the unsigned bounds and
6644      * var_off of the result.
6645      */
6646     dst_reg->smin_value = S64_MIN;
6647     dst_reg->smax_value = S64_MAX;
6648     dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
6649     dst_reg->umin_value >>= umax_val;
6650     dst_reg->umax_value >>= umin_val;
6651 
6652     /* It's not easy to operate on alu32 bounds here because it depends
6653      * on bits being shifted in. Take the easy way out and mark unbounded
6654      * so we can recalculate later from tnum.
6655      */
6656     verifier_mark_reg32_unbounded(dst_reg);
6657     verifier_update_reg_bounds(dst_reg);
6658 }
6659 
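/* Example (illustrative) for the arithmetic-right-shift helpers below: the
 * shift amount is a known constant here (src_known is true), so the signed
 * bounds can simply be shifted, e.g. [-16, -4] s>> 2 becomes [-4, -1]. The
 * unsigned bounds are blown away and later recomputed from the resulting
 * var_off.
 */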
6660 static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6661 {
6662     u64 umin_val = src_reg->u32_min_value;
6663 
6664     /* Upon reaching here, src_known is true and
6665      * umax_val is equal to umin_val.
6666      */
6667     dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
6668     dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
6669 
6670     dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 0x20);
6671 
6672     /* blow away the dst_reg umin_value/umax_value and rely on
6673      * dst_reg var_off to refine the result.
6674      */
6675     dst_reg->u32_min_value = 0;
6676     dst_reg->u32_max_value = U32_MAX;
6677 
6678     verifier_mark_reg64_unbounded(dst_reg);
6679     verifier_update_reg32_bounds(dst_reg);
6680 }
6681 
6682 static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6683 {
6684     u64 umin_val = src_reg->umin_value;
6685 
6686     /* Upon reaching here, src_known is true and umax_val is equal
6687      * to umin_val.
6688      */
6689     dst_reg->smin_value >>= umin_val;
6690     dst_reg->smax_value >>= umin_val;
6691 
6692     dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 0x40);
6693 
6694     /* blow away the dst_reg umin_value/umax_value and rely on
6695      * dst_reg var_off to refine the result.
6696      */
6697     dst_reg->umin_value = 0;
6698     dst_reg->umax_value = U64_MAX;
6699 
6700     /* It's not easy to operate on alu32 bounds here because it depends
6701      * on bits being shifted in from the upper 32 bits. Take the easy way
6702      * out and mark them unbounded so we can recalculate later from the tnum.
6703      */
6704     verifier_mark_reg32_unbounded(dst_reg);
6705     verifier_update_reg_bounds(dst_reg);
6706 }
6707 
6708 /* WARNING: This function does calculations on 64-bit values, but the actual
6709  * execution may occur on 32-bit values. Therefore, things like bitshifts
6710  * need extra checks in the 32-bit case.
6711  */
6712 static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn,
6713                                       struct bpf_reg_state *dst_reg, struct bpf_reg_state src_reg)
6714 {
6715     struct bpf_reg_state *regs = cur_regs(env);
6716     u8 opcode = BPF_OP(insn->code);
6717     bool src_known;
6718     s64 smin_val, smax_val;
6719     u64 umin_val, umax_val;
6720     s32 s32_min_val, s32_max_val;
6721     u32 u32_min_val, u32_max_val;
6722     u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? VERIFIER_SIXTYFOUR : VERIFIER_THIRTYTWO;
6723     bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
6724     int ret;
6725 
6726     smin_val = src_reg.smin_value;
6727     smax_val = src_reg.smax_value;
6728     umin_val = src_reg.umin_value;
6729     umax_val = src_reg.umax_value;
6730 
6731     s32_min_val = src_reg.s32_min_value;
6732     s32_max_val = src_reg.s32_max_value;
6733     u32_min_val = src_reg.u32_min_value;
6734     u32_max_val = src_reg.u32_max_value;
6735 
6736     if (alu32) {
6737         src_known = tnum_subreg_is_const(src_reg.var_off);
6738         if ((src_known && (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) || s32_min_val > s32_max_val ||
6739             u32_min_val > u32_max_val) {
6740             /* Taint dst register if offset had invalid bounds
6741              * derived from e.g. dead branches.
6742              */
6743             __mark_reg_unknown(env, dst_reg);
6744             return 0;
6745         }
6746     } else {
6747         src_known = tnum_is_const(src_reg.var_off);
6748         if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val ||
6749             umin_val > umax_val) {
6750             /* Taint dst register if offset had invalid bounds
6751              * derived from e.g. dead branches.
6752              */
6753             __mark_reg_unknown(env, dst_reg);
6754             return 0;
6755         }
6756     }
6757 
6758     if (!src_known && opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
6759         __mark_reg_unknown(env, dst_reg);
6760         return 0;
6761     }
6762 
6763     if (sanitize_needed(opcode)) {
6764         ret = sanitize_val_alu(env, insn);
6765         if (ret < 0) {
6766             return sanitize_err(env, insn, ret, NULL, NULL);
6767         }
6768     }
6769 
6770     /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
6771      * There are two classes of instructions: for the first class we track
6772      * both alu32 and alu64 sign/unsigned bounds independently; this provides
6773      * the greatest amount of precision when alu operations are mixed with
6774      * jmp32 operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL,
6775      * BPF_AND, BPF_OR, and BPF_XOR. This is possible because these ops have
6776      * fairly easy to understand and calculate behavior in both 32-bit and
6777      * 64-bit alu ops. See alu32 verifier tests for examples. The second class
6778      * of operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however, is not so easy
6779      * with regard to tracking sign/unsigned bounds because the bits may
6780      * cross subreg boundaries in the alu64 case. When this happens we mark
6781      * the reg unbounded in the subreg bound space and use the resulting
6782      * tnum to calculate an approximation of the sign/unsigned bounds.
6783      */
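    /* E.g. for BPF_ADD the low 32 bits of the result depend only on the low
     * 32 bits of the operands, so scalar32_min_max_add() can track the subreg
     * bounds alongside the 64-bit ones; the shifts below instead mark one of
     * the two bound spaces unbounded and recover it from the resulting tnum.
     */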
6784     switch (opcode) {
6785         case BPF_ADD:
6786             scalar32_min_max_add(dst_reg, &src_reg);
6787             scalar_min_max_add(dst_reg, &src_reg);
6788             dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
6789             break;
6790         case BPF_SUB:
6791             scalar32_min_max_sub(dst_reg, &src_reg);
6792             scalar_min_max_sub(dst_reg, &src_reg);
6793             dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
6794             break;
6795         case BPF_MUL:
6796             dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
6797             scalar32_min_max_mul(dst_reg, &src_reg);
6798             scalar_min_max_mul(dst_reg, &src_reg);
6799             break;
6800         case BPF_AND:
6801             dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
6802             scalar32_min_max_and(dst_reg, &src_reg);
6803             scalar_min_max_and(dst_reg, &src_reg);
6804             break;
6805         case BPF_OR:
6806             dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
6807             scalar32_min_max_or(dst_reg, &src_reg);
6808             scalar_min_max_or(dst_reg, &src_reg);
6809             break;
6810         case BPF_XOR:
6811             dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
6812             scalar32_min_max_xor(dst_reg, &src_reg);
6813             scalar_min_max_xor(dst_reg, &src_reg);
6814             break;
6815         case BPF_LSH:
6816             if (umax_val >= insn_bitness) {
6817                 /* Shifts greater than 31 or 63 are undefined.
6818                  * This includes shifts by a negative number.
6819                  */
6820                 mark_reg_unknown(env, regs, insn->dst_reg);
6821                 break;
6822             }
6823             if (alu32) {
6824                 scalar32_min_max_lsh(dst_reg, &src_reg);
6825             } else {
6826                 scalar_min_max_lsh(dst_reg, &src_reg);
6827             }
6828             break;
6829         case BPF_RSH:
6830             if (umax_val >= insn_bitness) {
6831                 /* Shifts greater than 31 or 63 are undefined.
6832                  * This includes shifts by a negative number.
6833                  */
6834                 mark_reg_unknown(env, regs, insn->dst_reg);
6835                 break;
6836             }
6837             if (alu32) {
6838                 scalar32_min_max_rsh(dst_reg, &src_reg);
6839             } else {
6840                 scalar_min_max_rsh(dst_reg, &src_reg);
6841             }
6842             break;
6843         case BPF_ARSH:
6844             if (umax_val >= insn_bitness) {
6845                 /* Shifts greater than 31 or 63 are undefined.
6846                  * This includes shifts by a negative number.
6847                  */
6848                 mark_reg_unknown(env, regs, insn->dst_reg);
6849                 break;
6850             }
6851             if (alu32) {
6852                 scalar32_min_max_arsh(dst_reg, &src_reg);
6853             } else {
6854                 scalar_min_max_arsh(dst_reg, &src_reg);
6855             }
6856             break;
6857         default:
6858             mark_reg_unknown(env, regs, insn->dst_reg);
6859             break;
6860     }
6861 
6862     /* ALU32 ops are zero-extended into the 64-bit register */
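    /* E.g. "w0 = -1" leaves r0 == 0xffffffff, not 0xffffffffffffffff. */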
6863     if (alu32) {
6864         zext_32_to_64(dst_reg);
6865     }
6866 
6867     reg_bounds_sync(dst_reg);
6868     return 0;
6869 }
6870 
6871 /* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
6872  * and var_off.
6873  */
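/* For example (illustrative): with r1 = PTR_TO_MAP_VALUE and r2 a scalar,
 * "r1 += r2" is handled by adjust_ptr_min_max_vals(); "r2 += r1" is the
 * reversed "scalar += pointer" case; and adding two scalars falls through
 * to adjust_scalar_min_max_vals().
 */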
6874 static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn)
6875 {
6876     struct bpf_verifier_state *vstate = env->cur_state;
6877     struct bpf_func_state *state = vstate->frame[vstate->curframe];
6878     struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
6879     struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
6880     u8 opcode = BPF_OP(insn->code);
6881     int err;
6882 
6883     dst_reg = &regs[insn->dst_reg];
6884     src_reg = NULL;
6885     if (dst_reg->type != SCALAR_VALUE) {
6886         ptr_reg = dst_reg;
6887     } else {
6888         /* Make sure ID is cleared otherwise dst_reg min/max could be
6889          * incorrectly propagated into other registers by find_equal_scalars()
6890          */
6891         dst_reg->id = 0;
6892     }
6893     if (BPF_SRC(insn->code) == BPF_X) {
6894         src_reg = &regs[insn->src_reg];
6895         if (src_reg->type != SCALAR_VALUE) {
6896             if (dst_reg->type != SCALAR_VALUE) {
6897                 /* Combining two pointers by any ALU op yields
6898                  * an arbitrary scalar. Disallow all math except
6899                  * pointer subtraction
6900                  */
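                /* E.g. subtracting two map-value pointers yields an unknown
                 * scalar (and only if pointer leaks are allowed); adding or
                 * AND-ing two pointers is rejected with -EACCES below.
                 */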
6901                 if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6902                     mark_reg_unknown(env, regs, insn->dst_reg);
6903                     return 0;
6904                 }
6905                 verbose(env, "R%d pointer %s pointer prohibited\n", insn->dst_reg,
6906                         bpf_alu_string[opcode >> VERIFIER_FOUR]);
6907                 return -EACCES;
6908             } else {
6909                 /* scalar += pointer
6910                  * This is legal, but we have to reverse our
6911                  * src/dest handling in computing the range
6912                  */
6913                 err = mark_chain_precision(env, insn->dst_reg);
6914                 if (err) {
6915                     return err;
6916                 }
6917                 return adjust_ptr_min_max_vals(env, insn, src_reg, dst_reg);
6918             }
6919         } else if (ptr_reg) {
6920             /* pointer += scalar */
6921             err = mark_chain_precision(env, insn->src_reg);
6922             if (err) {
6923                 return err;
6924             }
6925             return adjust_ptr_min_max_vals(env, insn, dst_reg, src_reg);
6926         }
6927     } else {
6928         /* Pretend the src is a reg with a known value, since we only
6929          * need to be able to read from this state.
6930          */
6931         off_reg.type = SCALAR_VALUE;
6932         verifier_mark_reg_known(&off_reg, insn->imm);
6933         src_reg = &off_reg;
6934         if (ptr_reg) { /* pointer += K */
6935             return adjust_ptr_min_max_vals(env, insn, ptr_reg, src_reg);
6936         }
6937     }
6938 
6939     /* Got here implies adding two SCALAR_VALUEs */
6940     if (WARN_ON_ONCE(ptr_reg)) {
6941         print_verifier_state(env, state);
6942         verbose(env, "verifier internal error: unexpected ptr_reg\n");
6943         return -EINVAL;
6944     }
6945     if (WARN_ON(!src_reg)) {
6946         print_verifier_state(env, state);
6947         verbose(env, "verifier internal error: no src_reg\n");
6948         return -EINVAL;
6949     }
6950     return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
6951 }
6952 
6953 /* check validity of 32-bit and 64-bit arithmetic operations */
6954 static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
6955 {
6956     struct bpf_reg_state *regs = cur_regs(env);
6957     u8 opcode = BPF_OP(insn->code);
6958     int err;
6959 
6960     if (opcode == BPF_END || opcode == BPF_NEG) {
6961         if (opcode == BPF_NEG) {
6962             if (BPF_SRC(insn->code) != 0 || insn->src_reg != BPF_REG_0 || insn->off != 0 || insn->imm != 0) {
6963                 verbose(env, "BPF_NEG uses reserved fields\n");
6964                 return -EINVAL;
6965             }
6966         } else {
6967             if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
6968                 (insn->imm != VERIFIER_SIXTEEN && insn->imm != VERIFIER_THIRTYTWO && insn->imm != VERIFIER_SIXTYFOUR) ||
6969                 BPF_CLASS(insn->code) == BPF_ALU64) {
6970                 verbose(env, "BPF_END uses reserved fields\n");
6971                 return -EINVAL;
6972             }
6973         }
6974 
6975         /* check src operand */
6976         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6977         if (err) {
6978             return err;
6979         }
6980 
6981         if (is_pointer_value(env, insn->dst_reg)) {
6982             verbose(env, "R%d pointer arithmetic prohibited\n", insn->dst_reg);
6983             return -EACCES;
6984         }
6985 
6986         /* check dest operand */
6987         err = check_reg_arg(env, insn->dst_reg, DST_OP);
6988         if (err) {
6989             return err;
6990         }
6991     } else if (opcode == BPF_MOV) {
6992         if (BPF_SRC(insn->code) == BPF_X) {
6993             if (insn->imm != 0 || insn->off != 0) {
6994                 verbose(env, "BPF_MOV uses reserved fields\n");
6995                 return -EINVAL;
6996             }
6997             /* check src operand */
6998             err = check_reg_arg(env, insn->src_reg, SRC_OP);
6999             if (err) {
7000                 return err;
7001             }
7002         } else {
7003             if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7004                 verbose(env, "BPF_MOV uses reserved fields\n");
7005                 return -EINVAL;
7006             }
7007         }
7008 
7009         /* check dest operand, mark as required later */
7010         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7011         if (err) {
7012             return err;
7013         }
7014 
7015         if (BPF_SRC(insn->code) == BPF_X) {
7016             struct bpf_reg_state *src_reg = regs + insn->src_reg;
7017             struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
7018 
7019             if (BPF_CLASS(insn->code) == BPF_ALU64) {
7020                 /* case: R1 = R2
7021                  * copy register state to dest reg
7022                  */
7023                 if (src_reg->type == SCALAR_VALUE && !src_reg->id) {
7024                     /* Assign src and dst registers the same ID
7025                      * that will be used by find_equal_scalars()
7026                      * to propagate min/max range.
7027                      */
7028                     src_reg->id = ++env->id_gen;
7029                 }
7030                 *dst_reg = *src_reg;
7031                 dst_reg->live |= REG_LIVE_WRITTEN;
7032                 dst_reg->subreg_def = DEF_NOT_SUBREG;
7033             } else {
7034                 /* R1 = (u32) R2 */
7035                 if (is_pointer_value(env, insn->src_reg)) {
7036                     verbose(env, "R%d partial copy of pointer\n", insn->src_reg);
7037                     return -EACCES;
7038                 } else if (src_reg->type == SCALAR_VALUE) {
7039                     *dst_reg = *src_reg;
7040                     /* Make sure ID is cleared otherwise
7041                      * dst_reg min/max could be incorrectly
7042                      * propagated into src_reg by find_equal_scalars()
7043                      */
7044                     dst_reg->id = 0;
7045                     dst_reg->live |= REG_LIVE_WRITTEN;
7046                     dst_reg->subreg_def = env->insn_idx + 1;
7047                 } else {
7048                     mark_reg_unknown(env, regs, insn->dst_reg);
7049                 }
7050                 zext_32_to_64(dst_reg);
7051 
7052                 reg_bounds_sync(dst_reg);
7053             }
7054         } else {
7055             /* case: R = imm
7056              * remember the value we stored into this reg
7057              */
7058             /* clear any state __mark_reg_known doesn't set */
7059             mark_reg_unknown(env, regs, insn->dst_reg);
7060             regs[insn->dst_reg].type = SCALAR_VALUE;
7061             if (BPF_CLASS(insn->code) == BPF_ALU64) {
7062                 verifier_mark_reg_known(regs + insn->dst_reg, insn->imm);
7063             } else {
7064                 verifier_mark_reg_known(regs + insn->dst_reg, (u32)insn->imm);
7065             }
7066         }
7067     } else if (opcode > BPF_END) {
7068         verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
7069         return -EINVAL;
7070     } else { /* all other ALU ops: and, sub, xor, add, ... */
7071         if (BPF_SRC(insn->code) == BPF_X) {
7072             if (insn->imm != 0 || insn->off != 0) {
7073                 verbose(env, "BPF_ALU uses reserved fields\n");
7074                 return -EINVAL;
7075             }
7076             /* check src1 operand */
7077             err = check_reg_arg(env, insn->src_reg, SRC_OP);
7078             if (err) {
7079                 return err;
7080             }
7081         } else {
7082             if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7083                 verbose(env, "BPF_ALU uses reserved fields\n");
7084                 return -EINVAL;
7085             }
7086         }
7087 
7088         /* check src2 operand */
7089         err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7090         if (err) {
7091             return err;
7092         }
7093 
7094         if ((opcode == BPF_MOD || opcode == BPF_DIV) && BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
7095             verbose(env, "div by zero\n");
7096             return -EINVAL;
7097         }
7098 
7099         if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
7100             int size = BPF_CLASS(insn->code) == BPF_ALU64 ? VERIFIER_SIXTYFOUR : VERIFIER_THIRTYTWO;
7101             if (insn->imm < 0 || insn->imm >= size) {
7102                 verbose(env, "invalid shift %d\n", insn->imm);
7103                 return -EINVAL;
7104             }
7105         }
7106         /* check dest operand */
7107         err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7108         if (err) {
7109             return err;
7110         }
7111         return adjust_reg_min_max_vals(env, insn);
7112     }
7113 
7114     return 0;
7115 }
7116 
7117 static void __find_good_pkt_pointers(struct bpf_func_state *state, struct bpf_reg_state *dst_reg,
7118                                      enum bpf_reg_type type, u16 new_range)
7119 {
7120     struct bpf_reg_state *reg;
7121     int i;
7122 
7123     for (i = 0; i < MAX_BPF_REG; i++) {
7124         reg = &state->regs[i];
7125         if (reg->type == type && reg->id == dst_reg->id) {
7126             /* keep the maximum range already checked */
7127             reg->range = max(reg->range, new_range);
7128         }
7129     }
7130 
7131     bpf_for_each_spilled_reg(i, state, reg)
7132     {
7133         if (!reg) {
7134             continue;
7135         }
7136         if (reg->type == type && reg->id == dst_reg->id) {
7137             reg->range = max(reg->range, new_range);
7138         }
7139     }
7140 }
7141 
7142 static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg,
7143                                    enum bpf_reg_type type, bool range_right_open)
7144 {
7145     u16 new_range;
7146     int i;
7147 
7148     if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) {
7149         /* This doesn't give us any range */
7150         return;
7151     }
7152 
7153     if (dst_reg->umax_value > MAX_PACKET_OFF || dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) {
7154         /* Risk of overflow.  For instance, ptr + (1<<63) may be less
7155          * than pkt_end, but that's because it's also less than pkt.
7156          */
7157         return;
7158     }
7159 
7160     new_range = dst_reg->off;
7161     if (range_right_open) {
7162         new_range--;
7163     }
7164 
7165     /* Examples for register markings:
7166      *
7167      * pkt_data in dst register:
7168      *
7169      *   r2 = r3;
7170      *   r2 += 8;
7171      *   if (r2 > pkt_end) goto <handle exception>
7172      *   <access okay>
7173      *
7174      *   r2 = r3;
7175      *   r2 += 8;
7176      *   if (r2 < pkt_end) goto <access okay>
7177      *   <handle exception>
7178      *
7179      *   Where:
7180      *     r2 == dst_reg, pkt_end == src_reg
7181      *     r2=pkt(id=n,off=8,r=0)
7182      *     r3=pkt(id=n,off=0,r=0)
7183      *
7184      * pkt_data in src register:
7185      *
7186      *   r2 = r3;
7187      *   r2 += 8;
7188      *   if (pkt_end >= r2) goto <access okay>
7189      *   <handle exception>
7190      *
7191      *   r2 = r3;
7192      *   r2 += 8;
7193      *   if (pkt_end <= r2) goto <handle exception>
7194      *   <access okay>
7195      *
7196      *   Where:
7197      *     pkt_end == dst_reg, r2 == src_reg
7198      *     r2=pkt(id=n,off=8,r=0)
7199      *     r3=pkt(id=n,off=0,r=0)
7200      *
7201      * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
7202      * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
7203      * and [r3, r3 + 8-1) respectively is safe to access depending on
7204      * the check.
7205      */
7206 
7207     /* If our ids match, then we must have the same max_value.  And we
7208      * don't care about the other reg's fixed offset, since if it's too big
7209      * the range won't allow anything.
7210      * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
7211      */
7212     for (i = 0; i <= vstate->curframe; i++) {
7213         __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, new_range);
7214     }
7215 }
7216 
7217 static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
7218 {
7219     struct tnum subreg = tnum_subreg(reg->var_off);
7220     s32 sval = (s32)val;
7221 
7222     switch (opcode) {
7223         case BPF_JEQ:
7224             if (tnum_is_const(subreg)) {
7225                 return !!tnum_equals_const(subreg, val);
7226             }
7227             break;
7228         case BPF_JNE:
7229             if (tnum_is_const(subreg)) {
7230                 return !tnum_equals_const(subreg, val);
7231             }
7232             break;
7233         case BPF_JSET:
7234             if ((~subreg.mask & subreg.value) & val) {
7235                 return 1;
7236             }
7237             if (!((subreg.mask | subreg.value) & val)) {
7238                 return 0;
7239             }
7240             break;
7241         case BPF_JGT:
7242             if (reg->u32_min_value > val) {
7243                 return 1;
7244             } else if (reg->u32_max_value <= val) {
7245                 return 0;
7246             }
7247             break;
7248         case BPF_JSGT:
7249             if (reg->s32_min_value > sval) {
7250                 return 1;
7251             } else if (reg->s32_max_value <= sval) {
7252                 return 0;
7253             }
7254             break;
7255         case BPF_JLT:
7256             if (reg->u32_max_value < val) {
7257                 return 1;
7258             } else if (reg->u32_min_value >= val) {
7259                 return 0;
7260             }
7261             break;
7262         case BPF_JSLT:
7263             if (reg->s32_max_value < sval) {
7264                 return 1;
7265             } else if (reg->s32_min_value >= sval) {
7266                 return 0;
7267             }
7268             break;
7269         case BPF_JGE:
7270             if (reg->u32_min_value >= val) {
7271                 return 1;
7272             } else if (reg->u32_max_value < val) {
7273                 return 0;
7274             }
7275             break;
7276         case BPF_JSGE:
7277             if (reg->s32_min_value >= sval) {
7278                 return 1;
7279             } else if (reg->s32_max_value < sval) {
7280                 return 0;
7281             }
7282             break;
7283         case BPF_JLE:
7284             if (reg->u32_max_value <= val) {
7285                 return 1;
7286             } else if (reg->u32_min_value > val) {
7287                 return 0;
7288             }
7289             break;
7290         case BPF_JSLE:
7291             if (reg->s32_max_value <= sval) {
7292                 return 1;
7293             } else if (reg->s32_min_value > sval) {
7294                 return 0;
7295             }
7296             break;
7297     }
7298 
7299     return -1;
7300 }
7301 
7302 static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7303 {
7304     s64 sval = (s64)val;
7305 
7306     switch (opcode) {
7307         case BPF_JEQ:
7308             if (tnum_is_const(reg->var_off)) {
7309                 return !!tnum_equals_const(reg->var_off, val);
7310             }
7311             break;
7312         case BPF_JNE:
7313             if (tnum_is_const(reg->var_off)) {
7314                 return !tnum_equals_const(reg->var_off, val);
7315             }
7316             break;
7317         case BPF_JSET:
7318             if ((~reg->var_off.mask & reg->var_off.value) & val) {
7319                 return 1;
7320             }
7321             if (!((reg->var_off.mask | reg->var_off.value) & val)) {
7322                 return 0;
7323             }
7324             break;
7325         case BPF_JGT:
7326             if (reg->umin_value > val) {
7327                 return 1;
7328             } else if (reg->umax_value <= val) {
7329                 return 0;
7330             }
7331             break;
7332         case BPF_JSGT:
7333             if (reg->smin_value > sval) {
7334                 return 1;
7335             } else if (reg->smax_value <= sval) {
7336                 return 0;
7337             }
7338             break;
7339         case BPF_JLT:
7340             if (reg->umax_value < val) {
7341                 return 1;
7342             } else if (reg->umin_value >= val) {
7343                 return 0;
7344             }
7345             break;
7346         case BPF_JSLT:
7347             if (reg->smax_value < sval) {
7348                 return 1;
7349             } else if (reg->smin_value >= sval) {
7350                 return 0;
7351             }
7352             break;
7353         case BPF_JGE:
7354             if (reg->umin_value >= val) {
7355                 return 1;
7356             } else if (reg->umax_value < val) {
7357                 return 0;
7358             }
7359             break;
7360         case BPF_JSGE:
7361             if (reg->smin_value >= sval) {
7362                 return 1;
7363             } else if (reg->smax_value < sval) {
7364                 return 0;
7365             }
7366             break;
7367         case BPF_JLE:
7368             if (reg->umax_value <= val) {
7369                 return 1;
7370             } else if (reg->umin_value > val) {
7371                 return 0;
7372             }
7373             break;
7374         case BPF_JSLE:
7375             if (reg->smax_value <= sval) {
7376                 return 1;
7377             } else if (reg->smin_value > sval) {
7378                 return 0;
7379             }
7380             break;
7381     }
7382 
7383     return -1;
7384 }
7385 
7386 /* compute branch direction of the expression "if (reg opcode val) goto target;"
7387  * and return:
7388  *  1 - branch will be taken and "goto target" will be executed
7389  *  0 - branch will not be taken and fall-through to next insn
7390  * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
7391  *      value range is [0,10]
7392  */
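/* E.g. for "if (reg < 5)": a known range of [0, 3] returns 1 (always taken),
 * [6, 10] returns 0 (never taken), and [0, 10] returns -1 (unknown).
 */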
7393 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, bool is_jmp32)
7394 {
7395     if (__is_pointer_value(false, reg)) {
7396         if (!reg_type_not_null(reg->type)) {
7397             return -1;
7398         }
7399 
7400         /* If pointer is valid tests against zero will fail so we can
7401          * use this to direct branch taken.
7402          */
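        /* E.g. a non-NULL PTR_TO_SOCKET compared via "if (r0 == 0)" can never
         * take the branch, so BPF_JEQ yields 0 and BPF_JNE yields 1 here.
         */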
7403         if (val != 0) {
7404             return -1;
7405         }
7406 
7407         switch (opcode) {
7408             case BPF_JEQ:
7409                 return 0;
7410             case BPF_JNE:
7411                 return 1;
7412             default:
7413                 return -1;
7414         }
7415     }
7416 
7417     if (is_jmp32) {
7418         return is_branch32_taken(reg, val, opcode);
7419     }
7420     return is_branch64_taken(reg, val, opcode);
7421 }
7422 
7423 /* Adjusts the register min/max values in the case that the dst_reg is the
7424  * variable register that we are working on, and src_reg is a constant or we're
7425  * simply doing a BPF_K check.
7426  * In JEQ/JNE cases we also adjust the var_off values.
7427  */
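/* E.g. for "if (r1 > 5) goto l" on an unknown scalar r1: in the taken branch
 * umin_value is raised to 6, while in the fall-through branch umax_value is
 * lowered to 5; the signed and 32-bit variants are adjusted analogously.
 */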
7428 static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u32 val32,
7429                             u8 opcode, bool is_jmp32)
7430 {
7431     struct tnum false_32off = tnum_subreg(false_reg->var_off);
7432     struct tnum false_64off = false_reg->var_off;
7433     struct tnum true_32off = tnum_subreg(true_reg->var_off);
7434     struct tnum true_64off = true_reg->var_off;
7435     s64 sval = (s64)val;
7436     s32 sval32 = (s32)val32;
7437 
7438     /* If the dst_reg is a pointer, we can't learn anything about its
7439      * variable offset from the compare (unless src_reg were a pointer into
7440      * the same object, but we don't bother with that).
7441      * Since false_reg and true_reg have the same type by construction, we
7442      * only need to check one of them for pointerness.
7443      */
7444     if (__is_pointer_value(false, false_reg)) {
7445         return;
7446     }
7447 
7448     switch (opcode) {
7449     /* JEQ/JNE comparison doesn't change the register equivalence.
7450      *
7451      * r1 = r2;
7452      * if (r1 == 42) goto label;
7453      * ...
7454      * label: // here both r1 and r2 are known to be 42.
7455      *
7456      * Hence when marking a register as known, preserve its ID.
7457      */
7458         case BPF_JEQ:
7459             if (is_jmp32) {
7460                 __mark_reg32_known(true_reg, val32);
7461                 true_32off = tnum_subreg(true_reg->var_off);
7462             } else {
7463                 ___mark_reg_known(true_reg, val);
7464                 true_64off = true_reg->var_off;
7465             }
7466             break;
7467         case BPF_JNE:
7468             if (is_jmp32) {
7469                 __mark_reg32_known(false_reg, val32);
7470                 false_32off = tnum_subreg(false_reg->var_off);
7471             } else {
7472                 ___mark_reg_known(false_reg, val);
7473                 false_64off = false_reg->var_off;
7474             }
7475             break;
7476         case BPF_JSET:
7477             if (is_jmp32) {
7478                 false_32off = tnum_and(false_32off, tnum_const(~val32));
7479                 if (is_power_of_2(val32)) {
7480                     true_32off = tnum_or(true_32off, tnum_const(val32));
7481                 }
7482             } else {
7483                 false_64off = tnum_and(false_64off, tnum_const(~val));
7484                 if (is_power_of_2(val)) {
7485                     true_64off = tnum_or(true_64off, tnum_const(val));
7486                 }
7487             }
7488             break;
7489         case BPF_JGE:
7490         case BPF_JGT: {
7491             if (is_jmp32) {
7492                 u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
7493                 u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
7494 
7495                 false_reg->u32_max_value = min(false_reg->u32_max_value, false_umax);
7496                 true_reg->u32_min_value = max(true_reg->u32_min_value, true_umin);
7497             } else {
7498                 u64 false_umax = opcode == BPF_JGT ? val : val - 1;
7499                 u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
7500 
7501                 false_reg->umax_value = min(false_reg->umax_value, false_umax);
7502                 true_reg->umin_value = max(true_reg->umin_value, true_umin);
7503             }
7504             break;
7505         }
7506         case BPF_JSGE:
7507         case BPF_JSGT: {
7508             if (is_jmp32) {
7509                 s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
7510                 s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
7511 
7512                 false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
7513                 true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
7514             } else {
7515                 s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
7516                 s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
7517 
7518                 false_reg->smax_value = min(false_reg->smax_value, false_smax);
7519                 true_reg->smin_value = max(true_reg->smin_value, true_smin);
7520             }
7521             break;
7522         }
7523         case BPF_JLE:
7524         case BPF_JLT: {
7525             if (is_jmp32) {
7526                 u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
7527                 u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
7528 
7529                 false_reg->u32_min_value = max(false_reg->u32_min_value, false_umin);
7530                 true_reg->u32_max_value = min(true_reg->u32_max_value, true_umax);
7531             } else {
7532                 u64 false_umin = opcode == BPF_JLT ? val : val + 1;
7533                 u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
7534 
7535                 false_reg->umin_value = max(false_reg->umin_value, false_umin);
7536                 true_reg->umax_value = min(true_reg->umax_value, true_umax);
7537             }
7538             break;
7539         }
7540         case BPF_JSLE:
7541         case BPF_JSLT: {
7542             if (is_jmp32) {
7543                 s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
7544                 s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
7545 
7546                 false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
7547                 true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
7548             } else {
7549                 s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
7550                 s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
7551 
7552                 false_reg->smin_value = max(false_reg->smin_value, false_smin);
7553                 true_reg->smax_value = min(true_reg->smax_value, true_smax);
7554             }
7555             break;
7556         }
7557         default:
7558             return;
7559     }
7560 
7561     if (is_jmp32) {
7562         false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), tnum_subreg(false_32off));
7563         true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), tnum_subreg(true_32off));
7564         verifier_reg_combine_32_into_64(false_reg);
7565         verifier_reg_combine_32_into_64(true_reg);
7566     } else {
7567         false_reg->var_off = false_64off;
7568         true_reg->var_off = true_64off;
7569         __reg_combine_64_into_32(false_reg);
7570         __reg_combine_64_into_32(true_reg);
7571     }
7572 }
7573 
7574 /* Same as above, but for the case that dst_reg holds a constant and src_reg is
7575  * the variable reg.
7576  */
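/* E.g. for "if (r2 < r1) goto l" where r2 is known to be 5, BPF_JLT is
 * flipped to BPF_JGT below and r1's bounds are set as if the test had been
 * written "if (r1 > 5)".
 */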
7577 static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u32 val32,
7578                                 u8 opcode, bool is_jmp32)
7579 {
7580     /* How can we transform "a <op> b" into "b <op> a"? */
7581     static const u8 opcode_flip[VERIFIER_SIXTEEN] = {
7582         [BPF_JEQ >> VERIFIER_FOUR] = BPF_JEQ,
7583         [BPF_JNE >> VERIFIER_FOUR] = BPF_JNE,
7584         [BPF_JSET >> VERIFIER_FOUR] = BPF_JSET,
7585         /* these swap "lesser" and "greater" (L and G in the opcodes) */
7586         [BPF_JGE >> VERIFIER_FOUR] = BPF_JLE,
7587         [BPF_JGT >> VERIFIER_FOUR] = BPF_JLT,
7588         [BPF_JLE >> VERIFIER_FOUR] = BPF_JGE,
7589         [BPF_JLT >> VERIFIER_FOUR] = BPF_JGT,
7590         [BPF_JSGE >> VERIFIER_FOUR] = BPF_JSLE,
7591         [BPF_JSGT >> VERIFIER_FOUR] = BPF_JSLT,
7592         [BPF_JSLE >> VERIFIER_FOUR] = BPF_JSGE,
7593         [BPF_JSLT >> VERIFIER_FOUR] = BPF_JSGT};
7594     opcode = opcode_flip[opcode >> VERIFIER_FOUR];
7595     /* This uses zero as "not present in table"; luckily the zero opcode,
7596      * BPF_JA, can't get here.
7597      */
7598     if (opcode) {
7599         reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
7600     }
7601 }
7602 
7603 /* Regs are known to be equal, so intersect their min/max/var_off */
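/* E.g. if r1 is known to be in [0, 10] and r2 in [5, 20], then after a taken
 * "if (r1 == r2)" both registers can be narrowed to [5, 10].
 */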
7604 static void __reg_combine_min_max(struct bpf_reg_state *src_reg, struct bpf_reg_state *dst_reg)
7605 {
7606     src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, dst_reg->umin_value);
7607     src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, dst_reg->umax_value);
7608     src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, dst_reg->smin_value);
7609     src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, dst_reg->smax_value);
7610     src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, dst_reg->var_off);
7611     /* We might have learned new bounds from the var_off. */
7612     reg_bounds_sync(src_reg);
7613     reg_bounds_sync(dst_reg);
7614 }
7615 
7616 static void reg_combine_min_max(struct bpf_reg_state *true_src, struct bpf_reg_state *true_dst,
7617                                 struct bpf_reg_state *false_src, struct bpf_reg_state *false_dst, u8 opcode)
7618 {
7619     switch (opcode) {
7620         case BPF_JEQ:
7621             __reg_combine_min_max(true_src, true_dst);
7622             break;
7623         case BPF_JNE:
7624             __reg_combine_min_max(false_src, false_dst);
7625             break;
7626     }
7627 }
7628 
7629 static void mark_ptr_or_null_reg(struct bpf_func_state *state, struct bpf_reg_state *reg, u32 id, bool is_null)
7630 {
7631     if (type_may_be_null(reg->type) && reg->id == id &&
7632         !WARN_ON_ONCE(!reg->id)) {
7633         if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
7634             !tnum_equals_const(reg->var_off, 0) || reg->off)) {
7635             /* Old offset (both fixed and variable parts) should
7636              * have been known-zero, because we don't allow pointer
7637              * arithmetic on pointers that might be NULL. If we
7638              * see this happening, don't convert the register.
7639              */
7640             return;
7641         }
7642         if (is_null) {
7643             reg->type = SCALAR_VALUE;
7644         } else if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
7645             const struct bpf_map *map = reg->map_ptr;
7646 
7647             if (map->inner_map_meta) {
7648                 reg->type = CONST_PTR_TO_MAP;
7649                 reg->map_ptr = map->inner_map_meta;
7650             } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
7651                 reg->type = PTR_TO_XDP_SOCK;
7652             } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || map->map_type == BPF_MAP_TYPE_SOCKHASH) {
7653                 reg->type = PTR_TO_SOCKET;
7654             } else {
7655                 reg->type = PTR_TO_MAP_VALUE;
7656             }
7657         } else {
7658             reg->type &= ~PTR_MAYBE_NULL;
7659         }
7660 
7661         if (is_null) {
7662             /* We don't need id and ref_obj_id from this point
7663              * onwards anymore, thus we should better reset it,
7664              * so that state pruning has chances to take effect.
7665              */
7666             reg->id = 0;
7667             reg->ref_obj_id = 0;
7668         } else if (!reg_may_point_to_spin_lock(reg)) {
7669             /* For not-NULL ptr, reg->ref_obj_id will be reset
7670              * in release_reg_references().
7671              *
7672              * reg->id is still used by spin_lock ptr. Other
7673              * than spin_lock ptr type, reg->id can be reset.
7674              */
7675             reg->id = 0;
7676         }
7677     }
7678 }
7679 
7680 static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, bool is_null)
7681 {
7682     struct bpf_reg_state *reg;
7683     int i;
7684 
7685     for (i = 0; i < MAX_BPF_REG; i++) {
7686         mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
7687     }
7688 
7689     bpf_for_each_spilled_reg(i, state, reg)
7690     {
7691         if (!reg) {
7692             continue;
7693         }
7694         mark_ptr_or_null_reg(state, reg, id, is_null);
7695     }
7696 }
7697 
7698 /* The logic is similar to find_good_pkt_pointers(), both could eventually
7699  * be folded together at some point.
7700  */
7701 static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null)
7702 {
7703     struct bpf_func_state *state = vstate->frame[vstate->curframe];
7704     struct bpf_reg_state *regs = state->regs;
7705     u32 ref_obj_id = regs[regno].ref_obj_id;
7706     u32 id = regs[regno].id;
7707     int i;
7708 
7709     if (ref_obj_id && ref_obj_id == id && is_null) {
7710         /* regs[regno] is in the " == NULL" branch.
7711          * No one could have freed the reference state before
7712          * doing the NULL check.
7713          */
7714         WARN_ON_ONCE(release_reference_state(state, id));
7715     }
7716 
7717     for (i = 0; i <= vstate->curframe; i++) {
7718         __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
7719     }
7720 }
7721 
7722 static bool try_match_pkt_pointers(const struct bpf_insn *insn, struct bpf_reg_state *dst_reg,
7723                                    struct bpf_reg_state *src_reg, struct bpf_verifier_state *this_branch,
7724                                    struct bpf_verifier_state *other_branch)
7725 {
7726     if (BPF_SRC(insn->code) != BPF_X) {
7727         return false;
7728     }
7729 
7730     /* Pointers are always 64-bit. */
7731     if (BPF_CLASS(insn->code) == BPF_JMP32) {
7732         return false;
7733     }
7734 
7735     switch (BPF_OP(insn->code)) {
7736         case BPF_JGT:
7737             if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7738                 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7739                 /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
7740                 find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, false);
7741             } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7742                        (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7743                 /* pkt_end > pkt_data', pkt_data > pkt_meta' */
7744                 find_good_pkt_pointers(other_branch, src_reg, src_reg->type, true);
7745             } else {
7746                 return false;
7747             }
7748             break;
7749         case BPF_JLT:
7750             if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7751                 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7752                 /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
7753                 find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, true);
7754             } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7755                        (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7756                 /* pkt_end < pkt_data', pkt_data < pkt_meta' */
7757                 find_good_pkt_pointers(this_branch, src_reg, src_reg->type, false);
7758             } else {
7759                 return false;
7760             }
7761             break;
7762         case BPF_JGE:
7763             if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7764                 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7765                 /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
7766                 find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, true);
7767             } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7768                        (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7769                 /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
7770                 find_good_pkt_pointers(other_branch, src_reg, src_reg->type, false);
7771             } else {
7772                 return false;
7773             }
7774             break;
7775         case BPF_JLE:
7776             if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7777                 (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7778                 /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
7779                 find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, false);
7780             } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7781                        (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7782                 /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
7783                 find_good_pkt_pointers(this_branch, src_reg, src_reg->type, true);
7784             } else {
7785                 return false;
7786             }
7787             break;
7788         default:
7789             return false;
7790     }
7791 
7792     return true;
7793 }
7794 
7795 static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg)
7796 {
7797     struct bpf_func_state *state;
7798     struct bpf_reg_state *reg;
7799     int i, j;
7800 
7801     for (i = 0; i <= vstate->curframe; i++) {
7802         state = vstate->frame[i];
7803         for (j = 0; j < MAX_BPF_REG; j++) {
7804             reg = &state->regs[j];
7805             if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) {
7806                 *reg = *known_reg;
7807             }
7808         }
7809 
7810         bpf_for_each_spilled_reg(j, state, reg)
7811         {
7812             if (!reg) {
7813                 continue;
7814             }
7815             if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) {
7816                 *reg = *known_reg;
7817             }
7818         }
7819     }
7820 }
7821 
7822 static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx)
7823 {
7824     struct bpf_verifier_state *this_branch = env->cur_state;
7825     struct bpf_verifier_state *other_branch;
7826     struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
7827     struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
7828     u8 opcode = BPF_OP(insn->code);
7829     bool is_jmp32;
7830     int pred = -1;
7831     int err;
7832 
7833     /* Only conditional jumps are expected to reach here. */
7834     if (opcode == BPF_JA || opcode > BPF_JSLE) {
7835         verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
7836         return -EINVAL;
7837     }
7838 
7839     if (BPF_SRC(insn->code) == BPF_X) {
7840         if (insn->imm != 0) {
7841             verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7842             return -EINVAL;
7843         }
7844 
7845         /* check src1 operand */
7846         err = check_reg_arg(env, insn->src_reg, SRC_OP);
7847         if (err) {
7848             return err;
7849         }
7850 
7851         if (is_pointer_value(env, insn->src_reg)) {
7852             verbose(env, "R%d pointer comparison prohibited\n", insn->src_reg);
7853             return -EACCES;
7854         }
7855         src_reg = &regs[insn->src_reg];
7856     } else {
7857         if (insn->src_reg != BPF_REG_0) {
7858             verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7859             return -EINVAL;
7860         }
7861     }
7862 
7863     /* check src2 operand */
7864     err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7865     if (err) {
7866         return err;
7867     }
7868 
7869     dst_reg = &regs[insn->dst_reg];
7870     is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
7871 
7872     if (BPF_SRC(insn->code) == BPF_K) {
7873         pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
7874     } else if (src_reg->type == SCALAR_VALUE && is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
7875         pred = is_branch_taken(dst_reg, tnum_subreg(src_reg->var_off).value, opcode, is_jmp32);
7876     } else if (src_reg->type == SCALAR_VALUE && !is_jmp32 && tnum_is_const(src_reg->var_off)) {
7877         pred = is_branch_taken(dst_reg, src_reg->var_off.value, opcode, is_jmp32);
7878     }
7879 
7880     if (pred >= 0) {
7881         /* If we get here with a dst_reg pointer type it is because
7882          * above is_branch_taken() special cased the 0 comparison.
7883          */
7884         if (!__is_pointer_value(false, dst_reg)) {
7885             err = mark_chain_precision(env, insn->dst_reg);
7886         }
7887         if (BPF_SRC(insn->code) == BPF_X && !err) {
7888             err = mark_chain_precision(env, insn->src_reg);
7889         }
7890         if (err) {
7891             return err;
7892         }
7893     }
7894 
7895     if (pred == 1) {
7896         /* Only follow the goto, ignore fall-through. If needed, push
7897          * the fall-through branch for simulation under speculative
7898          * execution.
7899          */
7900         if (!env->bypass_spec_v1 && !sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx)) {
7901             return -EFAULT;
7902         }
7903         *insn_idx += insn->off;
7904         return 0;
7905     } else if (pred == 0) {
7906         /* Only follow the fall-through branch, since that's where the
7907          * program will go. If needed, push the goto branch for
7908          * simulation under speculative execution.
7909          */
7910         if (!env->bypass_spec_v1 && !sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1, *insn_idx)) {
7911             return -EFAULT;
7912         }
7913         return 0;
7914     }
7915 
7916     other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
7917     if (!other_branch) {
7918         return -EFAULT;
7919     }
7920     other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
7921 
7922     /* detect if we are comparing against a constant value so we can adjust
7923      * our min/max values for our dst register.
7924      * this is only legit if both are scalars (or pointers to the same
7925      * object, I suppose, but we don't support that right now), because
7926      * otherwise the different base pointers mean the offsets aren't
7927      * comparable.
7928      */
7929     if (BPF_SRC(insn->code) == BPF_X) {
7930         struct bpf_reg_state *src_reg_in = &regs[insn->src_reg];
7931 
7932         if (dst_reg->type == SCALAR_VALUE && src_reg_in->type == SCALAR_VALUE) {
7933             if (tnum_is_const(src_reg_in->var_off) || (is_jmp32 && tnum_is_const(tnum_subreg(src_reg_in->var_off)))) {
7934                 reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, src_reg_in->var_off.value,
7935                                 tnum_subreg(src_reg_in->var_off).value, opcode, is_jmp32);
7936             } else if (tnum_is_const(dst_reg->var_off) || (is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off)))) {
7937                 reg_set_min_max_inv(&other_branch_regs[insn->src_reg], src_reg_in, dst_reg->var_off.value,
7938                                     tnum_subreg(dst_reg->var_off).value, opcode, is_jmp32);
7939             } else if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE)) {
7940                 /* Comparing for equality, we can combine knowledge */
7941                 reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], src_reg_in,
7942                                     dst_reg, opcode);
7943             }
7944             if (src_reg_in->id && !WARN_ON_ONCE(src_reg_in->id != other_branch_regs[insn->src_reg].id)) {
7945                 find_equal_scalars(this_branch, src_reg_in);
7946                 find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
7947             }
7948         }
7949     } else if (dst_reg->type == SCALAR_VALUE) {
7950         reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, insn->imm, (u32)insn->imm, opcode, is_jmp32);
7951     }
7952 
7953     if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
7954         !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
7955         find_equal_scalars(this_branch, dst_reg);
7956         find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
7957     }
7958 
7959     /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
7960      * NOTE: these optimizations below are related with pointer comparison
7961      *       which will never be JMP32.
7962      */
7963     if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
7964         type_may_be_null(dst_reg->type)) {
7965         /* Mark all identical registers in each branch as either
7966          * safe or unknown, depending on the R == 0 or R != 0 condition.
7967          */
7968         mark_ptr_or_null_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
7969         mark_ptr_or_null_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
7970     } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg], this_branch, other_branch) &&
7971                is_pointer_value(env, insn->dst_reg)) {
7972         verbose(env, "R%d pointer comparison prohibited\n", insn->dst_reg);
7973         return -EACCES;
7974     }
7975     if (env->log.level & BPF_LOG_LEVEL) {
7976         print_verifier_state(env, this_branch->frame[this_branch->curframe]);
7977     }
7978     return 0;
7979 }
7980 
7981 /* verify BPF_LD_IMM64 instruction */
7982 static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
7983 {
7984     struct bpf_insn_aux_data *aux = cur_aux(env);
7985     struct bpf_reg_state *regs = cur_regs(env);
7986     struct bpf_reg_state *dst_reg;
7987     struct bpf_map *map;
7988     int err;
7989 
7990     if (BPF_SIZE(insn->code) != BPF_DW) {
7991         verbose(env, "invalid BPF_LD_IMM insn\n");
7992         return -EINVAL;
7993     }
7994     if (insn->off != 0) {
7995         verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
7996         return -EINVAL;
7997     }
7998 
7999     err = check_reg_arg(env, insn->dst_reg, DST_OP);
8000     if (err) {
8001         return err;
8002     }
8003 
8004     dst_reg = &regs[insn->dst_reg];
8005     if (insn->src_reg == 0) {
8006         u64 imm = ((u64)(insn + 1)->imm << VERIFIER_THIRTYTWO) | (u32)insn->imm;
8007 
8008         dst_reg->type = SCALAR_VALUE;
8009         verifier_mark_reg_known(&regs[insn->dst_reg], imm);
8010         return 0;
8011     }
8012 
8013     /* All special src_reg cases are listed below. From this point onwards
8014      * we either succeed and assign a corresponding dst_reg->type after
8015      * zeroing the offset, or fail and reject the program.
8016      */
8017     mark_reg_known_zero(env, regs, insn->dst_reg);
8018 
8019     if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
8020         dst_reg->type = aux->btf_var.reg_type;
8021         switch (base_type(dst_reg->type)) {
8022             case PTR_TO_MEM:
8023                 dst_reg->mem_size = aux->btf_var.mem_size;
8024                 break;
8025             case PTR_TO_BTF_ID:
8026             case PTR_TO_PERCPU_BTF_ID:
8027                 dst_reg->btf_id = aux->btf_var.btf_id;
8028                 break;
8029             default:
8030                 verbose(env, "bpf verifier is misconfigured\n");
8031                 return -EFAULT;
8032         }
8033         return 0;
8034     }
8035 
8036     map = env->used_maps[aux->map_index];
8037     dst_reg->map_ptr = map;
8038 
8039     if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
8040         dst_reg->type = PTR_TO_MAP_VALUE;
8041         dst_reg->off = aux->map_off;
8042         if (map_value_has_spin_lock(map)) {
8043             dst_reg->id = ++env->id_gen;
8044         }
8045     } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8046         dst_reg->type = CONST_PTR_TO_MAP;
8047     } else {
8048         verbose(env, "bpf verifier is misconfigured\n");
8049         return -EINVAL;
8050     }
8051 
8052     return 0;
8053 }
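
/* Illustrative sketch of the src_reg == 0 case handled above (the constant is
 * arbitrary): BPF_LD_IMM64 occupies two insn slots, and the verifier
 * reassembles the 64-bit constant from the two 32-bit imm halves, e.g.
 *
 *   BPF_LD_IMM64(BPF_REG_1, 0x1234567890abcdefULL)
 *
 * encodes insn[0].imm = 0x90abcdef (low 32 bits) and insn[1].imm = 0x12345678
 * (high 32 bits), which the shift-and-or above combines back into the
 * original value before marking R1 as a known SCALAR_VALUE.
 */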
8054 
8055 static bool may_access_skb(enum bpf_prog_type type)
8056 {
8057     switch (type) {
8058         case BPF_PROG_TYPE_SOCKET_FILTER:
8059         case BPF_PROG_TYPE_SCHED_CLS:
8060         case BPF_PROG_TYPE_SCHED_ACT:
8061             return true;
8062         default:
8063             return false;
8064     }
8065 }
8066 
8067 /* verify safety of LD_ABS|LD_IND instructions:
8068  * - they can only appear in programs where ctx == skb
8069  * - since they are wrappers of function calls, they scratch R1-R5 registers,
8070  *   preserve R6-R9, and store return value into R0
8071  *
8072  * Implicit input:
8073  *   ctx == skb == R6 == CTX
8074  *
8075  * Explicit input:
8076  *   SRC == any register
8077  *   IMM == 32-bit immediate
8078  *
8079  * Output:
8080  *   R0 - 8/16/32-bit skb data converted to cpu endianness
8081  */
8082 static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
8083 {
8084     struct bpf_reg_state *regs = cur_regs(env);
8085     static const int ctx_reg = BPF_REG_6;
8086     u8 mode = BPF_MODE(insn->code);
8087     int i, err;
8088 
8089     if (!may_access_skb(resolve_prog_type(env->prog))) {
8090         verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
8091         return -EINVAL;
8092     }
8093 
8094     if (!env->ops->gen_ld_abs) {
8095         verbose(env, "bpf verifier is misconfigured\n");
8096         return -EINVAL;
8097     }
8098 
8099     if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW ||
8100         (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
8101         verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
8102         return -EINVAL;
8103     }
8104 
8105     /* check whether implicit source operand (register R6) is readable */
8106     err = check_reg_arg(env, ctx_reg, SRC_OP);
8107     if (err) {
8108         return err;
8109     }
8110 
8111     /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
8112      * gen_ld_abs() may terminate the program at runtime, leading to
8113      * reference leak.
8114      */
8115     err = check_reference_leak(env);
8116     if (err) {
8117         verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
8118         return err;
8119     }
8120 
8121     if (env->cur_state->active_spin_lock) {
8122         verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
8123         return -EINVAL;
8124     }
8125 
8126     if (regs[ctx_reg].type != PTR_TO_CTX) {
8127         verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
8128         return -EINVAL;
8129     }
8130 
8131     if (mode == BPF_IND) {
8132         /* check explicit source operand */
8133         err = check_reg_arg(env, insn->src_reg, SRC_OP);
8134         if (err) {
8135             return err;
8136         }
8137     }
8138 
8139     err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
8140     if (err < 0) {
8141         return err;
8142     }
8143 
8144     /* reset caller saved regs to unreadable */
8145     for (i = 0; i < CALLER_SAVED_REGS; i++) {
8146         mark_reg_not_init(env, regs, caller_saved[i]);
8147         check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8148     }
8149 
8150     /* mark destination R0 register as readable, since it contains
8151      * the value fetched from the packet.
8152      * Already marked as written above.
8153      */
8154     mark_reg_unknown(env, regs, BPF_REG_0);
8155     /* ld_abs loads up to 32 bits of skb data. */
8156     regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
8157     return 0;
8158 }
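
/* Illustrative sketch of a program using the conventions documented above
 * (the offset and the derived return value are arbitrary):
 *
 *   BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),     // implicit input: skb ctx in R6
 *   BPF_LD_ABS(BPF_B, 9),                    // R0 = byte of skb data at offset 9,
 *                                            // R1-R5 are scratched
 *   BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),    // derive a return value from it
 *   BPF_EXIT_INSN(),
 *
 * Omitting the first move (so R6 is not PTR_TO_CTX) trips the
 * "at the time of BPF_LD_ABS|IND R6 != pointer to skb" check above.
 */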
8159 
8160 static int check_return_code(struct bpf_verifier_env *env)
8161 {
8162     struct tnum enforce_attach_type_range = tnum_unknown;
8163     const struct bpf_prog *prog = env->prog;
8164     struct bpf_reg_state *reg;
8165     struct tnum range = tnum_range(0, 1);
8166     enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8167     int err;
8168     const bool is_subprog = env->cur_state->frame[0]->subprogno;
8169 
8170     /* LSM and struct_ops func-ptr's return type could be "void" */
8171     if (!is_subprog && (prog_type == BPF_PROG_TYPE_STRUCT_OPS || prog_type == BPF_PROG_TYPE_LSM) &&
8172         !prog->aux->attach_func_proto->type) {
8173         return 0;
8174     }
8175 
8176     /* The eBPF calling convention is such that R0 is used
8177      * to return the value from an eBPF program.
8178      * Make sure that it's readable at this point, i.e. at
8179      * bpf_exit, which means that the program wrote
8180      * something into it earlier.
8181      */
8182     err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8183     if (err) {
8184         return err;
8185     }
8186 
8187     if (is_pointer_value(env, BPF_REG_0)) {
8188         verbose(env, "R0 leaks addr as return value\n");
8189         return -EACCES;
8190     }
8191 
8192     reg = cur_regs(env) + BPF_REG_0;
8193     if (is_subprog) {
8194         if (reg->type != SCALAR_VALUE) {
8195             verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
8196                     reg_type_str(env, reg->type));
8197             return -EINVAL;
8198         }
8199         return 0;
8200     }
8201 
8202     switch (prog_type) {
8203         case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
8204             if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
8205                 env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
8206                 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
8207                 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
8208                 env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
8209                 env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) {
8210                 range = tnum_range(1, 1);
8211             }
8212             break;
8213         case BPF_PROG_TYPE_CGROUP_SKB:
8214             if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
8215                 range = tnum_range(0, 3);
8216                 enforce_attach_type_range = tnum_range(2, 3);
8217             }
8218             break;
8219         case BPF_PROG_TYPE_CGROUP_SOCK:
8220         case BPF_PROG_TYPE_SOCK_OPS:
8221         case BPF_PROG_TYPE_CGROUP_DEVICE:
8222         case BPF_PROG_TYPE_CGROUP_SYSCTL:
8223         case BPF_PROG_TYPE_CGROUP_SOCKOPT:
8224             break;
8225         case BPF_PROG_TYPE_RAW_TRACEPOINT:
8226             if (!env->prog->aux->attach_btf_id) {
8227                 return 0;
8228             }
8229             range = tnum_const(0);
8230             break;
8231         case BPF_PROG_TYPE_TRACING:
8232             switch (env->prog->expected_attach_type) {
8233                 case BPF_TRACE_FENTRY:
8234                 case BPF_TRACE_FEXIT:
8235                     range = tnum_const(0);
8236                     break;
8237                 case BPF_TRACE_RAW_TP:
8238                 case BPF_MODIFY_RETURN:
8239                     return 0;
8240                 case BPF_TRACE_ITER:
8241                     break;
8242                 default:
8243                     return -ENOTSUPP;
8244             }
8245             break;
8246         case BPF_PROG_TYPE_SK_LOOKUP:
8247             range = tnum_range(SK_DROP, SK_PASS);
8248             break;
8249         case BPF_PROG_TYPE_EXT:
8250             /* freplace program can return anything as its return value
8251              * depends on the to-be-replaced kernel func or bpf program.
8252              */
8253         default:
8254             return 0;
8255     }
8256 
8257     if (reg->type != SCALAR_VALUE) {
8258         verbose(env, "At program exit the register R0 is not a known value (%s)\n", reg_type_str(env, reg->type));
8259         return -EINVAL;
8260     }
8261 
8262     if (!tnum_in(range, reg->var_off)) {
8263         char tn_buf[48];
8264 
8265         verbose(env, "At program exit the register R0 ");
8266         if (!tnum_is_unknown(reg->var_off)) {
8267             tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8268             verbose(env, "has value %s", tn_buf);
8269         } else {
8270             verbose(env, "has unknown scalar value");
8271         }
8272         tnum_strn(tn_buf, sizeof(tn_buf), range);
8273         verbose(env, " should have been in %s\n", tn_buf);
8274         return -EINVAL;
8275     }
8276 
8277     if (!tnum_is_unknown(enforce_attach_type_range) && tnum_in(enforce_attach_type_range, reg->var_off)) {
8278         env->prog->enforce_expected_attach_type = 1;
8279     }
8280     return 0;
8281 }
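
/* Illustrative example of the range enforcement above: a
 * BPF_PROG_TYPE_CGROUP_SKB program attached to BPF_CGROUP_INET_EGRESS must
 * exit with R0 in [0, 3], so
 *
 *   BPF_MOV64_IMM(BPF_REG_0, 4),
 *   BPF_EXIT_INSN(),
 *
 * is rejected with the "At program exit the register R0 ... should have been
 * in ..." message, while returning 0 or 1 is accepted for any attach type of
 * that program type.
 */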
8282 
8283 /* non-recursive DFS pseudo code
8284  * 1  procedure DFS-iterative(G,v):
8285  * 2      label v as discovered
8286  * 3      let S be a stack
8287  * 4      S.push(v)
8288  * 5      while S is not empty
8289  * 6            t <- S.pop()
8290  * 7            if t is what we're looking for:
8291  * 8                return t
8292  * 9            for all edges e in G.adjacentEdges(t) do
8293  * 10               if edge e is already labelled
8294  * 11                   continue with the next edge
8295  * 12               w <- G.adjacentVertex(t,e)
8296  * 13               if vertex w is not discovered and not explored
8297  * 14                   label e as tree-edge
8298  * 15                   label w as discovered
8299  * 16                   S.push(w)
8300  * 17                   continue at 5
8301  * 18               else if vertex w is discovered
8302  * 19                   label e as back-edge
8303  * 20               else
8304  * 21                   // vertex w is explored
8305  * 22                   label e as forward- or cross-edge
8306  * 23           label t as explored
8307  * 24           S.pop()
8308  *
8309  * convention:
8310  * 0x10 - discovered
8311  * 0x11 - discovered and fall-through edge labelled
8312  * 0x12 - discovered and fall-through and branch edges labelled
8313  * 0x20 - explored
8314  */
8315 
8316 enum {
8317     DISCOVERED = 0x10,
8318     EXPLORED = 0x20,
8319     FALLTHROUGH = 1,
8320     BRANCH = 2,
8321 };
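
/* Example of the encoding above: after a conditional jump's fall-through edge
 * has been pushed, insn_state[t] == DISCOVERED | FALLTHROUGH (0x11); once its
 * branch edge is pushed as well it becomes DISCOVERED | BRANCH (0x12), and it
 * finally turns into EXPLORED (0x20) when popped off the DFS stack.
 */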
8322 
8323 static u32 state_htab_size(struct bpf_verifier_env *env)
8324 {
8325     return env->prog->len;
8326 }
8327 
8328 static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
8329 {
8330     struct bpf_verifier_state *cur = env->cur_state;
8331     struct bpf_func_state *state = cur->frame[cur->curframe];
8332 
8333     return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
8334 }
8335 
8336 static void init_explored_state(struct bpf_verifier_env *env, int idx)
8337 {
8338     env->insn_aux_data[idx].prune_point = true;
8339 }
8340 
8341 /* t, w, e - match pseudo-code above:
8342  * t - index of current instruction
8343  * w - next instruction
8344  * e - edge
8345  */
8346 static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, bool loop_ok)
8347 {
8348     int *insn_stack = env->cfg.insn_stack;
8349     int *insn_state = env->cfg.insn_state;
8350 
8351     if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) {
8352         return 0;
8353     }
8354 
8355     if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) {
8356         return 0;
8357     }
8358 
8359     if (w < 0 || w >= env->prog->len) {
8360         verbose_linfo(env, t, "%d: ", t);
8361         verbose(env, "jump out of range from insn %d to %d\n", t, w);
8362         return -EINVAL;
8363     }
8364 
8365     if (e == BRANCH) {
8366         /* mark branch target for state pruning */
8367         init_explored_state(env, w);
8368     }
8369 
8370     if (insn_state[w] == 0) {
8371         /* tree-edge */
8372         insn_state[t] = DISCOVERED | e;
8373         insn_state[w] = DISCOVERED;
8374         if (env->cfg.cur_stack >= env->prog->len) {
8375             return -E2BIG;
8376         }
8377         insn_stack[env->cfg.cur_stack++] = w;
8378         return 1;
8379     } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
8380         if (loop_ok && env->bpf_capable) {
8381             return 0;
8382         }
8383         verbose_linfo(env, t, "%d: ", t);
8384         verbose_linfo(env, w, "%d: ", w);
8385         verbose(env, "back-edge from insn %d to %d\n", t, w);
8386         return -EINVAL;
8387     } else if (insn_state[w] == EXPLORED) {
8388         /* forward- or cross-edge */
8389         insn_state[t] = DISCOVERED | e;
8390     } else {
8391         verbose(env, "insn state internal bug\n");
8392         return -EFAULT;
8393     }
8394     return 0;
8395 }
8396 
8397 /* non-recursive depth-first-search to detect loops in BPF program
8398  * loop == back-edge in directed graph
8399  */
8400 static int check_cfg(struct bpf_verifier_env *env)
8401 {
8402     struct bpf_insn *insns = env->prog->insnsi;
8403     int insn_cnt = env->prog->len;
8404     int *insn_stack, *insn_state;
8405     int ret = 0;
8406     int i, t;
8407 
8408     insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8409     if (!insn_state) {
8410         return -ENOMEM;
8411     }
8412 
8413     insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8414     if (!insn_stack) {
8415         kvfree(insn_state);
8416         return -ENOMEM;
8417     }
8418 
8419     insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
8420     insn_stack[0] = 0;          /* 0 is the first instruction */
8421     env->cfg.cur_stack = 1;
8422 
8423     while (1) {
8424         if (env->cfg.cur_stack == 0) {
8425             goto check_state;
8426         }
8427         t = insn_stack[env->cfg.cur_stack - 1];
8428 
8429         if (BPF_CLASS(insns[t].code) == BPF_JMP || BPF_CLASS(insns[t].code) == BPF_JMP32) {
8430             u8 opcode = BPF_OP(insns[t].code);
8431             if (opcode == BPF_EXIT) {
8432                 goto mark_explored;
8433             } else if (opcode == BPF_CALL) {
8434                 ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8435                 if (ret == 1) {
8436                     continue;
8437                 } else if (ret < 0) {
8438                     goto err_free;
8439                 }
8440                 if (t + 1 < insn_cnt) {
8441                     init_explored_state(env, t + 1);
8442                 }
8443                 if (insns[t].src_reg == BPF_PSEUDO_CALL) {
8444                     init_explored_state(env, t);
8445                     ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, false);
8446                     if (ret == 1) {
8447                         continue;
8448                     } else if (ret < 0) {
8449                         goto err_free;
8450                     }
8451                 }
8452             } else if (opcode == BPF_JA) {
8453                 if (BPF_SRC(insns[t].code) != BPF_K) {
8454                     ret = -EINVAL;
8455                     goto err_free;
8456                 }
8457                 /* unconditional jump with single edge */
8458                 ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env, true);
8459                 if (ret == 1) {
8460                     continue;
8461                 } else if (ret < 0) {
8462                     goto err_free;
8463                 }
8464                 /* unconditional jmp is not a good pruning point,
8465                  * but it's marked, since backtracking needs
8466                  * to record jmp history in is_state_visited().
8467                  */
8468                 init_explored_state(env, t + insns[t].off + 1);
8469                 /* tell verifier to check for equivalent states
8470                  * after every call and jump
8471                  */
8472                 if (t + 1 < insn_cnt) {
8473                     init_explored_state(env, t + 1);
8474                 }
8475             } else {
8476                 /* conditional jump with two edges */
8477                 init_explored_state(env, t);
8478                 ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
8479                 if (ret == 1) {
8480                     continue;
8481                 } else if (ret < 0) {
8482                     goto err_free;
8483                 }
8484 
8485                 ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
8486                 if (ret == 1) {
8487                     continue;
8488                 } else if (ret < 0) {
8489                     goto err_free;
8490                 }
8491             }
8492         } else {
8493             /* all other non-branch instructions with single
8494              * fall-through edge
8495              */
8496             ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8497             if (ret == 1) {
8498                 continue;
8499             } else if (ret < 0) {
8500                 goto err_free;
8501             }
8502         }
8503 
8504     mark_explored:
8505         insn_state[t] = EXPLORED;
8506         if (env->cfg.cur_stack-- <= 0) {
8507             verbose(env, "pop stack internal bug\n");
8508             ret = -EFAULT;
8509             goto err_free;
8510         }
8511         continue;
8512     }
8513 
8514 check_state:
8515     for (i = 0; i < insn_cnt; i++) {
8516         if (insn_state[i] != EXPLORED) {
8517             verbose(env, "unreachable insn %d\n", i);
8518             ret = -EINVAL;
8519             goto err_free;
8520         }
8521     }
8522     ret = 0; /* cfg looks good */
8523 
8524 err_free:
8525     kvfree(insn_state);
8526     kvfree(insn_stack);
8527     env->cfg.insn_state = env->cfg.insn_stack = NULL;
8528     return ret;
8529 }
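
/* Illustrative example of a program this DFS rejects (instruction indexes and
 * the bound are arbitrary): a conditional jump whose target is an
 * already-discovered insn forms a back-edge,
 *
 *   0: r1 += 1
 *   1: if r1 < 10 goto pc-2      // target = 1 + (-2) + 1 = 0 => back-edge
 *   2: exit
 *
 * push_insn() then reports "back-edge from insn 1 to 0", unless the program
 * is bpf_capable and the edge was pushed with loop_ok set (bounded loops are
 * handled later).
 */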
8530 
8531 static int check_abnormal_return(struct bpf_verifier_env *env)
8532 {
8533     int i;
8534 
8535     for (i = 1; i < env->subprog_cnt; i++) {
8536         if (env->subprog_info[i].has_ld_abs) {
8537             verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
8538             return -EINVAL;
8539         }
8540         if (env->subprog_info[i].has_tail_call) {
8541             verbose(env, "tail_call is not allowed in subprogs without BTF\n");
8542             return -EINVAL;
8543         }
8544     }
8545     return 0;
8546 }
8547 
8548 /* The minimum supported BTF func info size */
8549 #define MIN_BPF_FUNCINFO_SIZE 8
8550 #define MAX_FUNCINFO_REC_SIZE 252
8551 
8552 static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8553 {
8554     const struct btf_type *type, *func_proto, *ret_type;
8555     u32 i, nfuncs, urec_size, min_size;
8556     u32 krec_size = sizeof(struct bpf_func_info);
8557     struct bpf_func_info *krecord;
8558     struct bpf_func_info_aux *info_aux = NULL;
8559     struct bpf_prog *prog;
8560     const struct btf *btf;
8561     void __user *urecord;
8562     u32 prev_offset = 0;
8563     bool scalar_return;
8564     int ret = -ENOMEM;
8565 
8566     nfuncs = attr->func_info_cnt;
8567     if (!nfuncs) {
8568         if (check_abnormal_return(env)) {
8569             return -EINVAL;
8570         }
8571         return 0;
8572     }
8573 
8574     if (nfuncs != env->subprog_cnt) {
8575         verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
8576         return -EINVAL;
8577     }
8578 
8579     urec_size = attr->func_info_rec_size;
8580     if (urec_size < MIN_BPF_FUNCINFO_SIZE || urec_size > MAX_FUNCINFO_REC_SIZE || urec_size % sizeof(u32)) {
8581         verbose(env, "invalid func info rec size %u\n", urec_size);
8582         return -EINVAL;
8583     }
8584 
8585     prog = env->prog;
8586     btf = prog->aux->btf;
8587 
8588     urecord = u64_to_user_ptr(attr->func_info);
8589     min_size = min_t(u32, krec_size, urec_size);
8590 
8591     krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
8592     if (!krecord) {
8593         return -ENOMEM;
8594     }
8595     info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
8596     if (!info_aux) {
8597         goto err_free;
8598     }
8599 
8600     for (i = 0; i < nfuncs; i++) {
8601         ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
8602         if (ret) {
8603             if (ret == -E2BIG) {
8604                 verbose(env, "nonzero tailing record in func info");
8605                 /* set the size the kernel expects so the loader can
8606                  * zero out the rest of the record.
8607                  */
8608                 if (put_user(min_size, &uattr->func_info_rec_size)) {
8609                     ret = -EFAULT;
8610                 }
8611             }
8612             goto err_free;
8613         }
8614 
8615         if (copy_from_user(&krecord[i], urecord, min_size)) {
8616             ret = -EFAULT;
8617             goto err_free;
8618         }
8619 
8620         /* check insn_off */
8621         ret = -EINVAL;
8622         if (i == 0) {
8623             if (krecord[i].insn_off) {
8624                 verbose(env, "nonzero insn_off %u for the first func info record", krecord[i].insn_off);
8625                 goto err_free;
8626             }
8627         } else if (krecord[i].insn_off <= prev_offset) {
8628             verbose(env, "same or smaller insn offset (%u) than previous func info record (%u)", krecord[i].insn_off,
8629                     prev_offset);
8630             goto err_free;
8631         }
8632 
8633         if (env->subprog_info[i].start != krecord[i].insn_off) {
8634             verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
8635             goto err_free;
8636         }
8637 
8638         /* check type_id */
8639         type = btf_type_by_id(btf, krecord[i].type_id);
8640         if (!type || !btf_type_is_func(type)) {
8641             verbose(env, "invalid type id %d in func info", krecord[i].type_id);
8642             goto err_free;
8643         }
8644         info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8645 
8646         func_proto = btf_type_by_id(btf, type->type);
8647         if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) {
8648             /* btf_func_check() already verified it during BTF load */
8649             goto err_free;
8650         }
8651         ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
8652         scalar_return = btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
8653         if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
8654             verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
8655             goto err_free;
8656         }
8657         if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
8658             verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
8659             goto err_free;
8660         }
8661 
8662         prev_offset = krecord[i].insn_off;
8663         urecord += urec_size;
8664     }
8665 
8666     prog->aux->func_info = krecord;
8667     prog->aux->func_info_cnt = nfuncs;
8668     prog->aux->func_info_aux = info_aux;
8669     return 0;
8670 
8671 err_free:
8672     kvfree(krecord);
8673     kfree(info_aux);
8674     return ret;
8675 }
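
/* Illustrative sketch of the userspace side of the checks above (field values
 * and the ptr_to_u64() cast helper are hypothetical): the loader supplies one
 * bpf_func_info record per subprog,
 *
 *   struct bpf_func_info fi[] = {
 *       { .insn_off = 0,  .type_id = main_func_btf_id },  // subprog 0
 *       { .insn_off = 12, .type_id = sub_func_btf_id  },  // subprog 1
 *   };
 *   attr.func_info          = ptr_to_u64(fi);
 *   attr.func_info_cnt      = ARRAY_SIZE(fi);
 *   attr.func_info_rec_size = sizeof(fi[0]);
 *
 * insn_off must be 0 for the first record and strictly increasing afterwards,
 * matching subprog_info[i].start exactly as verified above.
 */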
8676 
8677 static void adjust_btf_func(struct bpf_verifier_env *env)
8678 {
8679     struct bpf_prog_aux *aux = env->prog->aux;
8680     int i;
8681 
8682     if (!aux->func_info) {
8683         return;
8684     }
8685 
8686     for (i = 0; i < env->subprog_cnt; i++) {
8687         aux->func_info[i].insn_off = env->subprog_info[i].start;
8688     }
8689 }
8690 
8691 #define MIN_BPF_LINEINFO_SIZE                                                                                          \
8692     (offsetof(struct bpf_line_info, line_col) + sizeof(((struct bpf_line_info *)(0))->line_col))
8693 #define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
8694 
8695 static int check_btf_line(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8696 {
8697     u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
8698     struct bpf_subprog_info *sub;
8699     struct bpf_line_info *linfo;
8700     struct bpf_prog *prog;
8701     const struct btf *btf;
8702     void __user *ulinfo;
8703     int err;
8704 
8705     nr_linfo = attr->line_info_cnt;
8706     if (!nr_linfo) {
8707         return 0;
8708     }
8709     if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) {
8710         return -EINVAL;
8711     }
8712 
8713     rec_size = attr->line_info_rec_size;
8714     if (rec_size < MIN_BPF_LINEINFO_SIZE || rec_size > MAX_LINEINFO_REC_SIZE || rec_size & (sizeof(u32) - 1)) {
8715         return -EINVAL;
8716     }
8717 
8718     /* Need to zero it in case userspace passes
8719      * in a smaller bpf_line_info object.
8720      */
8721     linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), GFP_KERNEL | __GFP_NOWARN);
8722     if (!linfo) {
8723         return -ENOMEM;
8724     }
8725 
8726     prog = env->prog;
8727     btf = prog->aux->btf;
8728 
8729     s = 0;
8730     sub = env->subprog_info;
8731     ulinfo = u64_to_user_ptr(attr->line_info);
8732     expected_size = sizeof(struct bpf_line_info);
8733     ncopy = min_t(u32, expected_size, rec_size);
8734     for (i = 0; i < nr_linfo; i++) {
8735         err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
8736         if (err) {
8737             if (err == -E2BIG) {
8738                 verbose(env, "nonzero tailing record in line_info");
8739                 if (put_user(expected_size, &uattr->line_info_rec_size)) {
8740                     err = -EFAULT;
8741                 }
8742             }
8743             goto err_free;
8744         }
8745 
8746         if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
8747             err = -EFAULT;
8748             goto err_free;
8749         }
8750 
8751         /*
8752          * Check insn_off to ensure
8753          * 1) strictly increasing AND
8754          * 2) bounded by prog->len
8755          *
8756          * The linfo[0].insn_off == 0 check logically falls into
8757          * the later "missing bpf_line_info for func..." case
8758          * because the first linfo[0].insn_off must be the
8759          * first sub also and the first sub must have
8760          * subprog_info[0].start == 0.
8761          */
8762         if ((i && linfo[i].insn_off <= prev_offset) || linfo[i].insn_off >= prog->len) {
8763             verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", i, linfo[i].insn_off,
8764                     prev_offset, prog->len);
8765             err = -EINVAL;
8766             goto err_free;
8767         }
8768 
8769         if (!prog->insnsi[linfo[i].insn_off].code) {
8770             verbose(env, "Invalid insn code at line_info[%u].insn_off\n", i);
8771             err = -EINVAL;
8772             goto err_free;
8773         }
8774 
8775         if (!btf_name_by_offset(btf, linfo[i].line_off) || !btf_name_by_offset(btf, linfo[i].file_name_off)) {
8776             verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
8777             err = -EINVAL;
8778             goto err_free;
8779         }
8780 
8781         if (s != env->subprog_cnt) {
8782             if (linfo[i].insn_off == sub[s].start) {
8783                 sub[s].linfo_idx = i;
8784                 s++;
8785             } else if (sub[s].start < linfo[i].insn_off) {
8786                 verbose(env, "missing bpf_line_info for func#%u\n", s);
8787                 err = -EINVAL;
8788                 goto err_free;
8789             }
8790         }
8791 
8792         prev_offset = linfo[i].insn_off;
8793         ulinfo += rec_size;
8794     }
8795 
8796     if (s != env->subprog_cnt) {
8797         verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", env->subprog_cnt - s, s);
8798         err = -EINVAL;
8799         goto err_free;
8800     }
8801 
8802     prog->aux->linfo = linfo;
8803     prog->aux->nr_linfo = nr_linfo;
8804 
8805     return 0;
8806 
8807 err_free:
8808     kvfree(linfo);
8809     return err;
8810 }
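
/* Illustrative sketch of a line_info layout accepted above (offsets are
 * arbitrary): for a prog with subprogs starting at insn 0 and insn 12,
 *
 *   line_info[0].insn_off = 0     // covers subprog 0 start
 *   line_info[1].insn_off = 3
 *   line_info[2].insn_off = 12    // covers subprog 1 start
 *
 * insn_off must be strictly increasing, stay below prog->len, and every
 * subprog start must have a matching record, or the checks above fail.
 */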
8811 
8812 static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8813 {
8814     struct btf *btf;
8815     int err;
8816 
8817     if (!attr->func_info_cnt && !attr->line_info_cnt) {
8818         if (check_abnormal_return(env)) {
8819             return -EINVAL;
8820         }
8821         return 0;
8822     }
8823 
8824     btf = btf_get_by_fd(attr->prog_btf_fd);
8825     if (IS_ERR(btf)) {
8826         return PTR_ERR(btf);
8827     }
8828     env->prog->aux->btf = btf;
8829 
8830     err = check_btf_func(env, attr, uattr);
8831     if (err) {
8832         return err;
8833     }
8834 
8835     err = check_btf_line(env, attr, uattr);
8836     if (err) {
8837         return err;
8838     }
8839 
8840     return 0;
8841 }
8842 
8843 /* check %cur's range satisfies %old's */
8844 static bool range_within(struct bpf_reg_state *old, struct bpf_reg_state *cur)
8845 {
8846     return old->umin_value <= cur->umin_value && old->umax_value >= cur->umax_value &&
8847            old->smin_value <= cur->smin_value && old->smax_value >= cur->smax_value &&
8848            old->u32_min_value <= cur->u32_min_value && old->u32_max_value >= cur->u32_max_value &&
8849            old->s32_min_value <= cur->s32_min_value && old->s32_max_value >= cur->s32_max_value;
8850 }
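
/* Illustrative example of range_within(): an old (explored) scalar with
 * umin=0, umax=100 covers a current scalar with umin=10, umax=20 (assuming
 * the signed and 32-bit bounds are nested the same way), so the state can be
 * pruned; a current umax of 200 falls outside the old range and cannot.
 */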
8851 
8852 /* If in the old state two registers had the same id, then they need to have
8853  * the same id in the new state as well.  But that id could be different from
8854  * the old state, so we need to track the mapping from old to new ids.
8855  * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
8856  * regs with old id 5 must also have new id 9 for the new state to be safe.  But
8857  * regs with a different old id could still have new id 9, we don't care about
8858  * that.
8859  * So we look through our idmap to see if this old id has been seen before.  If
8860  * so, we require the new id to match; otherwise, we add the id pair to the map.
8861  */
8862 static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
8863 {
8864     unsigned int i;
8865 
8866     for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
8867         if (!idmap[i].old) {
8868             /* Reached an empty slot; haven't seen this id before */
8869             idmap[i].old = old_id;
8870             idmap[i].cur = cur_id;
8871             return true;
8872         }
8873         if (idmap[i].old == old_id) {
8874             return idmap[i].cur == cur_id;
8875         }
8876     }
8877     /* We ran out of idmap slots, which should be impossible */
8878     WARN_ON_ONCE(1);
8879     return false;
8880 }
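
/* Illustrative example of the id mapping above: if the old state had
 * r1.id == 5 and r2.id == 5 (both from the same map lookup), a current state
 * with r1.id == 9 and r2.id == 9 is accepted (5 -> 9 gets recorded in the
 * idmap), while r1.id == 9 but r2.id == 4 is rejected, because old id 5 has
 * already been mapped to new id 9.
 */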
8881 
8882 static void clean_func_state(struct bpf_verifier_env *env, struct bpf_func_state *st)
8883 {
8884     enum bpf_reg_liveness live;
8885     int i, j;
8886 
8887     for (i = 0; i < BPF_REG_FP; i++) {
8888         live = st->regs[i].live;
8889         /* liveness must not touch this register anymore */
8890         st->regs[i].live |= REG_LIVE_DONE;
8891         if (!(live & REG_LIVE_READ)) {
8892             /* since the register is unused, clear its state
8893              * to make further comparison simpler
8894              */
8895             verifier_mark_reg_not_init(env, &st->regs[i]);
8896         }
8897     }
8898 
8899     for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
8900         live = st->stack[i].spilled_ptr.live;
8901         /* liveness must not touch this stack slot anymore */
8902         st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
8903         if (!(live & REG_LIVE_READ)) {
8904             verifier_mark_reg_not_init(env, &st->stack[i].spilled_ptr);
8905             for (j = 0; j < BPF_REG_SIZE; j++) {
8906                 st->stack[i].slot_type[j] = STACK_INVALID;
8907             }
8908         }
8909     }
8910 }
8911 
8912 static void clean_verifier_state(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
8913 {
8914     int i;
8915 
8916     if (st->frame[0]->regs[0].live & REG_LIVE_DONE) {
8917         /* all regs in this state in all frames were already marked */
8918         return;
8919     }
8920 
8921     for (i = 0; i <= st->curframe; i++) {
8922         clean_func_state(env, st->frame[i]);
8923     }
8924 }
8925 
8926 /* the parentage chains form a tree.
8927  * the verifier states are added to state lists at given insn and
8928  * pushed into state stack for future exploration.
8929  * when the verifier reaches the bpf_exit insn some of the verifier states
8930  * stored in the state lists have their final liveness state already,
8931  * but a lot of states will get revised from liveness point of view when
8932  * the verifier explores other branches.
8933  * 1: r0 = 1
8934  * 2: if r1 == 100 goto pc+1
8935  * 3: r0 = 2
8936  * 4: exit
8937  * when the verifier reaches exit insn the register r0 in the state list of
8938  * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
8939  * of insn 2 and goes exploring further. At the insn 4 it will walk the
8940  * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
8941  *
8942  * Since the verifier pushes the branch states as it sees them while exploring
8943  * the program, the condition of walking the branch instruction for the second
8944  * time means that all states below this branch were already explored and
8945  * their final liveness marks are already propagated.
8946  * Hence when the verifier completes the search of state list in is_state_visited()
8947  * we can call this clean_live_states() function to mark all liveness states
8948  * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
8949  * will not be used.
8950  * This function also clears the registers and stack for states that !READ
8951  * to simplify state merging.
8952  *
8953  * An important note here is that walking the same branch instruction in the
8954  * callee doesn't mean that the states are DONE. The verifier has to compare
8955  * the callsites.
8956  */
8957 static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur)
8958 {
8959     struct bpf_verifier_state_list *sl;
8960     int i;
8961 
8962     sl = *explored_state(env, insn);
8963     while (sl) {
8964         if (sl->state.branches) {
8965             goto next;
8966         }
8967         if (sl->state.insn_idx != insn || sl->state.curframe != cur->curframe) {
8968             goto next;
8969         }
8970         for (i = 0; i <= cur->curframe; i++) {
8971             if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) {
8972                 goto next;
8973             }
8974         }
8975         clean_verifier_state(env, &sl->state);
8976     next:
8977         sl = sl->next;
8978     }
8979 }
8980 
8981 /* Returns true if (rold safe implies rcur safe) */
8982 static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
8983                     struct bpf_id_pair *idmap)
8984 {
8985     bool equal;
8986 
8987     if (!(rold->live & REG_LIVE_READ)) {
8988         /* explored state didn't use this */
8989         return true;
8990     }
8991 
8992     equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
8993 
8994     if (rold->type == PTR_TO_STACK) {
8995         /* two stack pointers are equal only if they're pointing to
8996          * the same stack frame, since fp-8 in foo != fp-8 in bar
8997          */
8998         return equal && rold->frameno == rcur->frameno;
8999     }
9000 
9001     if (equal) {
9002         return true;
9003     }
9004 
9005     if (rold->type == NOT_INIT) {
9006         /* explored state can't have used this */
9007         return true;
9008     }
9009     if (rcur->type == NOT_INIT) {
9010         return false;
9011     }
9012     switch (base_type(rold->type)) {
9013         case SCALAR_VALUE:
9014             if (env->explore_alu_limits) {
9015                 return false;
9016             }
9017             if (rcur->type == SCALAR_VALUE) {
9018                 if (!rold->precise && !rcur->precise) {
9019                     return true;
9020                 }
9021                 /* new val must satisfy old val knowledge */
9022                 return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
9023             } else {
9024                 /* We're trying to use a pointer in place of a scalar.
9025                  * Even if the scalar was unbounded, this could lead to
9026                  * pointer leaks because scalars are allowed to leak
9027                  * while pointers are not. We could make this safe in
9028                  * special cases if root is calling us, but it's
9029                  * probably not worth the hassle.
9030                  */
9031                 return false;
9032             }
9033         case PTR_TO_MAP_VALUE:
9034             /* a PTR_TO_MAP_VALUE could be safe to use as a
9035              * PTR_TO_MAP_VALUE_OR_NULL into the same map.
9036              * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
9037              * checked, doing so could have affected others with the same
9038              * id, and we can't check for that because we lost the id when
9039              * we converted to a PTR_TO_MAP_VALUE.
9040              */
9041             if (type_may_be_null(rold->type)) {
9042                 if (!type_may_be_null(rcur->type)) {
9043                     return false;
9044                 }
9045                 if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) {
9046                     return false;
9047                 }
9048                 /* Check our ids match any regs they're supposed to */
9049                 return check_ids(rold->id, rcur->id, idmap);
9050             }
9051 
9052             /* If the new min/max/var_off satisfy the old ones and
9053              * everything else matches, we are OK.
9054              * 'id' is not compared, since it's only used for maps with
9055              * bpf_spin_lock inside map element and in such cases if
9056              * the rest of the prog is valid for one map element then
9057              * it's valid for all map elements regardless of the key
9058              * used in bpf_map_lookup()
9059              */
9060             return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) &&
9061                    tnum_in(rold->var_off, rcur->var_off);
9062         case PTR_TO_PACKET_META:
9063         case PTR_TO_PACKET:
9064             if (rcur->type != rold->type) {
9065                 return false;
9066             }
9067             /* We must have at least as much range as the old ptr
9068              * did, so that any accesses which were safe before are
9069              * still safe.  This is true even if old range < old off,
9070              * since someone could have accessed through (ptr - k), or
9071              * even done ptr -= k in a register, to get a safe access.
9072              */
9073             if (rold->range > rcur->range) {
9074                 return false;
9075             }
9076             /* If the offsets don't match, we can't trust our alignment;
9077              * nor can we be sure that we won't fall out of range.
9078              */
9079             if (rold->off != rcur->off) {
9080                 return false;
9081             }
9082             /* id relations must be preserved */
9083             if (rold->id && !check_ids(rold->id, rcur->id, idmap)) {
9084                 return false;
9085             }
9086             /* new val must satisfy old val knowledge */
9087             return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
9088         case PTR_TO_CTX:
9089         case CONST_PTR_TO_MAP:
9090         case PTR_TO_PACKET_END:
9091         case PTR_TO_FLOW_KEYS:
9092         case PTR_TO_SOCKET:
9093         case PTR_TO_SOCK_COMMON:
9094         case PTR_TO_TCP_SOCK:
9095         case PTR_TO_XDP_SOCK:
9096             /* Only valid matches are exact, which memcmp() above
9097              * would have accepted
9098              */
9099         default:
9100             /* Don't know what's going on, just say it's not safe */
9101             return false;
9102     }
9103 
9104     /* Shouldn't get here; if we do, say it's not safe */
9105     WARN_ON_ONCE(1);
9106     return false;
9107 }
9108 
9109 static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur,
9110                       struct bpf_id_pair *idmap)
9111 {
9112     int i, spi;
9113 
9114     /* walk slots of the explored stack and ignore any additional
9115      * slots in the current stack, since explored(safe) state
9116      * didn't use them
9117      */
9118     for (i = 0; i < old->allocated_stack; i++) {
9119         spi = i / BPF_REG_SIZE;
9120 
9121         if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
9122             i += BPF_REG_SIZE - 1;
9123             /* explored state didn't use this */
9124             continue;
9125         }
9126 
9127         if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) {
9128             continue;
9129         }
9130 
9131         /* explored stack has more populated slots than current stack
9132          * and these slots were used
9133          */
9134         if (i >= cur->allocated_stack) {
9135             return false;
9136         }
9137 
9138         /* if old state was safe with misc data in the stack
9139          * it will be safe with zero-initialized stack.
9140          * The opposite is not true
9141          */
9142         if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
9143             cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) {
9144             continue;
9145         }
9146         if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) {
9147             /* Ex: old explored (safe) state has STACK_SPILL in
9148              * this stack slot, but current has STACK_MISC ->
9149              * these verifier states are not equivalent,
9150              * return false to continue verification of this path
9151              */
9152             return false;
9153         }
9154         if (i % BPF_REG_SIZE) {
9155             continue;
9156         }
9157         if (old->stack[spi].slot_type[0] != STACK_SPILL) {
9158             continue;
9159         }
9160         if (!regsafe(env, &old->stack[spi].spilled_ptr, &cur->stack[spi].spilled_ptr, idmap)) {
9161             /* when explored and current stack slot are both storing
9162              * spilled registers, check that stored pointer types
9163              * are the same as well.
9164              * Ex: explored safe path could have stored
9165              * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
9166              * but current path has stored:
9167              * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
9168              * such verifier states are not equivalent.
9169              * return false to continue verification of this path
9170              */
9171             return false;
9172         }
9173     }
9174     return true;
9175 }
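
/* Illustrative example of the slot_type rules above: an old (explored) state
 * with STACK_MISC in a read slot also covers a current state that has
 * STACK_ZERO there, since zero-initialized stack is just a special case of
 * scratch data; the reverse (old STACK_ZERO, current STACK_MISC) is not safe
 * and makes stacksafe() return false.
 */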
9176 
9177 static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
9178 {
9179     if (old->acquired_refs != cur->acquired_refs) {
9180         return false;
9181     }
9182     return !memcmp(old->refs, cur->refs, sizeof(*old->refs) * old->acquired_refs);
9183 }
9184 
9185 /* compare two verifier states
9186  *
9187  * all states stored in state_list are known to be valid, since
9188  * verifier reached 'bpf_exit' instruction through them
9189  *
9190  * this function is called when the verifier explores different branches of
9191  * execution popped from the state stack. If it sees an old state that has
9192  * more strict register state and more strict stack state then this execution
9193  * branch doesn't need to be explored further, since verifier already
9194  * concluded that more strict state leads to valid finish.
9195  *
9196  * Therefore two states are equivalent if register state is more conservative
9197  * and explored stack state is more conservative than the current one.
9198  * Example:
9199  *       explored                   current
9200  * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
9201  * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
9202  *
9203  * In other words if current stack state (one being explored) has more
9204  * valid slots than old one that already passed validation, it means
9205  * the verifier can stop exploring and conclude that current state is valid too
9206  *
9207  * Similarly with registers. If explored state has register type as invalid
9208  * whereas register type in current state is meaningful, it means that
9209  * the current state will reach 'bpf_exit' instruction safely
9210  */
9211 static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur)
9212 {
9213     int i;
9214 
9215     memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
9216     for (i = 0; i < MAX_BPF_REG; i++) {
9217         if (!regsafe(env, &old->regs[i], &cur->regs[i], env->idmap_scratch)) {
9218             return false;
9219         }
9220     }
9221 
9222     if (!stacksafe(env, old, cur, env->idmap_scratch)) {
9223         return false;
9224     }
9225 
9226     if (!refsafe(old, cur)) {
9227         return false;
9228     }
9229 
9230     return true;
9231 }
9232 
9233 static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
9234 {
9235     int i;
9236 
9237     if (old->curframe != cur->curframe) {
9238         return false;
9239     }
9240 
9241     /* Verification state from speculative execution simulation
9242      * must never prune a non-speculative execution one.
9243      */
9244     if (old->speculative && !cur->speculative) {
9245         return false;
9246     }
9247 
9248     if (old->active_spin_lock != cur->active_spin_lock) {
9249         return false;
9250     }
9251 
9252     /* for states to be equal callsites have to be the same
9253      * and all frame states need to be equivalent
9254      */
9255     for (i = 0; i <= old->curframe; i++) {
9256         if (old->frame[i]->callsite != cur->frame[i]->callsite) {
9257             return false;
9258         }
9259         if (!func_states_equal(env, old->frame[i], cur->frame[i])) {
9260             return false;
9261         }
9262     }
9263     return true;
9264 }
9265 
9266 /* Return 0 if no propagation happened. Return negative error code if error
9267  * happened. Otherwise, return the propagated bit.
9268  */
9269 static int propagate_liveness_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
9270                                   struct bpf_reg_state *parent_reg)
9271 {
9272     u8 parent_flag = parent_reg->live & REG_LIVE_READ;
9273     u8 flag = reg->live & REG_LIVE_READ;
9274     int err;
9275 
9276     /* When we get here, the read flags of PARENT_REG or REG could be any of
9277      * REG_LIVE_READ64, REG_LIVE_READ32 or REG_LIVE_NONE. There is no need
9278      * for propagation if PARENT_REG already has the strongest REG_LIVE_READ64.
9279      */
9280     if (parent_flag == REG_LIVE_READ64 ||
9281         /* Or if there is no read flag from REG. */
9282         !flag ||
9283         /* Or if the read flag from REG is the same as PARENT_REG. */
9284         parent_flag == flag) {
9285         return 0;
9286     }
9287 
9288     err = mark_reg_read(env, reg, parent_reg, flag);
9289     if (err) {
9290         return err;
9291     }
9292 
9293     return flag;
9294 }
9295 
9296 /* A write screens off any subsequent reads; but write marks come from the
9297  * straight-line code between a state and its parent.  When we arrive at an
9298  * equivalent state (jump target or such) we didn't arrive by the straight-line
9299  * code, so read marks in the state must propagate to the parent regardless
9300  * of the state's write marks. That's what 'parent == state->parent' comparison
9301  * in mark_reg_read() is for.
9302  */
9303 static int propagate_liveness(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
9304                               struct bpf_verifier_state *vparent)
9305 {
9306     struct bpf_reg_state *state_reg, *parent_reg;
9307     struct bpf_func_state *state, *parent;
9308     int i, frame, err = 0;
9309 
9310     if (vparent->curframe != vstate->curframe) {
9311         WARN(1, "propagate_live: parent frame %d current frame %d\n", vparent->curframe, vstate->curframe);
9312         return -EFAULT;
9313     }
9314     /* Propagate read liveness of registers... */
9315     BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
9316     for (frame = 0; frame <= vstate->curframe; frame++) {
9317         parent = vparent->frame[frame];
9318         state = vstate->frame[frame];
9319         parent_reg = parent->regs;
9320         state_reg = state->regs;
9321         /* We don't need to worry about FP liveness, it's read-only */
9322         for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9323             err = propagate_liveness_reg(env, &state_reg[i], &parent_reg[i]);
9324             if (err < 0) {
9325                 return err;
9326             }
9327             if (err == REG_LIVE_READ64) {
9328                 mark_insn_zext(env, &parent_reg[i]);
9329             }
9330         }
9331 
9332         /* Propagate stack slots. */
9333         for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && i < parent->allocated_stack / BPF_REG_SIZE; i++) {
9334             parent_reg = &parent->stack[i].spilled_ptr;
9335             state_reg = &state->stack[i].spilled_ptr;
9336             err = propagate_liveness_reg(env, state_reg, parent_reg);
9337             if (err < 0) {
9338                 return err;
9339             }
9340         }
9341     }
9342     return 0;
9343 }
9344 
9345 /* find precise scalars in the previous equivalent state and
9346  * propagate them into the current state
9347  */
9348 static int propagate_precision(struct bpf_verifier_env *env, const struct bpf_verifier_state *old)
9349 {
9350     struct bpf_reg_state *state_reg;
9351     struct bpf_func_state *state;
9352     int i, err = 0;
9353 
9354     state = old->frame[old->curframe];
9355     state_reg = state->regs;
9356     for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
9357         if (state_reg->type != SCALAR_VALUE || !state_reg->precise) {
9358             continue;
9359         }
9360         if (env->log.level & BPF_LOG_LEVEL2) {
9361             verbose(env, "propagating r%d\n", i);
9362         }
9363         err = mark_chain_precision(env, i);
9364         if (err < 0) {
9365             return err;
9366         }
9367     }
9368 
9369     for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
9370         if (state->stack[i].slot_type[0] != STACK_SPILL) {
9371             continue;
9372         }
9373         state_reg = &state->stack[i].spilled_ptr;
9374         if (state_reg->type != SCALAR_VALUE || !state_reg->precise) {
9375             continue;
9376         }
9377         if (env->log.level & BPF_LOG_LEVEL2) {
9378             verbose(env, "propagating fp%d\n", (-i - 1) * BPF_REG_SIZE);
9379         }
9380         err = mark_chain_precision_stack(env, i);
9381         if (err < 0) {
9382             return err;
9383         }
9384     }
9385     return 0;
9386 }
9387 
9388 static bool states_maybe_looping(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
9389 {
9390     struct bpf_func_state *fold, *fcur;
9391     int i, fr = cur->curframe;
9392 
9393     if (old->curframe != fr) {
9394         return false;
9395     }
9396 
9397     fold = old->frame[fr];
9398     fcur = cur->frame[fr];
9399     for (i = 0; i < MAX_BPF_REG; i++) {
9400         if (memcmp(&fold->regs[i], &fcur->regs[i], offsetof(struct bpf_reg_state, parent))) {
9401             return false;
9402         }
9403     }
9404     return true;
9405 }
9406 
9407 static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
9408 {
9409     struct bpf_verifier_state_list *new_sl;
9410     struct bpf_verifier_state_list *sl, **pprev;
9411     struct bpf_verifier_state *cur = env->cur_state, *new;
9412     int i, j, err, states_cnt = 0;
9413     bool add_new_state = env->test_state_freq ? true : false;
9414 
9415     cur->last_insn_idx = env->prev_insn_idx;
9416     if (!env->insn_aux_data[insn_idx].prune_point) {
9417         /* this 'insn_idx' instruction wasn't marked, so we will not
9418          * be doing state search here
9419          */
9420         return 0;
9421     }
9422 
9423     /* bpf progs typically have a pruning point every 4 instructions
9424      * http://vger.kernel.org/bpfconf2019.html#session-1
9425      * Do not add new state for future pruning if the verifier hasn't seen
9426      * at least 2 jumps and at least 8 instructions.
9427      * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
9428      * In tests that amounts to up to 50% reduction in total verifier
9429      * memory consumption and 20% verifier time speedup.
9430      */
9431     if (env->jmps_processed - env->prev_jmps_processed >= 2 && env->insn_processed - env->prev_insn_processed >= 8) {
9432         add_new_state = true;
9433     }
9434 
9435     pprev = explored_state(env, insn_idx);
9436     sl = *pprev;
9437 
9438     clean_live_states(env, insn_idx, cur);
9439 
9440     while (sl) {
9441         states_cnt++;
9442         if (sl->state.insn_idx != insn_idx) {
9443             goto next;
9444         }
9445         if (sl->state.branches) {
9446             if (states_maybe_looping(&sl->state, cur) && states_equal(env, &sl->state, cur)) {
9447                 verbose_linfo(env, insn_idx, "; ");
9448                 verbose(env, "infinite loop detected at insn %d\n", insn_idx);
9449                 return -EINVAL;
9450             }
9451             /* if the verifier is processing a loop, avoid adding new state
9452              * too often, since different loop iterations have distinct
9453              * states and may not help future pruning.
9454              * This threshold shouldn't be too low to make sure that
9455              * a loop with large bound will be rejected quickly.
9456              * The most abusive loop will be:
9457              * r1 += 1
9458              * if r1 < 1000000 goto pc-2
9459              * 1M insn_processed limit / 100 == 10k peak states.
9460              * This threshold shouldn't be too high either, since states
9461              * at the end of the loop are likely to be useful in pruning.
9462              */
9463             if (env->jmps_processed - env->prev_jmps_processed < 20 &&
9464                 env->insn_processed - env->prev_insn_processed < 100) {
9465                 add_new_state = false;
9466             }
9467             goto miss;
9468         }
9469         if (states_equal(env, &sl->state, cur)) {
9470             sl->hit_cnt++;
9471             /* reached equivalent register/stack state,
9472              * prune the search.
9473              * Registers read by the continuation are read by us.
9474              * If we have any write marks in env->cur_state, they
9475              * will prevent corresponding reads in the continuation
9476              * from reaching our parent (an explored_state).  Our
9477              * own state will get the read marks recorded, but
9478              * they'll be immediately forgotten as we're pruning
9479              * this state and will pop a new one.
9480              */
9481             err = propagate_liveness(env, &sl->state, cur);
9482 
9483             /* if previous state reached the exit with precision and
9484              * current state is equivalent to it (except precision marks)
9485              * the precision needs to be propagated back in
9486              * the current state.
9487              */
9488             err = err ?: push_jmp_history(env, cur);
9489             err = err ?: propagate_precision(env, &sl->state);
9490             if (err) {
9491                 return err;
9492             }
9493             return 1;
9494         }
9495     miss:
9496         /* when new state is not going to be added do not increase miss count.
9497          * Otherwise several loop iterations will remove the state
9498          * recorded earlier. The goal of these heuristics is to have
9499          * states from some iterations of the loop (some in the beginning
9500          * and some at the end) to help pruning.
9501          */
9502         if (add_new_state) {
9503             sl->miss_cnt++;
9504         }
9505         /* heuristic to determine whether this state is beneficial
9506          * to keep checking from state equivalence point of view.
9507          * Higher numbers increase max_states_per_insn and verification time,
9508          * but do not meaningfully decrease insn_processed.
9509          */
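        /* Concretely, with these factors a state that has never produced a
         * hit (hit_cnt == 0) is dropped once it accumulates four misses,
         * since 4 > 0 * 3 + 3.
         */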
9510         if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
9511             /* the state is unlikely to be useful. Remove it to
9512              * speed up verification
9513              */
9514             *pprev = sl->next;
9515             if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
9516                 u32 br = sl->state.branches;
9517 
9518                 WARN_ONCE(br, "BUG live_done but branches_to_explore %d\n", br);
9519                 free_verifier_state(&sl->state, false);
9520                 kfree(sl);
9521                 env->peak_states--;
9522             } else {
9523                 /* cannot free this state, since parentage chain may
9524                  * walk it later. Add it to free_list instead to
9525                  * be freed at the end of verification
9526                  */
9527                 sl->next = env->free_list;
9528                 env->free_list = sl;
9529             }
9530             sl = *pprev;
9531             continue;
9532         }
9533     next:
9534         pprev = &sl->next;
9535         sl = *pprev;
9536     }
9537 
9538     if (env->max_states_per_insn < states_cnt) {
9539         env->max_states_per_insn = states_cnt;
9540     }
9541 
9542     if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) {
9543         return push_jmp_history(env, cur);
9544     }
9545 
9546     if (!add_new_state) {
9547         return push_jmp_history(env, cur);
9548     }
9549 
9550     /* There were no equivalent states, remember the current one.
9551      * Technically the current state is not proven to be safe yet,
9552      * but it will either reach the outermost bpf_exit (which means it's safe)
9553      * or it will be rejected. When there are no loops the verifier won't be
9554      * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
9555      * again on the way to bpf_exit.
9556      * When looping the sl->state.branches will be > 0 and this state
9557      * will not be considered for equivalence until branches == 0.
9558      */
9559     new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
9560     if (!new_sl) {
9561         return -ENOMEM;
9562     }
9563     env->total_states++;
9564     env->peak_states++;
9565     env->prev_jmps_processed = env->jmps_processed;
9566     env->prev_insn_processed = env->insn_processed;
9567 
9568     /* add new state to the head of linked list */
9569     new = &new_sl->state;
9570     err = copy_verifier_state(new, cur);
9571     if (err) {
9572         free_verifier_state(new, false);
9573         kfree(new_sl);
9574         return err;
9575     }
9576     new->insn_idx = insn_idx;
9577     WARN_ONCE(new->branches != 1, "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
9578 
9579     cur->parent = new;
9580     cur->first_insn_idx = insn_idx;
9581     clear_jmp_history(cur);
9582     new_sl->next = *explored_state(env, insn_idx);
9583     *explored_state(env, insn_idx) = new_sl;
9584     /* connect new state to parentage chain. Current frame needs all
9585      * registers connected. Only r6 - r9 of the callers are alive (pushed
9586      * to the stack implicitly by JITs) so in callers' frames connect just
9587      * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
9588      * the state of the call instruction (with WRITTEN set), and r0 comes
9589      * from callee with its full parentage chain, anyway.
9590      */
9591     /* clear write marks in current state: the writes we did are not writes
9592      * our child did, so they don't screen off its reads from us.
9593      * (There are no read marks in current state, because reads always mark
9594      * their parent and current state never has children yet.  Only
9595      * explored_states can get read marks.)
9596      */
9597     for (j = 0; j <= cur->curframe; j++) {
9598         for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9599             cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
9600         }
9601         for (i = 0; i < BPF_REG_FP; i++) {
9602             cur->frame[j]->regs[i].live = REG_LIVE_NONE;
9603         }
9604     }
9605 
9606     /* all stack frames are accessible from callee, clear them all */
9607     for (j = 0; j <= cur->curframe; j++) {
9608         struct bpf_func_state *frame = cur->frame[j];
9609         struct bpf_func_state *newframe = new->frame[j];
9610 
9611         for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
9612             frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
9613             frame->stack[i].spilled_ptr.parent = &newframe->stack[i].spilled_ptr;
9614         }
9615     }
9616     return 0;
9617 }
9618 
9619 /* Return true if it's OK to have the same insn return a different type. */
9620 static bool reg_type_mismatch_ok(enum bpf_reg_type type)
9621 {
9622     switch (base_type(type)) {
9623         case PTR_TO_CTX:
9624         case PTR_TO_SOCKET:
9625         case PTR_TO_SOCK_COMMON:
9626         case PTR_TO_TCP_SOCK:
9627         case PTR_TO_XDP_SOCK:
9628         case PTR_TO_BTF_ID:
9629             return false;
9630         default:
9631             return true;
9632     }
9633 }
9634 
9635 /* If an instruction was previously used with particular pointer types, then we
9636  * need to be careful to avoid cases such as the below, where it may be ok
9637  * for one branch accessing the pointer, but not ok for the other branch:
9638  *
9639  * R1 = sock_ptr
9640  * goto X;
9641  * ...
9642  * R1 = some_other_valid_ptr;
9643  * goto X;
9644  * ...
9645  * R2 = *(u32 *)(R1 + 0);
9646  */
9647 static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
9648 {
9649     return src != prev && (!reg_type_mismatch_ok(src) || !reg_type_mismatch_ok(prev));
9650 }
9651 
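/* Main simulation loop.  do_check() walks the program from env->insn_idx,
 * asks is_state_visited() whether the current path can be pruned, dispatches
 * on the insn class (ALU/ALU64, LDX, STX, ST, JMP/JMP32, LD) and, on BPF_EXIT
 * of the outermost frame, pops the next pending branch state via pop_stack()
 * until no unexplored branches remain.
 */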
9652 static int do_check(struct bpf_verifier_env *env)
9653 {
9654     bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
9655     struct bpf_verifier_state *state = env->cur_state;
9656     struct bpf_insn *insns = env->prog->insnsi;
9657     struct bpf_reg_state *regs;
9658     int insn_cnt = env->prog->len;
9659     bool do_print_state = false;
9660     int prev_insn_idx = -1;
9661 
9662     for (;;) {
9663         struct bpf_insn *insn;
9664         u8 class;
9665         int err;
9666 
9667         env->prev_insn_idx = prev_insn_idx;
9668         if (env->insn_idx >= insn_cnt) {
9669             verbose(env, "invalid insn idx %d insn_cnt %d\n", env->insn_idx, insn_cnt);
9670             return -EFAULT;
9671         }
9672 
9673         insn = &insns[env->insn_idx];
9674         class = BPF_CLASS(insn->code);
9675 
9676         if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
9677             verbose(env, "BPF program is too large. Processed %d insn\n", env->insn_processed);
9678             return -E2BIG;
9679         }
9680 
9681         err = is_state_visited(env, env->insn_idx);
9682         if (err < 0) {
9683             return err;
9684         }
9685         if (err == 1) {
9686             /* found equivalent state, can prune the search */
9687             if (env->log.level & BPF_LOG_LEVEL) {
9688                 if (do_print_state) {
9689                     verbose(env, "\nfrom %d to %d%s: safe\n", env->prev_insn_idx, env->insn_idx,
9690                             env->cur_state->speculative ? " (speculative execution)" : "");
9691                 } else {
9692                     verbose(env, "%d: safe\n", env->insn_idx);
9693                 }
9694             }
9695             goto process_bpf_exit;
9696         }
9697 
9698         if (signal_pending(current)) {
9699             return -EAGAIN;
9700         }
9701 
9702         if (need_resched()) {
9703             cond_resched();
9704         }
9705 
9706         if ((env->log.level & BPF_LOG_LEVEL2) || ((env->log.level & BPF_LOG_LEVEL) && do_print_state)) {
9707             if (env->log.level & BPF_LOG_LEVEL2) {
9708                 verbose(env, "%d:", env->insn_idx);
9709             } else {
9710                 verbose(env, "\nfrom %d to %d%s:", env->prev_insn_idx, env->insn_idx,
9711                         env->cur_state->speculative ? " (speculative execution)" : "");
9712             }
9713             print_verifier_state(env, state->frame[state->curframe]);
9714             do_print_state = false;
9715         }
9716 
9717         if (env->log.level & BPF_LOG_LEVEL) {
9718             const struct bpf_insn_cbs cbs = {
9719                 .cb_print = verbose,
9720                 .private_data = env,
9721             };
9722 
9723             verbose_linfo(env, env->insn_idx, "; ");
9724             verbose(env, "%d: ", env->insn_idx);
9725             print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
9726         }
9727 
9728         if (bpf_prog_is_dev_bound(env->prog->aux)) {
9729             err = bpf_prog_offload_verify_insn(env, env->insn_idx, env->prev_insn_idx);
9730             if (err) {
9731                 return err;
9732             }
9733         }
9734 
9735         regs = cur_regs(env);
9736         sanitize_mark_insn_seen(env);
9737         prev_insn_idx = env->insn_idx;
9738 
9739         if (class == BPF_ALU || class == BPF_ALU64) {
9740             err = check_alu_op(env, insn);
9741             if (err) {
9742                 return err;
9743             }
9744         } else if (class == BPF_LDX) {
9745             enum bpf_reg_type *prev_src_type, src_reg_type;
9746 
9747             /* check for reserved fields is already done */
9748 
9749             /* check src operand */
9750             err = check_reg_arg(env, insn->src_reg, SRC_OP);
9751             if (err) {
9752                 return err;
9753             }
9754 
9755             err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9756             if (err) {
9757                 return err;
9758             }
9759 
9760             src_reg_type = regs[insn->src_reg].type;
9761 
9762             /* check that memory (src_reg + off) is readable,
9763              * the state of dst_reg will be updated by this func
9764              */
9765             err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ,
9766                                    insn->dst_reg, false);
9767             if (err) {
9768                 return err;
9769             }
9770 
9771             prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9772 
9773             if (*prev_src_type == NOT_INIT) {
9774                 /* saw a valid insn
9775                  * dst_reg = *(u32 *)(src_reg + off)
9776                  * save type to validate intersecting paths
9777                  */
9778                 *prev_src_type = src_reg_type;
9779             } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9780                 /* Abuser program is trying to use the same insn
9781                  * dst_reg = *(u32*) (src_reg + off)
9782                  * with different pointer types:
9783                  * src_reg == ctx in one branch and
9784                  * src_reg == stack|map in some other branch.
9785                  * Reject it.
9786                  */
9787                 verbose(env, "same insn cannot be used with different pointers\n");
9788                 return -EINVAL;
9789             }
9790         } else if (class == BPF_STX) {
9791             enum bpf_reg_type *prev_dst_type, dst_reg_type;
9792             if (BPF_MODE(insn->code) == BPF_XADD) {
9793                 err = check_xadd(env, env->insn_idx, insn);
9794                 if (err) {
9795                     return err;
9796                 }
9797                 env->insn_idx++;
9798                 continue;
9799             }
9800 
9801             /* check src1 operand */
9802             err = check_reg_arg(env, insn->src_reg, SRC_OP);
9803             if (err) {
9804                 return err;
9805             }
9806             /* check src2 operand */
9807             err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9808             if (err) {
9809                 return err;
9810             }
9811 
9812             dst_reg_type = regs[insn->dst_reg].type;
9813 
9814             /* check that memory (dst_reg + off) is writeable */
9815             err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE,
9816                                    insn->src_reg, false);
9817             if (err) {
9818                 return err;
9819             }
9820 
9821             prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9822 
9823             if (*prev_dst_type == NOT_INIT) {
9824                 *prev_dst_type = dst_reg_type;
9825             } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
9826                 verbose(env, "same insn cannot be used with different pointers\n");
9827                 return -EINVAL;
9828             }
9829         } else if (class == BPF_ST) {
9830             if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
9831                 verbose(env, "BPF_ST uses reserved fields\n");
9832                 return -EINVAL;
9833             }
9834             /* check src operand */
9835             err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9836             if (err) {
9837                 return err;
9838             }
9839             if (is_ctx_reg(env, insn->dst_reg)) {
9840                 verbose(env, "BPF_ST stores into R%d %s is not allowed\n", insn->dst_reg,
9841                         reg_type_str(env, reg_state(env, insn->dst_reg)->type));
9842                 return -EACCES;
9843             }
9844 
9845             /* check that memory (dst_reg + off) is writeable */
9846             err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1,
9847                                    false);
9848             if (err) {
9849                 return err;
9850             }
9851         } else if (class == BPF_JMP || class == BPF_JMP32) {
9852             u8 opcode = BPF_OP(insn->code);
9853             env->jmps_processed++;
9854             if (opcode == BPF_CALL) {
9855                 if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 ||
9856                     (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL) || insn->dst_reg != BPF_REG_0 ||
9857                     class == BPF_JMP32) {
9858                     verbose(env, "BPF_CALL uses reserved fields\n");
9859                     return -EINVAL;
9860                 }
9861 
9862                 if (env->cur_state->active_spin_lock &&
9863                     (insn->src_reg == BPF_PSEUDO_CALL || insn->imm != BPF_FUNC_spin_unlock)) {
9864                     verbose(env, "function calls are not allowed while holding a lock\n");
9865                     return -EINVAL;
9866                 }
9867                 if (insn->src_reg == BPF_PSEUDO_CALL) {
9868                     err = check_func_call(env, insn, &env->insn_idx);
9869                 } else {
9870                     err = check_helper_call(env, insn->imm, env->insn_idx);
9871                 }
9872                 if (err) {
9873                     return err;
9874                 }
9875             } else if (opcode == BPF_JA) {
9876                 if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 ||
9877                     insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
9878                     verbose(env, "BPF_JA uses reserved fields\n");
9879                     return -EINVAL;
9880                 }
9881                 env->insn_idx += insn->off + 1;
9882                 continue;
9883             } else if (opcode == BPF_EXIT) {
9884                 if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 ||
9885                     insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
9886                     verbose(env, "BPF_EXIT uses reserved fields\n");
9887                     return -EINVAL;
9888                 }
9889                 if (env->cur_state->active_spin_lock) {
9890                     verbose(env, "bpf_spin_unlock is missing\n");
9891                     return -EINVAL;
9892                 }
9893                 if (state->curframe) {
9894                     /* exit from nested function */
9895                     err = prepare_func_exit(env, &env->insn_idx);
9896                     if (err) {
9897                         return err;
9898                     }
9899                     do_print_state = true;
9900                     continue;
9901                 }
9902 
9903                 err = check_reference_leak(env);
9904                 if (err) {
9905                     return err;
9906                 }
9907 
9908                 err = check_return_code(env);
9909                 if (err) {
9910                     return err;
9911                 }
9912             process_bpf_exit:
9913                 update_branch_counts(env, env->cur_state);
9914                 err = pop_stack(env, &prev_insn_idx, &env->insn_idx, pop_log);
9915                 if (err < 0) {
9916                     if (err != -ENOENT) {
9917                         return err;
9918                     }
9919                     break;
9920                 } else {
9921                     do_print_state = true;
9922                     continue;
9923                 }
9924             } else {
9925                 err = check_cond_jmp_op(env, insn, &env->insn_idx);
9926                 if (err) {
9927                     return err;
9928                 }
9929             }
9930         } else if (class == BPF_LD) {
9931             u8 mode = BPF_MODE(insn->code);
9932             if (mode == BPF_ABS || mode == BPF_IND) {
9933                 err = check_ld_abs(env, insn);
9934                 if (err) {
9935                     return err;
9936                 }
9937             } else if (mode == BPF_IMM) {
9938                 err = check_ld_imm(env, insn);
9939                 if (err) {
9940                     return err;
9941                 }
9942                 env->insn_idx++;
9943                 sanitize_mark_insn_seen(env);
9944             } else {
9945                 verbose(env, "invalid BPF_LD mode\n");
9946                 return -EINVAL;
9947             }
9948         } else {
9949             verbose(env, "unknown insn class %d\n", class);
9950             return -EINVAL;
9951         }
9952         env->insn_idx++;
9953     }
9954 
9955     return 0;
9956 }
9957 
9958 /* replace pseudo btf_id with kernel symbol address */
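/* Roughly, a ld_imm64 insn with src_reg == BPF_PSEUDO_BTF_ID carries a
 * vmlinux BTF id in insn[0].imm:
 *     r1 = <btf_id of ksym>      (pseudo btf_id ld_imm64)
 * is rewritten below into a plain 64-bit immediate load of the symbol
 * address, split as insn[0].imm = low 32 bits, insn[1].imm = high 32 bits,
 * while aux->btf_var records the register type the verifier will give it
 * (PTR_TO_BTF_ID, PTR_TO_PERCPU_BTF_ID or read-only PTR_TO_MEM).
 */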
9959 static int check_pseudo_btf_id(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn_aux_data *aux)
9960 {
9961     const struct btf_var_secinfo *vsi;
9962     const struct btf_type *datasec;
9963     const struct btf_type *t;
9964     const char *sym_name;
9965     bool percpu = false;
9966     u32 type, id = insn->imm;
9967     s32 datasec_id;
9968     u64 addr;
9969     int i;
9970 
9971     if (!btf_vmlinux) {
9972         verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
9973         return -EINVAL;
9974     }
9975 
9976     if (insn[1].imm != 0) {
9977         verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
9978         return -EINVAL;
9979     }
9980 
9981     t = btf_type_by_id(btf_vmlinux, id);
9982     if (!t) {
9983         verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
9984         return -ENOENT;
9985     }
9986 
9987     if (!btf_type_is_var(t)) {
9988         verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
9989         return -EINVAL;
9990     }
9991 
9992     sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
9993     addr = kallsyms_lookup_name(sym_name);
9994     if (!addr) {
9995         verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", sym_name);
9996         return -ENOENT;
9997     }
9998 
9999     datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", BTF_KIND_DATASEC);
10000     if (datasec_id > 0) {
10001         datasec = btf_type_by_id(btf_vmlinux, datasec_id);
10002         for_each_vsi(i, datasec, vsi) {
10004             if (vsi->type == id) {
10005                 percpu = true;
10006                 break;
10007             }
10008         }
10009     }
10010 
10011     insn[0].imm = (u32)addr;
10012     insn[1].imm = addr >> VERIFIER_THIRTYTWO;
10013 
10014     type = t->type;
10015     t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
10016     if (percpu) {
10017         aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
10018         aux->btf_var.btf_id = type;
10019     } else if (!btf_type_is_struct(t)) {
10020         const struct btf_type *ret;
10021         const char *tname;
10022         u32 tsize;
10023 
10024         /* resolve the type size of ksym. */
10025         ret = btf_resolve_size(btf_vmlinux, t, &tsize);
10026         if (IS_ERR(ret)) {
10027             tname = btf_name_by_offset(btf_vmlinux, t->name_off);
10028             verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret));
10029             return -EINVAL;
10030         }
10031         aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
10032         aux->btf_var.mem_size = tsize;
10033     } else {
10034         aux->btf_var.reg_type = PTR_TO_BTF_ID;
10035         aux->btf_var.btf_id = type;
10036     }
10037     return 0;
10038 }
10039 
10040 static int check_map_prealloc(struct bpf_map *map)
10041 {
10042     return (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10043             map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
10044            !(map->map_flags & BPF_F_NO_PREALLOC);
10045 }
10046 
10047 static bool is_tracing_prog_type(enum bpf_prog_type type)
10048 {
10049     switch (type) {
10050         case BPF_PROG_TYPE_KPROBE:
10051         case BPF_PROG_TYPE_TRACEPOINT:
10052         case BPF_PROG_TYPE_PERF_EVENT:
10053         case BPF_PROG_TYPE_RAW_TRACEPOINT:
10054             return true;
10055         default:
10056             return false;
10057     }
10058 }
10059 
10060 static bool is_preallocated_map(struct bpf_map *map)
10061 {
10062     if (!check_map_prealloc(map)) {
10063         return false;
10064     }
10065     if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta)) {
10066         return false;
10067     }
10068     return true;
10069 }
10070 
10071 static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog)
10072 
10073 {
10074     enum bpf_prog_type prog_type = resolve_prog_type(prog);
10075     /*
10076      * Validate that trace type programs use preallocated hash maps.
10077      *
10078      * For programs attached to PERF events this is mandatory as the
10079      * perf NMI can hit any arbitrary code sequence.
10080      *
10081      * All other trace types using non-preallocated hash maps are unsafe as
10082      * well because tracepoints or kprobes can be inside locked regions
10083      * of the memory allocator or at a place where a recursion into the
10084      * memory allocator would see inconsistent state.
10085      *
10086      * On RT enabled kernels run-time allocation of all trace type
10087      * programs is strictly prohibited due to lock type constraints. On
10088      * !RT kernels it is allowed for backwards compatibility reasons for
10089      * now, but warnings are emitted so developers are made aware of
10090      * the unsafety and can fix their programs before this is enforced.
10091      */
10092     if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
10093         if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
10094             verbose(env, "perf_event programs can only use preallocated hash map\n");
10095             return -EINVAL;
10096         }
10097         if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
10098             verbose(env, "trace type programs can only use preallocated hash map\n");
10099             return -EINVAL;
10100         }
10101         WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
10102         verbose(
10103             env,
10104             "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
10105     }
10106 
10107     if ((is_tracing_prog_type(prog_type) || prog_type == BPF_PROG_TYPE_SOCKET_FILTER) && map_value_has_spin_lock(map)) {
10108         verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
10109         return -EINVAL;
10110     }
10111 
10112     if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && !bpf_offload_prog_map_match(prog, map)) {
10113         verbose(env, "offload device mismatch between prog and map\n");
10114         return -EINVAL;
10115     }
10116 
10117     if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
10118         verbose(env, "bpf_struct_ops map cannot be used in prog\n");
10119         return -EINVAL;
10120     }
10121 
10122     if (prog->aux->sleepable) {
10123         switch (map->map_type) {
10124             case BPF_MAP_TYPE_HASH:
10125             case BPF_MAP_TYPE_LRU_HASH:
10126             case BPF_MAP_TYPE_ARRAY:
10127                 if (!is_preallocated_map(map)) {
10128                     verbose(env, "Sleepable programs can only use preallocated hash maps\n");
10129                     return -EINVAL;
10130                 }
10131                 break;
10132             default:
10133                 verbose(env, "Sleepable programs can only use array and hash maps\n");
10134                 return -EINVAL;
10135         }
10136     }
10137 
10138     return 0;
10139 }
10140 
10141 static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
10142 {
10143     return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
10144 }
10145 
10146 /* find and rewrite pseudo imm in ld_imm64 instructions:
10147  *
10148  * 1. if it accesses map FD, replace it with actual map pointer.
10149  * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
10150  *
10151  * NOTE: btf_vmlinux is required for converting pseudo btf_id.
10152  */
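/* For case 1 the rewrite is, conceptually:
 *     r1 = map_fd            (ld_imm64, src_reg == BPF_PSEUDO_MAP_FD)
 * becomes
 *     r1 = &bpf_map          (insn[0].imm / insn[1].imm hold the low/high
 *                             32 bits of the kernel 'struct bpf_map' pointer)
 * convert_pseudo_ld_imm64() later clears src_reg so the JIT sees a plain
 * 64-bit immediate load.
 */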
10153 static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
10154 {
10155     struct bpf_insn *insn = env->prog->insnsi;
10156     int insn_cnt = env->prog->len;
10157     int i, j, err;
10158 
10159     err = bpf_prog_calc_tag(env->prog);
10160     if (err) {
10161         return err;
10162     }
10163 
10164     for (i = 0; i < insn_cnt; i++, insn++) {
10165         if (BPF_CLASS(insn->code) == BPF_LDX && (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
10166             verbose(env, "BPF_LDX uses reserved fields\n");
10167             return -EINVAL;
10168         }
10169 
10170         if (BPF_CLASS(insn->code) == BPF_STX &&
10171             ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
10172             verbose(env, "BPF_STX uses reserved fields\n");
10173             return -EINVAL;
10174         }
10175 
10176         if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
10177             struct bpf_insn_aux_data *aux;
10178             struct bpf_map *map;
10179             struct fd f;
10180             u64 addr;
10181 
10182             if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
10183                 insn[1].off != 0) {
10184                 verbose(env, "invalid bpf_ld_imm64 insn\n");
10185                 return -EINVAL;
10186             }
10187 
10188             if (insn[0].src_reg == 0) {
10189                 /* valid generic load 64-bit imm */
10190                 goto next_insn;
10191             }
10192 
10193             if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
10194                 aux = &env->insn_aux_data[i];
10195                 err = check_pseudo_btf_id(env, insn, aux);
10196                 if (err) {
10197                     return err;
10198                 }
10199                 goto next_insn;
10200             }
10201 
10202             /* In final convert_pseudo_ld_imm64() step, this is
10203              * converted into regular 64-bit imm load insn.
10204              */
10205             if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
10206                 (insn[0].src_reg == BPF_PSEUDO_MAP_FD && insn[1].imm != 0)) {
10207                 verbose(env, "unrecognized bpf_ld_imm64 insn\n");
10208                 return -EINVAL;
10209             }
10210 
10211             f = fdget(insn[0].imm);
10212             map = __bpf_map_get(f);
10213             if (IS_ERR(map)) {
10214                 verbose(env, "fd %d is not pointing to valid bpf_map\n", insn[0].imm);
10215                 return PTR_ERR(map);
10216             }
10217 
10218             err = check_map_prog_compatibility(env, map, env->prog);
10219             if (err) {
10220                 fdput(f);
10221                 return err;
10222             }
10223 
10224             aux = &env->insn_aux_data[i];
10225             if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
10226                 addr = (unsigned long)map;
10227             } else {
10228                 u32 off = insn[1].imm;
10229 
10230                 if (off >= BPF_MAX_VAR_OFF) {
10231                     verbose(env, "direct value offset of %u is not allowed\n", off);
10232                     fdput(f);
10233                     return -EINVAL;
10234                 }
10235 
10236                 if (!map->ops->map_direct_value_addr) {
10237                     verbose(env, "no direct value access support for this map type\n");
10238                     fdput(f);
10239                     return -EINVAL;
10240                 }
10241 
10242                 err = map->ops->map_direct_value_addr(map, &addr, off);
10243                 if (err) {
10244                     verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", map->value_size, off);
10245                     fdput(f);
10246                     return err;
10247                 }
10248 
10249                 aux->map_off = off;
10250                 addr += off;
10251             }
10252 
10253             insn[0].imm = (u32)addr;
10254             insn[1].imm = addr >> VERIFIER_THIRTYTWO;
10255 
10256             /* check whether we recorded this map already */
10257             for (j = 0; j < env->used_map_cnt; j++) {
10258                 if (env->used_maps[j] == map) {
10259                     aux->map_index = j;
10260                     fdput(f);
10261                     goto next_insn;
10262                 }
10263             }
10264 
10265             if (env->used_map_cnt >= MAX_USED_MAPS) {
10266                 fdput(f);
10267                 return -E2BIG;
10268             }
10269 
10270             /* hold the map. If the program is rejected by verifier,
10271              * the map will be released by release_maps() or it
10272              * will be used by the valid program until it's unloaded
10273              * and all maps are released in free_used_maps()
10274              */
10275             bpf_map_inc(map);
10276 
10277             aux->map_index = env->used_map_cnt;
10278             env->used_maps[env->used_map_cnt++] = map;
10279 
10280             if (bpf_map_is_cgroup_storage(map) && bpf_cgroup_storage_assign(env->prog->aux, map)) {
10281                 verbose(env, "only one cgroup storage of each type is allowed\n");
10282                 fdput(f);
10283                 return -EBUSY;
10284             }
10285 
10286             fdput(f);
10287         next_insn:
10288             insn++;
10289             i++;
10290             continue;
10291         }
10292 
10293         /* Basic sanity check before we invest more work here. */
10294         if (!bpf_opcode_in_insntable(insn->code)) {
10295             verbose(env, "unknown opcode %02x\n", insn->code);
10296             return -EINVAL;
10297         }
10298     }
10299 
10300     /* now all pseudo BPF_LD_IMM64 instructions load valid
10301      * 'struct bpf_map *' into a register instead of user map_fd.
10302      * These pointers will be used later by verifier to validate map access.
10303      */
10304     return 0;
10305 }
10306 
10307 /* drop refcnt of maps used by the rejected program */
10308 static void release_maps(struct bpf_verifier_env *env)
10309 {
10310     __bpf_free_used_maps(env->prog->aux, env->used_maps, env->used_map_cnt);
10311 }
10312 
10313 /* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
10314 static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
10315 {
10316     struct bpf_insn *insn = env->prog->insnsi;
10317     int insn_cnt = env->prog->len;
10318     int i;
10319 
10320     for (i = 0; i < insn_cnt; i++, insn++) {
10321         if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) {
10322             insn->src_reg = 0;
10323         }
10324     }
10325 }
10326 
10327 /* single env->prog->insnsi[off] instruction was replaced with the range
10328  * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
10329  * [0, off) and [off, end) to new locations, so the patched range stays zero
10330  */
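/* For example, patching one insn at 'off' with a 3-insn sequence shifts every
 * insn after 'off' down by two slots; adjust_insn_aux_data(),
 * adjust_subprog_starts() and adjust_poke_descs() below keep the aux data,
 * subprog boundaries and poke descriptors in sync with that shift.
 */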
10331 static void adjust_insn_aux_data(struct bpf_verifier_env *env, struct bpf_insn_aux_data *new_data,
10332                                  struct bpf_prog *new_prog, u32 off, u32 cnt)
10333 {
10334     struct bpf_insn_aux_data *old_data = env->insn_aux_data;
10335     struct bpf_insn *insn = new_prog->insnsi;
10336     u32 old_seen = old_data[off].seen;
10337     u32 prog_len;
10338     int i;
10339 
10340     /* aux info at OFF always needs adjustment, no matter whether the fast path
10341      * (cnt == 1) is taken or not. There is no guarantee the insn at OFF is the
10342      * original insn of the old prog.
10343      */
10344     old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
10345 
10346     if (cnt == 1) {
10347         return;
10348     }
10349     prog_len = new_prog->len;
10350 
10351     memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
10352     memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
10353     for (i = off; i < off + cnt - 1; i++) {
10354         /* Expand insni[off]'s seen count to the patched range. */
10355         new_data[i].seen = old_seen;
10356         new_data[i].zext_dst = insn_has_def32(env, insn + i);
10357     }
10358     env->insn_aux_data = new_data;
10359     vfree(old_data);
10360 }
10361 
10362 static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
10363 {
10364     int i;
10365 
10366     if (len == 1) {
10367         return;
10368     }
10369     /* NOTE: fake 'exit' subprog should be updated as well. */
10370     for (i = 0; i <= env->subprog_cnt; i++) {
10371         if (env->subprog_info[i].start <= off) {
10372             continue;
10373         }
10374         env->subprog_info[i].start += len - 1;
10375     }
10376 }
10377 
10378 static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
10379 {
10380     struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
10381     int i, sz = prog->aux->size_poke_tab;
10382     struct bpf_jit_poke_descriptor *desc;
10383 
10384     for (i = 0; i < sz; i++) {
10385         desc = &tab[i];
10386         if (desc->insn_idx <= off) {
10387             continue;
10388         }
10389         desc->insn_idx += len - 1;
10390     }
10391 }
10392 
10393 static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, const struct bpf_insn *patch,
10394                                             u32 len)
10395 {
10396     struct bpf_prog *new_prog;
10397     struct bpf_insn_aux_data *new_data = NULL;
10398 
10399     if (len > 1) {
10400         new_data = vzalloc(array_size(env->prog->len + len - 1, sizeof(struct bpf_insn_aux_data)));
10401         if (!new_data) {
10402             return NULL;
10403         }
10404     }
10405 
10406     new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
10407     if (IS_ERR(new_prog)) {
10408         if (PTR_ERR(new_prog) == -ERANGE) {
10409             verbose(env, "insn %d cannot be patched due to 16-bit range\n", env->insn_aux_data[off].orig_idx);
10410         }
10411         vfree(new_data);
10412         return NULL;
10413     }
10414     adjust_insn_aux_data(env, new_data, new_prog, off, len);
10415     adjust_subprog_starts(env, off, len);
10416     adjust_poke_descs(new_prog, off, len);
10417     return new_prog;
10418 }
10419 
10420 static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt)
10421 {
10422     int i, j;
10423 
10424     /* find first prog starting at or after off (first to remove) */
10425     for (i = 0; i < env->subprog_cnt; i++) {
10426         if (env->subprog_info[i].start >= off) {
10427             break;
10428         }
10429     }
10430     /* find first prog starting at or after off + cnt (first to stay) */
10431     for (j = i; j < env->subprog_cnt; j++) {
10432         if (env->subprog_info[j].start >= off + cnt) {
10433             break;
10434         }
10435     }
10436     /* if j doesn't start exactly at off + cnt, we are just removing
10437      * the front of previous prog
10438      */
10439     if (env->subprog_info[j].start != off + cnt) {
10440         j--;
10441     }
10442 
10443     if (j > i) {
10444         struct bpf_prog_aux *aux = env->prog->aux;
10445         int move;
10446 
10447         /* move fake 'exit' subprog as well */
10448         move = env->subprog_cnt + 1 - j;
10449 
10450         memmove(env->subprog_info + i, env->subprog_info + j, sizeof(*env->subprog_info) * move);
10451         env->subprog_cnt -= j - i;
10452 
10453         /* remove func_info */
10454         if (aux->func_info) {
10455             move = aux->func_info_cnt - j;
10456 
10457             memmove(aux->func_info + i, aux->func_info + j, sizeof(*aux->func_info) * move);
10458             aux->func_info_cnt -= j - i;
10459             /* func_info->insn_off is set after all code rewrites,
10460              * in adjust_btf_func() - no need to adjust
10461              */
10462         }
10463     } else {
10464         /* convert i from "first prog to remove" to "first to adjust" */
10465         if (env->subprog_info[i].start == off) {
10466             i++;
10467         }
10468     }
10469 
10470     /* update fake 'exit' subprog as well */
10471     for (; i <= env->subprog_cnt; i++) {
10472         env->subprog_info[i].start -= cnt;
10473     }
10474 
10475     return 0;
10476 }
10477 
10478 static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt)
10479 {
10480     struct bpf_prog *prog = env->prog;
10481     u32 i, l_off, l_cnt, nr_linfo;
10482     struct bpf_line_info *linfo;
10483 
10484     nr_linfo = prog->aux->nr_linfo;
10485     if (!nr_linfo) {
10486         return 0;
10487     }
10488 
10489     linfo = prog->aux->linfo;
10490 
10491     /* find first line info to remove, count lines to be removed */
10492     for (i = 0; i < nr_linfo; i++) {
10493         if (linfo[i].insn_off >= off) {
10494             break;
10495         }
10496     }
10497 
10498     l_off = i;
10499     l_cnt = 0;
10500     for (; i < nr_linfo; i++) {
10501         if (linfo[i].insn_off < off + cnt) {
10502             l_cnt++;
10503         } else {
10504             break;
10505         }
10506     }
10507 
10508     /* If the first live insn doesn't match the first live linfo, it needs to
10509      * "inherit" the last removed linfo.  prog is already modified, so prog->len == off
10510      * means no live instructions after (tail of the program was removed).
10511      */
10512     if (prog->len != off && l_cnt && (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
10513         l_cnt--;
10514         linfo[--i].insn_off = off + cnt;
10515     }
10516 
10517     /* remove the line info entries which refer to the removed instructions */
10518     if (l_cnt) {
10519         memmove(linfo + l_off, linfo + i, sizeof(*linfo) * (nr_linfo - i));
10520 
10521         prog->aux->nr_linfo -= l_cnt;
10522         nr_linfo = prog->aux->nr_linfo;
10523     }
10524 
10525     /* pull all linfo[i].insn_off >= off + cnt in by cnt */
10526     for (i = l_off; i < nr_linfo; i++) {
10527         linfo[i].insn_off -= cnt;
10528     }
10529 
10530     /* fix up all subprogs (incl. 'exit') which start >= off */
10531     for (i = 0; i <= env->subprog_cnt; i++) {
10532         if (env->subprog_info[i].linfo_idx > l_off) {
10533             /* program may have started in the removed region but
10534              * may not be fully removed
10535              */
10536             if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) {
10537                 env->subprog_info[i].linfo_idx -= l_cnt;
10538             } else {
10539                 env->subprog_info[i].linfo_idx = l_off;
10540             }
10541         }
10542     }
10543 
10544     return 0;
10545 }
10546 
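/* Remove insns [off, off + cnt) and keep the rest of the verifier's metadata
 * consistent: notify an offloaded prog, then fix up subprog starts, line info
 * and insn_aux_data so they refer to the shrunk instruction array.
 */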
10547 static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
10548 {
10549     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10550     unsigned int orig_prog_len = env->prog->len;
10551     int err;
10552 
10553     if (bpf_prog_is_dev_bound(env->prog->aux)) {
10554         bpf_prog_offload_remove_insns(env, off, cnt);
10555     }
10556 
10557     err = bpf_remove_insns(env->prog, off, cnt);
10558     if (err) {
10559         return err;
10560     }
10561 
10562     err = adjust_subprog_starts_after_remove(env, off, cnt);
10563     if (err) {
10564         return err;
10565     }
10566 
10567     err = bpf_adj_linfo_after_remove(env, off, cnt);
10568     if (err) {
10569         return err;
10570     }
10571 
10572     memmove(aux_data + off, aux_data + off + cnt, sizeof(*aux_data) * (orig_prog_len - off - cnt));
10573 
10574     return 0;
10575 }
10576 
10577 /* The verifier does more data flow analysis than llvm and will not
10578  * explore branches that are dead at run time. Malicious programs can
10579  * have dead code too. Therefore replace all dead at-run-time code
10580  * with 'ja -1'.
10581  *
10582  * Just nops are not optimal: if they sat at the end of the
10583  * program and, through another bug, we managed to jump there,
10584  * we'd execute beyond program memory. Returning exception
10585  * code also wouldn't work since we can have subprogs where the dead
10586  * code could be located.
10587  */
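/* Note that 'ja -1' (off == -1) is a jump onto itself, so any stray execution
 * of a dead insn spins in place instead of falling through into whatever
 * happens to follow it.
 */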
10588 static void sanitize_dead_code(struct bpf_verifier_env *env)
10589 {
10590     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10591     struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
10592     struct bpf_insn *insn = env->prog->insnsi;
10593     const int insn_cnt = env->prog->len;
10594     int i;
10595 
10596     for (i = 0; i < insn_cnt; i++) {
10597         if (aux_data[i].seen) {
10598             continue;
10599         }
10600         memcpy(insn + i, &trap, sizeof(trap));
10601         aux_data[i].zext_dst = false;
10602     }
10603 }
10604 
10605 static bool insn_is_cond_jump(u8 code)
10606 {
10607     u8 op;
10608 
10609     if (BPF_CLASS(code) == BPF_JMP32) {
10610         return true;
10611     }
10612 
10613     if (BPF_CLASS(code) != BPF_JMP) {
10614         return false;
10615     }
10616 
10617     op = BPF_OP(code);
10618     return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
10619 }
10620 
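/* If only one side of a conditional jump was ever reached, rewrite the insn
 * into an unconditional 'ja' to the live side: ja.off = insn->off when the
 * fall-through insn was never seen, ja.off = 0 when the jump target was
 * never seen.
 */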
10621 static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
10622 {
10623     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10624     struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10625     struct bpf_insn *insn = env->prog->insnsi;
10626     const int insn_cnt = env->prog->len;
10627     int i;
10628 
10629     for (i = 0; i < insn_cnt; i++, insn++) {
10630         if (!insn_is_cond_jump(insn->code)) {
10631             continue;
10632         }
10633 
10634         if (!aux_data[i + 1].seen) {
10635             ja.off = insn->off;
10636         } else if (!aux_data[i + 1 + insn->off].seen) {
10637             ja.off = 0;
10638         } else {
10639             continue;
10640         }
10641 
10642         if (bpf_prog_is_dev_bound(env->prog->aux)) {
10643             bpf_prog_offload_replace_insn(env, i, &ja);
10644         }
10645 
10646         memcpy(insn, &ja, sizeof(ja));
10647     }
10648 }
10649 
10650 static int opt_remove_dead_code(struct bpf_verifier_env *env)
10651 {
10652     struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10653     int insn_cnt = env->prog->len;
10654     int i, err;
10655 
10656     for (i = 0; i < insn_cnt; i++) {
10657         int j;
10658 
10659         j = 0;
10660         while (i + j < insn_cnt && !aux_data[i + j].seen) {
10661             j++;
10662         }
10663         if (!j) {
10664             continue;
10665         }
10666 
10667         err = verifier_remove_insns(env, i, j);
10668         if (err) {
10669             return err;
10670         }
10671         insn_cnt = env->prog->len;
10672     }
10673 
10674     return 0;
10675 }
10676 
10677 static int opt_remove_nops(struct bpf_verifier_env *env)
10678 {
10679     const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10680     struct bpf_insn *insn = env->prog->insnsi;
10681     int insn_cnt = env->prog->len;
10682     int i, err;
10683 
10684     for (i = 0; i < insn_cnt; i++) {
10685         if (memcmp(&insn[i], &ja, sizeof(ja))) {
10686             continue;
10687         }
10688 
10689         err = verifier_remove_insns(env, i, 1);
10690         if (err) {
10691             return err;
10692         }
10693         insn_cnt--;
10694         i--;
10695     }
10696 
10697     return 0;
10698 }
10699 
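/* For 32-bit subregister defs that the liveness analysis marked as needing
 * zero extension (aux[..].zext_dst), append an explicit zero-extending mov
 * after the def when the JIT relies on the verifier for it
 * (bpf_jit_needs_zext()).  With BPF_F_TEST_RND_HI32, 32-bit defs that were
 * *not* marked instead get their upper 32 bits filled with a random value to
 * flush out programs that silently rely on an implicit zero high half.
 */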
10700 static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, const union bpf_attr *attr)
10701 {
10702     struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
10703     struct bpf_insn_aux_data *aux = env->insn_aux_data;
10704     int i, patch_len, delta = 0, len = env->prog->len;
10705     struct bpf_insn *insns = env->prog->insnsi;
10706     struct bpf_prog *new_prog;
10707     bool rnd_hi32;
10708 
10709     rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
10710     zext_patch[1] = BPF_ZEXT_REG(0);
10711     rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
10712     rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
10713     rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
10714     for (i = 0; i < len; i++) {
10715         int adj_idx = i + delta;
10716         struct bpf_insn insn;
10717 
10718         insn = insns[adj_idx];
10719         if (!aux[adj_idx].zext_dst) {
10720             u8 code, class;
10721             u32 imm_rnd;
10722 
10723             if (!rnd_hi32) {
10724                 continue;
10725             }
10726 
10727             code = insn.code;
10728             class = BPF_CLASS(code);
10729             if (insn_no_def(&insn)) {
10730                 continue;
10731             }
10732 
10733             /* NOTE: arg "reg" (the fourth one) is only used for
10734              *       BPF_STX which has been ruled out in above
10735              *       check, it is safe to pass NULL here.
10736              */
10737             if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
10738                 if (class == BPF_LD && BPF_MODE(code) == BPF_IMM) {
10739                     i++;
10740                 }
10741                 continue;
10742             }
10743 
10744             /* ctx load could be transformed into wider load. */
10745             if (class == BPF_LDX && aux[adj_idx].ptr_type == PTR_TO_CTX) {
10746                 continue;
10747             }
10748 
10749             imm_rnd = get_random_int();
10750             rnd_hi32_patch[0] = insn;
10751             rnd_hi32_patch[1].imm = imm_rnd;
10752             rnd_hi32_patch[3].dst_reg = insn.dst_reg;
10753             patch = rnd_hi32_patch;
10754             patch_len = VERIFIER_FOUR;
10755             goto apply_patch_buffer;
10756         }
10757 
10758         if (!bpf_jit_needs_zext()) {
10759             continue;
10760         }
10761 
10762         zext_patch[0] = insn;
10763         zext_patch[1].dst_reg = insn.dst_reg;
10764         zext_patch[1].src_reg = insn.dst_reg;
10765         patch = zext_patch;
10766         patch_len = 2;
10767     apply_patch_buffer:
10768         new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
10769         if (!new_prog) {
10770             return -ENOMEM;
10771         }
10772         env->prog = new_prog;
10773         insns = new_prog->insnsi;
10774         aux = env->insn_aux_data;
10775         delta += patch_len - 1;
10776     }
10777 
10778     return 0;
10779 }
10780 
10781 /* convert load instructions that access fields of a context type into a
10782  * sequence of instructions that access fields of the underlying structure:
10783  *     struct __sk_buff    -> struct sk_buff
10784  *     struct bpf_sock_ops -> struct sock
10785  */
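/* e.g. a program-visible load such as
 *     r0 = *(u32 *)(r1 + offsetof(struct __sk_buff, len))
 * is rewritten via the prog type's convert_ctx_access() callback into one or
 * more loads from the real underlying structure's layout.
 */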
10786 static int convert_ctx_accesses(struct bpf_verifier_env *env)
10787 {
10788     const struct bpf_verifier_ops *ops = env->ops;
10789     int i, cnt, size, ctx_field_size, delta = 0;
10790     const int insn_cnt = env->prog->len;
10791     struct bpf_insn insn_buf[VERIFIER_SIXTEEN], *insn;
10792     u32 target_size, size_default, off;
10793     struct bpf_prog *new_prog;
10794     enum bpf_access_type type;
10795     bool is_narrower_load;
10796 
10797     if (ops->gen_prologue || env->seen_direct_write) {
10798         if (!ops->gen_prologue) {
10799             verbose(env, "bpf verifier is misconfigured\n");
10800             return -EINVAL;
10801         }
10802         cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog);
10803         if (cnt >= ARRAY_SIZE(insn_buf)) {
10804             verbose(env, "bpf verifier is misconfigured\n");
10805             return -EINVAL;
10806         } else if (cnt) {
10807             new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
10808             if (!new_prog) {
10809                 return -ENOMEM;
10810             }
10811 
10812             env->prog = new_prog;
10813             delta += cnt - 1;
10814         }
10815     }
10816 
10817     if (bpf_prog_is_dev_bound(env->prog->aux)) {
10818         return 0;
10819     }
10820 
10821     insn = env->prog->insnsi + delta;
10822 
10823     for (i = 0; i < insn_cnt; i++, insn++) {
10824         bpf_convert_ctx_access_t convert_ctx_access;
10825         bool ctx_access;
10826 
10827         if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
10828             insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
10829             type = BPF_READ;
10830             ctx_access = true;
10831         } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
10832                    insn->code == (BPF_STX | BPF_MEM | BPF_W) || insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
10833                    insn->code == (BPF_ST | BPF_MEM | BPF_B) || insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
10834                    insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
10835             type = BPF_WRITE;
10836             ctx_access = BPF_CLASS(insn->code) == BPF_STX;
10837         } else {
10838             continue;
10839         }
10840 
10841         if (type == BPF_WRITE && env->insn_aux_data[i + delta].sanitize_stack_spill) {
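            /* Keep the spilling store and follow it with a BPF_ST_NOSPEC
             * speculation barrier to sanitize Spectre v4 style stack spills.
             */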
10842             struct bpf_insn patch[] = {
10843                 *insn,
10844                 BPF_ST_NOSPEC(),
10845             };
10846 
10847             cnt = ARRAY_SIZE(patch);
10848             new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
10849             if (!new_prog) {
10850                 return -ENOMEM;
10851             }
10852 
10853             delta += cnt - 1;
10854             env->prog = new_prog;
10855             insn = new_prog->insnsi + i + delta;
10856             continue;
10857         }
10858 
10859         if (!ctx_access) {
10860             continue;
10861         }
10862 
10863         switch (env->insn_aux_data[i + delta].ptr_type) {
10864             case PTR_TO_CTX:
10865                 if (!ops->convert_ctx_access) {
10866                     continue;
10867                 }
10868                 convert_ctx_access = ops->convert_ctx_access;
10869                 break;
10870             case PTR_TO_SOCKET:
10871             case PTR_TO_SOCK_COMMON:
10872                 convert_ctx_access = bpf_sock_convert_ctx_access;
10873                 break;
10874             case PTR_TO_TCP_SOCK:
10875                 convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
10876                 break;
10877             case PTR_TO_XDP_SOCK:
10878                 convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
10879                 break;
10880             case PTR_TO_BTF_ID:
10881                 if (type == BPF_READ) {
10882                     insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
10883                     env->prog->aux->num_exentries++;
10884                 } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
10885                     verbose(env, "Writes through BTF pointers are not allowed\n");
10886                     return -EINVAL;
10887                 }
10888                 continue;
10889             default:
10890                 continue;
10891         }
10892 
10893         ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
10894         size = BPF_LDST_BYTES(insn);
10895 
10896         /* If the read access is a narrower load of the field,
10897          * convert to a 4/8-byte load, to minimize program type specific
10898          * convert_ctx_access changes. If conversion is successful,
10899          * we will apply proper mask to the result.
10900          */
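        /* Illustrative example (little-endian, values hypothetical): a 1-byte
         * read at ctx offset (field_off + 1) of a 4-byte field is widened to
         * a 4-byte read at field_off; after convert_ctx_access() the result
         * is shifted right by bpf_ctx_narrow_access_offset() * 8 bits and
         * masked down to the requested byte (see the RSH/AND patching below).
         */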
10901         is_narrower_load = size < ctx_field_size;
10902         size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
10903         off = insn->off;
10904         if (is_narrower_load) {
10905             u8 size_code;
10906 
10907             if (type == BPF_WRITE) {
10908                 verbose(env, "bpf verifier narrow ctx access misconfigured\n");
10909                 return -EINVAL;
10910             }
10911 
10912             size_code = BPF_H;
10913             if (ctx_field_size == VERIFIER_FOUR) {
10914                 size_code = BPF_W;
10915             } else if (ctx_field_size == VERIFIER_EIGHT) {
10916                 size_code = BPF_DW;
10917             }
10918 
10919             insn->off = off & ~(size_default - 1);
10920             insn->code = BPF_LDX | BPF_MEM | size_code;
10921         }
10922 
10923         target_size = 0;
10924         cnt = convert_ctx_access(type, insn, insn_buf, env->prog, &target_size);
10925         if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || (ctx_field_size && !target_size)) {
10926             verbose(env, "bpf verifier is misconfigured\n");
10927             return -EINVAL;
10928         }
10929 
10930         if (is_narrower_load && size < target_size) {
10931             u8 shift = bpf_ctx_narrow_access_offset(off, size, size_default) * VERIFIER_EIGHT;
10932             if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
10933                 verbose(env, "bpf verifier narrow ctx load misconfigured\n");
10934                 return -EINVAL;
10935             }
10936             if (ctx_field_size <= VERIFIER_FOUR) {
10937                 if (shift) {
10938                     insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, insn->dst_reg, shift);
10939                 }
10940                 insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, (1 << (size * VERIFIER_EIGHT)) - 1);
10941             } else {
10942                 if (shift) {
10943                     insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, insn->dst_reg, shift);
10944                 }
10945                 insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, (1ULL << (size * VERIFIER_EIGHT)) - 1);
10946             }
10947         }
10948 
10949         new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
10950         if (!new_prog) {
10951             return -ENOMEM;
10952         }
10953 
10954         delta += cnt - 1;
10955 
10956         /* keep walking new program and skip insns we just inserted */
10957         env->prog = new_prog;
10958         insn = new_prog->insnsi + i + delta;
10959     }
10960 
10961     return 0;
10962 }
10963 
10964 static int jit_subprogs(struct bpf_verifier_env *env)
10965 {
10966     struct bpf_prog *prog = env->prog, **func, *tmp;
10967     int i, j, subprog_start, subprog_end = 0, len, subprog;
10968     struct bpf_map *map_ptr;
10969     struct bpf_insn *insn;
10970     void *old_bpf_func;
10971     int err, num_exentries;
10972 
10973     if (env->subprog_cnt <= 1) {
10974         return 0;
10975     }
10976 
10977     for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
10978         if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
10979             continue;
10980         }
10981         /* Upon error here we cannot fall back to interpreter but
10982          * need a hard reject of the program. Thus -EFAULT is
10983          * propagated in any case.
10984          */
10985         subprog = find_subprog(env, i + insn->imm + 1);
10986         if (subprog < 0) {
10987             WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", i + insn->imm + 1);
10988             return -EFAULT;
10989         }
10990         /* temporarily remember subprog id inside insn instead of
10991          * aux_data, since next loop will split up all insns into funcs
10992          */
10993         insn->off = subprog;
10994         /* remember original imm in case JIT fails and fallback
10995          * to interpreter will be needed
10996          */
10997         env->insn_aux_data[i].call_imm = insn->imm;
10998         /* point imm to __bpf_call_base+1 from JITs point of view */
10999         insn->imm = 1;
11000     }
11001 
11002     err = bpf_prog_alloc_jited_linfo(prog);
11003     if (err) {
11004         goto out_undo_insn;
11005     }
11006 
11007     err = -ENOMEM;
11008     func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
11009     if (!func) {
11010         goto out_undo_insn;
11011     }
11012 
11013     for (i = 0; i < env->subprog_cnt; i++) {
11014         subprog_start = subprog_end;
11015         subprog_end = env->subprog_info[i + 1].start;
11016 
11017         len = subprog_end - subprog_start;
11018         /* BPF_PROG_RUN doesn't call subprogs directly,
11019          * hence main prog stats include the runtime of subprogs.
11020          * subprogs don't have IDs and are not reachable via prog_get_next_id;
11021          * func[i]->aux->stats will never be accessed and stays NULL
11022          */
11023         func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
11024         if (!func[i]) {
11025             goto out_free;
11026         }
11027         memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], len * sizeof(struct bpf_insn));
11028         func[i]->type = prog->type;
11029         func[i]->len = len;
11030         if (bpf_prog_calc_tag(func[i])) {
11031             goto out_free;
11032         }
11033         func[i]->is_func = 1;
11034         func[i]->aux->func_idx = i;
11035         /* Below members will be freed only at prog->aux */
11036         func[i]->aux->btf = prog->aux->btf;
11037         func[i]->aux->func_info = prog->aux->func_info;
11038         func[i]->aux->poke_tab = prog->aux->poke_tab;
11039         func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
11040 
11041         for (j = 0; j < prog->aux->size_poke_tab; j++) {
11042             struct bpf_jit_poke_descriptor *poke;
11043 
11044             poke = &prog->aux->poke_tab[j];
11045             if (poke->insn_idx < subprog_end && poke->insn_idx >= subprog_start) {
11046                 poke->aux = func[i]->aux;
11047             }
11048         }
11049 
11050         /* Use bpf_prog_F_tag to indicate functions in stack traces.
11051          * Long term would need debug info to populate names
11052          */
11053         func[i]->aux->name[0] = 'F';
11054         func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
11055         func[i]->jit_requested = 1;
11056         func[i]->aux->linfo = prog->aux->linfo;
11057         func[i]->aux->nr_linfo = prog->aux->nr_linfo;
11058         func[i]->aux->jited_linfo = prog->aux->jited_linfo;
11059         func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
11060         num_exentries = 0;
11061         insn = func[i]->insnsi;
11062         for (j = 0; j < func[i]->len; j++, insn++) {
11063             if (BPF_CLASS(insn->code) == BPF_LDX && BPF_MODE(insn->code) == BPF_PROBE_MEM) {
11064                 num_exentries++;
11065             }
11066         }
11067         func[i]->aux->num_exentries = num_exentries;
11068         func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
11069         func[i] = bpf_int_jit_compile(func[i]);
11070         if (!func[i]->jited) {
11071             err = -ENOTSUPP;
11072             goto out_free;
11073         }
11074         cond_resched();
11075     }
11076 
11077     /* at this point all bpf functions were successfully JITed
11078      * now populate all bpf_calls with correct addresses and
11079      * run last pass of JIT
11080      */
11081     for (i = 0; i < env->subprog_cnt; i++) {
11082         insn = func[i]->insnsi;
11083         for (j = 0; j < func[i]->len; j++, insn++) {
11084             if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11085                 continue;
11086             }
11087             subprog = insn->off;
11088             insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - __bpf_call_base;
11089         }
11090 
11091         /* we use the aux data to keep a list of the start addresses
11092          * of the JITed images for each function in the program
11093          *
11094          * for some architectures, such as powerpc64, the imm field
11095          * might not be large enough to hold the offset of the start
11096          * address of the callee's JITed image from __bpf_call_base
11097          *
11098          * in such cases, we can lookup the start address of a callee
11099          * by using its subprog id, available from the off field of
11100          * the call instruction, as an index for this list
11101          */
11102         func[i]->aux->func = func;
11103         func[i]->aux->func_cnt = env->subprog_cnt;
11104     }
11105     for (i = 0; i < env->subprog_cnt; i++) {
11106         old_bpf_func = func[i]->bpf_func;
11107         tmp = bpf_int_jit_compile(func[i]);
11108         if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
11109             verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
11110             err = -ENOTSUPP;
11111             goto out_free;
11112         }
11113         cond_resched();
11114     }
11115 
11116     /* finally lock prog and jit images for all functions and
11117      * populate kallsyms
11118      */
11119     for (i = 0; i < env->subprog_cnt; i++) {
11120         bpf_prog_lock_ro(func[i]);
11121         bpf_prog_kallsyms_add(func[i]);
11122     }
11123 
11124     /* Last step: make now unused interpreter insns from main
11125      * prog consistent for later dump requests, so they
11126      * look the same as if they had been interpreted only.
11127      */
11128     for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11129         if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11130             continue;
11131         }
11132         insn->off = env->insn_aux_data[i].call_imm;
11133         subprog = find_subprog(env, i + insn->off + 1);
11134         insn->imm = subprog;
11135     }
11136 
11137     prog->jited = 1;
11138     prog->bpf_func = func[0]->bpf_func;
11139     prog->aux->func = func;
11140     prog->aux->func_cnt = env->subprog_cnt;
11141     bpf_prog_free_unused_jited_linfo(prog);
11142     return 0;
11143 out_free:
11144     /* We failed JIT'ing, so at this point we need to unregister poke
11145      * descriptors from subprogs, so that the kernel does not attempt to
11146      * patch them anymore as we're freeing the subprog JIT memory.
11147      */
11148     for (i = 0; i < prog->aux->size_poke_tab; i++) {
11149         map_ptr = prog->aux->poke_tab[i].tail_call.map;
11150         map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
11151     }
11152     /* At this point we're guaranteed that poke descriptors are not
11153      * live anymore. We can just unlink each subprog's descriptor table
11154      * pointer, as the table itself is released with the main prog.
11155      */
11156     for (i = 0; i < env->subprog_cnt; i++) {
11157         if (!func[i]) {
11158             continue;
11159         }
11160         func[i]->aux->poke_tab = NULL;
11161         bpf_jit_free(func[i]);
11162     }
11163     kfree(func);
11164 out_undo_insn:
11165     /* cleanup main prog to be interpreted */
11166     prog->jit_requested = 0;
11167     for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11168         if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11169             continue;
11170         }
11171         insn->off = 0;
11172         insn->imm = env->insn_aux_data[i].call_imm;
11173     }
11174     bpf_prog_free_jited_linfo(prog);
11175     return err;
11176 }
11177 
11178 static int fixup_call_args(struct bpf_verifier_env *env)
11179 {
11180 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11181     struct bpf_prog *prog = env->prog;
11182     struct bpf_insn *insn = prog->insnsi;
11183     int i, depth;
11184 #endif
11185     int err = 0;
11186 
11187     if (env->prog->jit_requested && !bpf_prog_is_dev_bound(env->prog->aux)) {
11188         err = jit_subprogs(env);
11189         if (err == 0) {
11190             return 0;
11191         }
11192         if (err == -EFAULT) {
11193             return err;
11194         }
11195     }
11196 #ifndef CONFIG_BPF_JIT_ALWAYS_ON
11197     if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
11198         /* When JIT fails the progs with bpf2bpf calls and tail_calls
11199          * have to be rejected, since interpreter doesn't support them yet.
11200          */
11201         verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
11202         return -EINVAL;
11203     }
11204     for (i = 0; i < prog->len; i++, insn++) {
11205         if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11206             continue;
11207         }
11208         depth = get_callee_stack_depth(env, insn, i);
11209         if (depth < 0) {
11210             return depth;
11211         }
11212         bpf_patch_call_args(insn, depth);
11213     }
11214     err = 0;
11215 #endif
11216     return err;
11217 }
11218 
11219 /* fixup insn->imm field of bpf_call instructions
11220  * and inline eligible helpers as explicit sequence of BPF instructions
11221  *
11222  * this function is called after eBPF program passed verification
11223  */
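/* For example (illustrative): a bpf_map_lookup_elem() call on an array map may
 * be inlined via the map's ->map_gen_lookup() callback into a short bounds
 * check plus pointer arithmetic, while helper calls that are not inlined just
 * get insn->imm rewritten to fn->func - __bpf_call_base so the interpreter/JIT
 * can dispatch the call directly.
 */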
11224 static int fixup_bpf_calls(struct bpf_verifier_env *env)
11225 {
11226     struct bpf_prog *prog = env->prog;
11227     bool expect_blinding = bpf_jit_blinding_enabled(prog);
11228     struct bpf_insn *insn = prog->insnsi;
11229     const struct bpf_func_proto *fn;
11230     const int insn_cnt = prog->len;
11231     const struct bpf_map_ops *ops;
11232     struct bpf_insn_aux_data *aux;
11233     struct bpf_insn insn_buf[VERIFIER_SIXTEEN];
11234     struct bpf_prog *new_prog;
11235     struct bpf_map *map_ptr;
11236     int i, ret, cnt, delta = 0;
11237 
11238     for (i = 0; i < insn_cnt; i++, insn++) {
11239         if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
11240             insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
11241             bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
11242             bool isdiv = BPF_OP(insn->code) == BPF_DIV;
11243             struct bpf_insn *patchlet;
11244             struct bpf_insn chk_and_div[] = {
11245                 /* [R,W]x div 0 -> 0 */
11246                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JNE | BPF_K, insn->src_reg, 0, 2, 0),
11247                 BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
11248                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11249                 *insn,
11250             };
11251             struct bpf_insn chk_and_mod[] = {
11252                 /* [R,W]x mod 0 -> [R,W]x */
11253                 BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JEQ | BPF_K, insn->src_reg, 0, 1 + (is64 ? 0 : 1), 0),
11254                 *insn,
11255                 BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11256                 BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
11257             };
11258 
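            /* Note: for the 64-bit mod case the trailing JA + 32-bit mov in
             * chk_and_mod (which only serve to zero-extend the 32-bit result)
             * are unnecessary, hence the two-insn trim below when is64.
             */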
11259             patchlet = isdiv ? chk_and_div : chk_and_mod;
11260             cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod) - (is64 ? 0x2 : 0);
11261 
11262             new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
11263             if (!new_prog) {
11264                 return -ENOMEM;
11265             }
11266 
11267             delta += cnt - 1;
11268             env->prog = prog = new_prog;
11269             insn = new_prog->insnsi + i + delta;
11270             continue;
11271         }
11272 
11273         if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) {
11274             cnt = env->ops->gen_ld_abs(insn, insn_buf);
11275             if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11276                 verbose(env, "bpf verifier is misconfigured\n");
11277                 return -EINVAL;
11278             }
11279 
11280             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11281             if (!new_prog) {
11282                 return -ENOMEM;
11283             }
11284 
11285             delta += cnt - 1;
11286             env->prog = prog = new_prog;
11287             insn = new_prog->insnsi + i + delta;
11288             continue;
11289         }
11290 
11291         if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
11292             const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
11293             const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
11294             struct bpf_insn insn_buf_in[VERIFIER_SIXTEEN];
11295             struct bpf_insn *patch = &insn_buf_in[0];
11296             bool issrc, isneg, isimm;
11297             u32 off_reg;
11298 
11299             aux = &env->insn_aux_data[i + delta];
11300             if (!aux->alu_state || aux->alu_state == BPF_ALU_NON_POINTER) {
11301                 continue;
11302             }
11303 
11304             isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
11305             issrc = (aux->alu_state & BPF_ALU_SANITIZE) == BPF_ALU_SANITIZE_SRC;
11306             isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
11307 
11308             off_reg = issrc ? insn->src_reg : insn->dst_reg;
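            /* Rewrite the pointer ALU insn to take its offset from BPF_REG_AX:
             * AX is derived from aux->alu_limit so that an offset which is
             * negative or beyond the verified limit is forced to 0, preventing
             * speculative out-of-bounds pointer arithmetic.
             */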
11309             if (isimm) {
11310                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11311             } else {
11312                 if (isneg) {
11313                     *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11314                 }
11315                 *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11316                 *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
11317                 *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
11318                 *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
11319                 *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, VERIFIER_SIXTYTHREE);
11320                 *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
11321             }
11322             if (!issrc) {
11323                 *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
11324             }
11325             insn->src_reg = BPF_REG_AX;
11326             if (isneg) {
11327                 insn->code = insn->code == code_add ? code_sub : code_add;
11328             }
11329             *patch++ = *insn;
11330             if (issrc && isneg && !isimm) {
11331                 *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11332             }
11333             cnt = patch - insn_buf_in;
11334 
11335             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf_in, cnt);
11336             if (!new_prog) {
11337                 return -ENOMEM;
11338             }
11339 
11340             delta += cnt - 1;
11341             env->prog = prog = new_prog;
11342             insn = new_prog->insnsi + i + delta;
11343             continue;
11344         }
11345 
11346         if (insn->code != (BPF_JMP | BPF_CALL)) {
11347             continue;
11348         }
11349         if (insn->src_reg == BPF_PSEUDO_CALL) {
11350             continue;
11351         }
11352 
11353         if (insn->imm == BPF_FUNC_get_route_realm) {
11354             prog->dst_needed = 1;
11355         }
11356         if (insn->imm == BPF_FUNC_get_prandom_u32) {
11357             bpf_user_rnd_init_once();
11358         }
11359         if (insn->imm == BPF_FUNC_override_return) {
11360             prog->kprobe_override = 1;
11361         }
11362         if (insn->imm == BPF_FUNC_tail_call) {
11363             /* If we tail call into other programs, we
11364              * cannot make any assumptions since they can
11365              * be replaced dynamically during runtime in
11366              * the program array.
11367              */
11368             prog->cb_access = 1;
11369             if (!allow_tail_call_in_subprogs(env)) {
11370                 prog->aux->stack_depth = MAX_BPF_STACK;
11371             }
11372             prog->aux->max_pkt_offset = MAX_PACKET_OFF;
11373 
11374             /* mark bpf_tail_call as a different opcode to avoid a
11375              * conditional branch in the interpreter for every normal
11376              * call and to prevent accidental JITing by a JIT compiler
11377              * that doesn't support bpf_tail_call yet
11378              */
11379             insn->imm = 0;
11380             insn->code = BPF_JMP | BPF_TAIL_CALL;
11381 
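            /* If the map pointer and key are known constants (and neither is
             * poisoned or unprivileged), record a poke descriptor so the JIT
             * can later patch this tail call into a direct jump at runtime.
             */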
11382             aux = &env->insn_aux_data[i + delta];
11383             if (env->bpf_capable && !expect_blinding && prog->jit_requested && !bpf_map_key_poisoned(aux) &&
11384                 !bpf_map_ptr_poisoned(aux) && !bpf_map_ptr_unpriv(aux)) {
11385                 struct bpf_jit_poke_descriptor desc = {
11386                     .reason = BPF_POKE_REASON_TAIL_CALL,
11387                     .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
11388                     .tail_call.key = bpf_map_key_immediate(aux),
11389                     .insn_idx = i + delta,
11390                 };
11391 
11392                 ret = bpf_jit_add_poke_descriptor(prog, &desc);
11393                 if (ret < 0) {
11394                     verbose(env, "adding tail call poke descriptor failed\n");
11395                     return ret;
11396                 }
11397 
11398                 insn->imm = ret + 1;
11399                 continue;
11400             }
11401 
11402             if (!bpf_map_ptr_unpriv(aux)) {
11403                 continue;
11404             }
11405 
11406             /* instead of changing every JIT dealing with tail_call,
11407              * emit two extra insns:
11408              * if (index >= max_entries) goto out;
11409              * index &= array->index_mask;
11410              * to avoid out-of-bounds cpu speculation
11411              */
11412             if (bpf_map_ptr_poisoned(aux)) {
11413                 verbose(env, "tail_call abusing map_ptr\n");
11414                 return -EINVAL;
11415             }
11416 
11417             map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11418             insn_buf[0x0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, map_ptr->max_entries, 0x2);
11419             insn_buf[0x1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, container_of(map_ptr, struct bpf_array, map)->index_mask);
11420             insn_buf[0x2] = *insn;
11421             cnt = VERIFIER_THREE;
11422             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11423             if (!new_prog) {
11424                 return -ENOMEM;
11425             }
11426 
11427             delta += cnt - 1;
11428             env->prog = prog = new_prog;
11429             insn = new_prog->insnsi + i + delta;
11430             continue;
11431         }
11432 
11433         /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
11434          * and other inlining handlers are currently limited to 64 bit
11435          * only.
11436          */
11437         if (prog->jit_requested && BITS_PER_LONG == VERIFIER_SIXTYFOUR &&
11438             (insn->imm == BPF_FUNC_map_lookup_elem || insn->imm == BPF_FUNC_map_update_elem ||
11439              insn->imm == BPF_FUNC_map_delete_elem || insn->imm == BPF_FUNC_map_push_elem ||
11440              insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem)) {
11441             aux = &env->insn_aux_data[i + delta];
11442             if (bpf_map_ptr_poisoned(aux)) {
11443                 goto patch_call_imm;
11444             }
11445 
11446             map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11447             ops = map_ptr->ops;
11448             if (insn->imm == BPF_FUNC_map_lookup_elem && ops->map_gen_lookup) {
11449                 cnt = ops->map_gen_lookup(map_ptr, insn_buf);
11450                 if (cnt == -EOPNOTSUPP) {
11451                     goto patch_map_ops_generic;
11452                 }
11453                 if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11454                     verbose(env, "bpf verifier is misconfigured\n");
11455                     return -EINVAL;
11456                 }
11457 
11458                 new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11459                 if (!new_prog) {
11460                     return -ENOMEM;
11461                 }
11462 
11463                 delta += cnt - 1;
11464                 env->prog = prog = new_prog;
11465                 insn = new_prog->insnsi + i + delta;
11466                 continue;
11467             }
11468 
11469             BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, (void *(*)(struct bpf_map * map, void *key)) NULL));
11470             BUILD_BUG_ON(!__same_type(ops->map_delete_elem, (int (*)(struct bpf_map * map, void *key)) NULL));
11471             BUILD_BUG_ON(!__same_type(ops->map_update_elem,
11472                                       (int (*)(struct bpf_map * map, void *key, void *value, u64 flags)) NULL));
11473             BUILD_BUG_ON(
11474                 !__same_type(ops->map_push_elem, (int (*)(struct bpf_map * map, void *value, u64 flags)) NULL));
11475             BUILD_BUG_ON(!__same_type(ops->map_pop_elem, (int (*)(struct bpf_map * map, void *value)) NULL));
11476             BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map * map, void *value)) NULL));
11477         patch_map_ops_generic:
11478             switch (insn->imm) {
11479                 case BPF_FUNC_map_lookup_elem:
11480                     insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - __bpf_call_base;
11481                     continue;
11482                 case BPF_FUNC_map_update_elem:
11483                     insn->imm = BPF_CAST_CALL(ops->map_update_elem) - __bpf_call_base;
11484                     continue;
11485                 case BPF_FUNC_map_delete_elem:
11486                     insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - __bpf_call_base;
11487                     continue;
11488                 case BPF_FUNC_map_push_elem:
11489                     insn->imm = BPF_CAST_CALL(ops->map_push_elem) - __bpf_call_base;
11490                     continue;
11491                 case BPF_FUNC_map_pop_elem:
11492                     insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - __bpf_call_base;
11493                     continue;
11494                 case BPF_FUNC_map_peek_elem:
11495                     insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - __bpf_call_base;
11496                     continue;
11497                 default:
11498                     break;
11499             }
11500 
11501             goto patch_call_imm;
11502         }
11503 
11504         if (prog->jit_requested && BITS_PER_LONG == VERIFIER_SIXTYFOUR && insn->imm == BPF_FUNC_jiffies64) {
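            /* Inline bpf_jiffies64(): load the address of 'jiffies' as a
             * 64-bit immediate and read it with a single 8-byte load,
             * avoiding the helper call entirely.
             */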
11505             struct bpf_insn ld_jiffies_addr[2] = {
11506                 BPF_LD_IMM64(BPF_REG_0, (unsigned long)&jiffies),
11507             };
11508 
11509             insn_buf[0x0] = ld_jiffies_addr[0];
11510             insn_buf[0x1] = ld_jiffies_addr[1];
11511             insn_buf[0x2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
11512             cnt = VERIFIER_THREE;
11513 
11514             new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11515             if (!new_prog) {
11516                 return -ENOMEM;
11517             }
11518 
11519             delta += cnt - 1;
11520             env->prog = prog = new_prog;
11521             insn = new_prog->insnsi + i + delta;
11522             continue;
11523         }
11524 
11525     patch_call_imm:
11526         fn = env->ops->get_func_proto(insn->imm, env->prog);
11527         /* all functions that have a prototype and that the verifier
11528          * allowed programs to call must be real in-kernel functions
11529          */
11530         if (!fn->func) {
11531             verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(insn->imm), insn->imm);
11532             return -EFAULT;
11533         }
11534         insn->imm = fn->func - __bpf_call_base;
11535     }
11536 
11537     /* Since poke tab is now finalized, publish aux to tracker. */
11538     for (i = 0; i < prog->aux->size_poke_tab; i++) {
11539         map_ptr = prog->aux->poke_tab[i].tail_call.map;
11540         if (!map_ptr->ops->map_poke_track || !map_ptr->ops->map_poke_untrack || !map_ptr->ops->map_poke_run) {
11541             verbose(env, "bpf verifier is misconfigured\n");
11542             return -EINVAL;
11543         }
11544 
11545         ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
11546         if (ret < 0) {
11547             verbose(env, "tracking tail call prog failed\n");
11548             return ret;
11549         }
11550     }
11551 
11552     return 0;
11553 }
11554 
11555 static void free_states(struct bpf_verifier_env *env)
11556 {
11557     struct bpf_verifier_state_list *sl, *sln;
11558     int i;
11559 
11560     sl = env->free_list;
11561     while (sl) {
11562         sln = sl->next;
11563         free_verifier_state(&sl->state, false);
11564         kfree(sl);
11565         sl = sln;
11566     }
11567     env->free_list = NULL;
11568 
11569     if (!env->explored_states) {
11570         return;
11571     }
11572 
11573     for (i = 0; i < state_htab_size(env); i++) {
11574         sl = env->explored_states[i];
11575 
11576         while (sl) {
11577             sln = sl->next;
11578             free_verifier_state(&sl->state, false);
11579             kfree(sl);
11580             sl = sln;
11581         }
11582         env->explored_states[i] = NULL;
11583     }
11584 }
11585 
11586 static int do_check_common(struct bpf_verifier_env *env, int subprog)
11587 {
11588     bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11589     struct bpf_verifier_state *state;
11590     struct bpf_reg_state *regs;
11591     int ret, i;
11592 
11593     env->prev_linfo = NULL;
11594     env->pass_cnt++;
11595 
11596     state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
11597     if (!state) {
11598         return -ENOMEM;
11599     }
11600     state->curframe = 0;
11601     state->speculative = false;
11602     state->branches = 1;
11603     state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
11604     if (!state->frame[0]) {
11605         kfree(state);
11606         return -ENOMEM;
11607     }
11608     env->cur_state = state;
11609     init_func_state(env, state->frame[0], BPF_MAIN_FUNC /* callsite */, 0 /* frameno */, subprog);
11610 
11611     regs = state->frame[state->curframe]->regs;
11612     if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
11613         ret = btf_prepare_func_args(env, subprog, regs);
11614         if (ret) {
11615             goto out;
11616         }
11617         for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
11618             if (regs[i].type == PTR_TO_CTX) {
11619                 mark_reg_known_zero(env, regs, i);
11620             } else if (regs[i].type == SCALAR_VALUE) {
11621                 mark_reg_unknown(env, regs, i);
11622             }
11623         }
11624     } else {
11625         /* 1st arg to a function */
11626         regs[BPF_REG_1].type = PTR_TO_CTX;
11627         mark_reg_known_zero(env, regs, BPF_REG_1);
11628         ret = btf_check_func_arg_match(env, subprog, regs);
11629         if (ret == -EFAULT) {
11630             /* unlikely verifier bug. abort.
11631              * ret == 0 and ret < 0 are sadly acceptable for
11632              * main() function due to backward compatibility.
11633              * For example, a socket filter program may be written as:
11634              * int bpf_prog(struct pt_regs *ctx)
11635              * and never dereference that ctx in the program.
11636              * 'struct pt_regs' is a type mismatch for a socket
11637              * filter, which should be using 'struct __sk_buff'.
11638              */
11639             goto out;
11640         }
11641     }
11642 
11643     ret = do_check(env);
11644 out:
11645     /* check for NULL is necessary, since cur_state can be freed inside
11646      * do_check() under memory pressure.
11647      */
11648     if (env->cur_state) {
11649         free_verifier_state(env->cur_state, true);
11650         env->cur_state = NULL;
11651     }
11652     while (!pop_stack(env, NULL, NULL, false)) {
11653         ;
11654     }
11655     if (!ret && pop_log) {
11656         bpf_vlog_reset(&env->log, 0);
11657     }
11658     free_states(env);
11659     return ret;
11660 }
11661 
11662 /* Verify all global functions in a BPF program one by one based on their BTF.
11663  * All global functions must pass verification. Otherwise the whole program is rejected.
11664  * Consider:
11665  * int bar(int);
11666  * int foo(int f)
11667  * {
11668  *    return bar(f);
11669  * }
11670  * int bar(int b)
11671  * {
11672  *    ...
11673  * }
11674  * foo() will be verified first for R1=any_scalar_value. During verification it
11675  * will be assumed that bar() already verified successfully and call to bar()
11676  * from foo() will be checked for type match only. Later bar() will be verified
11677  * independently to check that it's safe for R1=any_scalar_value.
11678  */
11679 static int do_check_subprogs(struct bpf_verifier_env *env)
11680 {
11681     struct bpf_prog_aux *aux = env->prog->aux;
11682     int i, ret;
11683 
11684     if (!aux->func_info) {
11685         return 0;
11686     }
11687 
11688     for (i = 1; i < env->subprog_cnt; i++) {
11689         if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) {
11690             continue;
11691         }
11692         env->insn_idx = env->subprog_info[i].start;
11693         WARN_ON_ONCE(env->insn_idx == 0);
11694         ret = do_check_common(env, i);
11695         if (ret) {
11696             return ret;
11697         } else if (env->log.level & BPF_LOG_LEVEL) {
11698             verbose(env, "Func#%d is safe for any args that match its prototype\n", i);
11699         }
11700     }
11701     return 0;
11702 }
11703 
11704 static int do_check_main(struct bpf_verifier_env *env)
11705 {
11706     int ret;
11707 
11708     env->insn_idx = 0;
11709     ret = do_check_common(env, 0);
11710     if (!ret) {
11711         env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
11712     }
11713     return ret;
11714 }
11715 
11716 static void print_verification_stats(struct bpf_verifier_env *env)
11717 {
11718     int i;
11719 
11720     if (env->log.level & BPF_LOG_STATS) {
11721         verbose(env, "verification time %lld usec\n", div_u64(env->verification_time, VERIFIER_ONETHOUSAND));
11722         verbose(env, "stack depth ");
11723         for (i = 0; i < env->subprog_cnt; i++) {
11724             u32 depth = env->subprog_info[i].stack_depth;
11725 
11726             verbose(env, "%d", depth);
11727             if (i + 1 < env->subprog_cnt) {
11728                 verbose(env, "+");
11729             }
11730         }
11731         verbose(env, "\n");
11732     }
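    /* Illustrative example of the summary line below (values are made up):
     *   processed 1024 insns (limit 1000000) max_states_per_insn 4 total_states 64 peak_states 64 mark_read 3
     */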
11733     verbose(env,
11734             "processed %d insns (limit %d) max_states_per_insn %d "
11735             "total_states %d peak_states %d mark_read %d\n",
11736             env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, env->max_states_per_insn, env->total_states,
11737             env->peak_states, env->longest_mark_read_walk);
11738 }
11739 
11740 static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
11741 {
11742     const struct btf_type *t, *func_proto;
11743     const struct bpf_struct_ops *st_ops;
11744     const struct btf_member *member;
11745     struct bpf_prog *prog = env->prog;
11746     u32 btf_id, member_idx;
11747     const char *mname;
11748 
11749     if (!prog->gpl_compatible) {
11750         verbose(env, "struct ops programs must have a GPL compatible license\n");
11751         return -EINVAL;
11752     }
11753 
11754     btf_id = prog->aux->attach_btf_id;
11755     st_ops = bpf_struct_ops_find(btf_id);
11756     if (!st_ops) {
11757         verbose(env, "attach_btf_id %u is not a supported struct\n", btf_id);
11758         return -ENOTSUPP;
11759     }
11760 
11761     t = st_ops->type;
11762     member_idx = prog->expected_attach_type;
11763     if (member_idx >= btf_type_vlen(t)) {
11764         verbose(env, "attach to invalid member idx %u of struct %s\n", member_idx, st_ops->name);
11765         return -EINVAL;
11766     }
11767 
11768     member = &btf_type_member(t)[member_idx];
11769     mname = btf_name_by_offset(btf_vmlinux, member->name_off);
11770     func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL);
11771     if (!func_proto) {
11772         verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", mname, member_idx, st_ops->name);
11773         return -EINVAL;
11774     }
11775 
11776     if (st_ops->check_member) {
11777         int err = st_ops->check_member(t, member);
11778         if (err) {
11779             verbose(env, "attach to unsupported member %s of struct %s\n", mname, st_ops->name);
11780             return err;
11781         }
11782     }
11783 
11784     prog->aux->attach_func_proto = func_proto;
11785     prog->aux->attach_func_name = mname;
11786     env->ops = st_ops->verifier_ops;
11787 
11788     return 0;
11789 }
11790 #define SECURITY_PREFIX "security_"
11791 
11792 static int check_attach_modify_return(unsigned long addr, const char *func_name)
11793 {
11794     if (within_error_injection_list(addr) || !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) {
11795         return 0;
11796     }
11797     return -EINVAL;
11798 }
11799 
11800 /* non-exhaustive list of sleepable bpf_lsm_*() functions */
11801 BTF_SET_START(btf_sleepable_lsm_hooks)
11802 #ifdef CONFIG_BPF_LSM
11803 BTF_ID(func, bpf_lsm_bprm_committed_creds)
11804 #else
11805 BTF_ID_UNUSED
11806 #endif
11807 BTF_SET_END(btf_sleepable_lsm_hooks)
11808 
11809 static int check_sleepable_lsm_hook(u32 btf_id)
11810 {
11811     return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
11812 }
11813 
11814 /* list of non-sleepable functions that are otherwise on
11815  * ALLOW_ERROR_INJECTION list
11816  */
11817 BTF_SET_START(btf_non_sleepable_error_inject)
11818 /* The three functions below can be called from both sleepable and non-sleepable context.
11819  * Assume non-sleepable from the bpf safety point of view.
11820  */
11821 BTF_ID(func, __add_to_page_cache_locked)
11822 BTF_ID(func, should_fail_alloc_page)
11823 BTF_ID(func, should_failslab)
11824 BTF_SET_END(btf_non_sleepable_error_inject)
11825 
11826 static int check_non_sleepable_error_inject(u32 btf_id)
11827 {
11828     return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
11829 }
11830 
11831 int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog,
11832                             u32 btf_id, struct bpf_attach_target_info *tgt_info)
11833 {
11834     bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
11835     const char prefix[] = "btf_trace_";
11836     int ret = 0, subprog = -1, i;
11837     const struct btf_type *t;
11838     bool conservative = true;
11839     const char *tname;
11840     struct btf *btf;
11841     long addr = 0;
11842 
11843     if (!btf_id) {
11844         bpf_log(log, "Tracing programs must provide btf_id\n");
11845         return -EINVAL;
11846     }
11847     btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
11848     if (!btf) {
11849         bpf_log(log, "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
11850         return -EINVAL;
11851     }
11852     t = btf_type_by_id(btf, btf_id);
11853     if (!t) {
11854         bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
11855         return -EINVAL;
11856     }
11857     tname = btf_name_by_offset(btf, t->name_off);
11858     if (!tname) {
11859         bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
11860         return -EINVAL;
11861     }
11862     if (tgt_prog) {
11863         struct bpf_prog_aux *aux = tgt_prog->aux;
11864 
11865         for (i = 0; i < aux->func_info_cnt; i++) {
11866             if (aux->func_info[i].type_id == btf_id) {
11867                 subprog = i;
11868                 break;
11869             }
11870         }
11871         if (subprog == -1) {
11872             bpf_log(log, "Subprog %s doesn't exist\n", tname);
11873             return -EINVAL;
11874         }
11875         conservative = aux->func_info_aux[subprog].unreliable;
11876         if (prog_extension) {
11877             if (conservative) {
11878                 bpf_log(log, "Cannot replace static functions\n");
11879                 return -EINVAL;
11880             }
11881             if (!prog->jit_requested) {
11882                 bpf_log(log, "Extension programs should be JITed\n");
11883                 return -EINVAL;
11884             }
11885         }
11886         if (!tgt_prog->jited) {
11887             bpf_log(log, "Can attach to only JITed progs\n");
11888             return -EINVAL;
11889         }
11890         if (tgt_prog->type == prog->type) {
11891             /* Cannot fentry/fexit another fentry/fexit program.
11892              * Cannot attach program extension to another extension.
11893              * It's ok to attach fentry/fexit to extension program.
11894              */
11895             bpf_log(log, "Cannot recursively attach\n");
11896             return -EINVAL;
11897         }
11898         if (tgt_prog->type == BPF_PROG_TYPE_TRACING && prog_extension &&
11899             (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
11900             /* Program extensions can extend all program types
11901              * except fentry/fexit. The reason is the following.
11902              * The fentry/fexit programs are used for performance
11903              * analysis, stats and can be attached to any program
11904              * type except themselves. When an extension program is
11905              * replacing an XDP function, it is necessary to allow
11906              * performance analysis of all functions: both the original
11907              * XDP program and its program extension. Hence
11908              * attaching fentry/fexit to BPF_PROG_TYPE_EXT is
11909              * allowed. If extending fentry/fexit were allowed, it
11910              * would be possible to create a long call chain
11911              * fentry->extension->fentry->extension beyond
11912              * reasonable stack size. Hence extending fentry is not
11913              * allowed.
11914              */
11915             bpf_log(log, "Cannot extend fentry/fexit\n");
11916             return -EINVAL;
11917         }
11918     } else {
11919         if (prog_extension) {
11920             bpf_log(log, "Cannot replace kernel functions\n");
11921             return -EINVAL;
11922         }
11923     }
11924 
11925     switch (prog->expected_attach_type) {
11926         case BPF_TRACE_RAW_TP:
11927             if (tgt_prog) {
11928                 bpf_log(log, "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
11929                 return -EINVAL;
11930             }
11931             if (!btf_type_is_typedef(t)) {
11932                 bpf_log(log, "attach_btf_id %u is not a typedef\n", btf_id);
11933                 return -EINVAL;
11934             }
11935             if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
11936                 bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", btf_id, tname);
11937                 return -EINVAL;
11938             }
11939             tname += sizeof(prefix) - 1;
11940             t = btf_type_by_id(btf, t->type);
11941             if (!btf_type_is_ptr(t)) {
11942                 /* should never happen in valid vmlinux build */
11943                 return -EINVAL;
11944             }
11945             t = btf_type_by_id(btf, t->type);
11946             if (!btf_type_is_func_proto(t)) {
11947                 /* should never happen in valid vmlinux build */
11948                 return -EINVAL;
11949             }
11950 
11951             break;
11952         case BPF_TRACE_ITER:
11953             if (!btf_type_is_func(t)) {
11954                 bpf_log(log, "attach_btf_id %u is not a function\n", btf_id);
11955                 return -EINVAL;
11956             }
11957             t = btf_type_by_id(btf, t->type);
11958             if (!btf_type_is_func_proto(t)) {
11959                 return -EINVAL;
11960             }
11961             ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
11962             if (ret) {
11963                 return ret;
11964             }
11965             break;
11966         default:
11967             if (!prog_extension) {
11968                 return -EINVAL;
11969             }
11970             fallthrough;
11971         case BPF_MODIFY_RETURN:
11972         case BPF_LSM_MAC:
11973         case BPF_TRACE_FENTRY:
11974         case BPF_TRACE_FEXIT:
11975             if (!btf_type_is_func(t)) {
11976                 bpf_log(log, "attach_btf_id %u is not a function\n", btf_id);
11977                 return -EINVAL;
11978             }
11979             if (prog_extension && btf_check_type_match(log, prog, btf, t)) {
11980                 return -EINVAL;
11981             }
11982             t = btf_type_by_id(btf, t->type);
11983             if (!btf_type_is_func_proto(t)) {
11984                 return -EINVAL;
11985             }
11986 
11987             if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
11988                 (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
11989                  prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) {
11990                 return -EINVAL;
11991             }
11992 
11993             if (tgt_prog && conservative) {
11994                 t = NULL;
11995             }
11996 
11997             ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
11998             if (ret < 0) {
11999                 return ret;
12000             }
12001 
12002             if (tgt_prog) {
12003                 if (subprog == 0) {
12004                     addr = (long)tgt_prog->bpf_func;
12005                 } else {
12006                     addr = (long)tgt_prog->aux->func[subprog]->bpf_func;
12007                 }
12008             } else {
12009                 addr = kallsyms_lookup_name(tname);
12010                 if (!addr) {
12011                     bpf_log(log, "The address of function %s cannot be found\n", tname);
12012                     return -ENOENT;
12013                 }
12014             }
12015 
12016             if (prog->aux->sleepable) {
12017                 ret = -EINVAL;
12018                 switch (prog->type) {
12019                     case BPF_PROG_TYPE_TRACING:
12020                         /* fentry/fexit/fmod_ret progs can be sleepable only if they are
12021                          * attached to ALLOW_ERROR_INJECTION and are not in the denylist.
12022                          */
12023                         if (!check_non_sleepable_error_inject(btf_id) && within_error_injection_list(addr)) {
12024                             ret = 0;
12025                         }
12026                         break;
12027                     case BPF_PROG_TYPE_LSM:
12028                         /* LSM progs check that they are attached to bpf_lsm_*() funcs.
12029                          * Only some of them are sleepable.
12030                          */
12031                         if (check_sleepable_lsm_hook(btf_id)) {
12032                             ret = 0;
12033                         }
12034                         break;
12035                     default:
12036                         break;
12037                 }
12038                 if (ret) {
12039                     bpf_log(log, "%s is not sleepable\n", tname);
12040                     return ret;
12041                 }
12042             } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
12043                 if (tgt_prog) {
12044                     bpf_log(log, "can't modify return codes of BPF programs\n");
12045                     return -EINVAL;
12046                 }
12047                 ret = check_attach_modify_return(addr, tname);
12048                 if (ret) {
12049                     bpf_log(log, "%s() is not modifiable\n", tname);
12050                     return ret;
12051                 }
12052             }
12053 
12054             break;
12055     }
12056     tgt_info->tgt_addr = addr;
12057     tgt_info->tgt_name = tname;
12058     tgt_info->tgt_type = t;
12059     return 0;
12060 }
12061 
12062 static int check_attach_btf_id(struct bpf_verifier_env *env)
12063 {
12064     struct bpf_prog *prog = env->prog;
12065     struct bpf_prog *tgt_prog = prog->aux->dst_prog;
12066     struct bpf_attach_target_info tgt_info = {};
12067     u32 btf_id = prog->aux->attach_btf_id;
12068     struct bpf_trampoline *tr;
12069     int ret;
12070     u64 key;
12071 
12072     if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM) {
12073         verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
12074         return -EINVAL;
12075     }
12076 
12077     if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
12078         return check_struct_ops_btf_id(env);
12079     }
12080 
12081     if (prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_EXT) {
12082         return 0;
12083     }
12084 
12085     ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
12086     if (ret) {
12087         return ret;
12088     }
12089 
12090     if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
12091         /* to make freplace equivalent to their targets, they need to
12092          * inherit env->ops and expected_attach_type for the rest of the
12093          * verification
12094          */
12095         env->ops = bpf_verifier_ops[tgt_prog->type];
12096         prog->expected_attach_type = tgt_prog->expected_attach_type;
12097     }
12098 
12099     /* store info about the attachment target that will be used later */
12100     prog->aux->attach_func_proto = tgt_info.tgt_type;
12101     prog->aux->attach_func_name = tgt_info.tgt_name;
12102 
12103     if (tgt_prog) {
12104         prog->aux->saved_dst_prog_type = tgt_prog->type;
12105         prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
12106     }
12107 
12108     if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
12109         prog->aux->attach_btf_trace = true;
12110         return 0;
12111     } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
12112         if (!bpf_iter_prog_supported(prog)) {
12113             return -EINVAL;
12114         }
12115         return 0;
12116     }
12117 
12118     if (prog->type == BPF_PROG_TYPE_LSM) {
12119         ret = bpf_lsm_verify_prog(&env->log, prog);
12120         if (ret < 0) {
12121             return ret;
12122         }
12123     }
12124 
12125     key = bpf_trampoline_compute_key(tgt_prog, btf_id);
12126     tr = bpf_trampoline_get(key, &tgt_info);
12127     if (!tr) {
12128         return -ENOMEM;
12129     }
12130 
12131     prog->aux->dst_trampoline = tr;
12132     return 0;
12133 }
12134 
12135 struct btf *bpf_get_btf_vmlinux(void)
12136 {
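    /* Lazily parse the vmlinux BTF once: the second check under
     * bpf_verifier_lock avoids redundant parsing when multiple
     * callers race here.
     */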
12137     if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
12138         mutex_lock(&bpf_verifier_lock);
12139         if (!btf_vmlinux) {
12140             btf_vmlinux = btf_parse_vmlinux();
12141         }
12142         mutex_unlock(&bpf_verifier_lock);
12143     }
12144     return btf_vmlinux;
12145 }
12146 
12147 int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, union bpf_attr __user *uattr)
12148 {
12149     u64 start_time = ktime_get_ns();
12150     struct bpf_verifier_env *env;
12151     struct bpf_verifier_log *log;
12152     int i, len, ret = -EINVAL;
12153     bool is_priv;
12154 
12155     /* no program is valid */
12156     if (ARRAY_SIZE(bpf_verifier_ops) == 0) {
12157         return -EINVAL;
12158     }
12159 
12160     /* 'struct bpf_verifier_env' can be global, but since it's not small,
12161      * allocate/free it every time bpf_check() is called
12162      */
12163     env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
12164     if (!env) {
12165         return -ENOMEM;
12166     }
12167     log = &env->log;
12168 
12169     len = (*prog)->len;
12170     env->insn_aux_data = vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
12171     ret = -ENOMEM;
12172     if (!env->insn_aux_data) {
12173         goto err_free_env;
12174     }
12175     for (i = 0; i < len; i++) {
12176         env->insn_aux_data[i].orig_idx = i;
12177     }
12178     env->prog = *prog;
12179     env->ops = bpf_verifier_ops[env->prog->type];
12180     is_priv = bpf_capable();
12181 
12182     bpf_get_btf_vmlinux();
12183 
12184     /* grab the mutex to protect few globals used by verifier */
12185     if (!is_priv) {
12186         mutex_lock(&bpf_verifier_lock);
12187     }
12188 
12189     if (attr->log_level || attr->log_buf || attr->log_size) {
12190         /* user requested verbose verifier output
12191          * and supplied buffer to store the verification trace
12192          */
12193         log->level = attr->log_level;
12194         log->ubuf = (char __user *)(unsigned long)attr->log_buf;
12195         log->len_total = attr->log_size;
12196 
12197         /* log attributes have to be sane */
12198         if (!bpf_verifier_log_attr_valid(log)) {
12199             ret = -EINVAL;
12200             goto err_unlock;
12201         }
12202     }
12203 
12204     if (IS_ERR(btf_vmlinux)) {
12205         /* Either gcc or pahole or kernel are broken. */
12206         verbose(env, "in-kernel BTF is malformed\n");
12207         ret = PTR_ERR(btf_vmlinux);
12208         goto skip_full_check;
12209     }
12210 
12211     env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
12212     if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
12213         env->strict_alignment = true;
12214     }
12215     if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) {
12216         env->strict_alignment = false;
12217     }
12218 
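	/* Cache the capability-derived permissions in the env so the rest of
	 * the verification uses one consistent view of what this loader is
	 * allowed to do (pointer leaks, uninitialized stack reads, bypassing
	 * the Spectre v1/v4 mitigations, etc.).
	 */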
	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
	env->allow_uninit_stack = bpf_allow_uninit_stack();
	env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
	env->bypass_spec_v1 = bpf_bypass_spec_v1();
	env->bypass_spec_v4 = bpf_bypass_spec_v4();
	env->bpf_capable = bpf_capable();

	if (is_priv)
		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;

	env->explored_states = kvcalloc(state_htab_size(env),
					sizeof(struct bpf_verifier_state_list *),
					GFP_USER);
	ret = -ENOMEM;
	if (!env->explored_states)
		goto skip_full_check;

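	/* Preliminary passes before the main walk: split the program into
	 * subprograms, load the BTF func/line info supplied by the loader,
	 * resolve the attach target, and replace pseudo ld_imm64
	 * instructions (map fds, etc.) with real pointers.
	 */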
	ret = check_subprogs(env);
	if (ret < 0)
		goto skip_full_check;

	ret = check_btf_info(env, attr, uattr);
	if (ret < 0)
		goto skip_full_check;

	ret = check_attach_btf_id(env);
	if (ret)
		goto skip_full_check;

	ret = resolve_pseudo_ldimm64(env);
	if (ret < 0)
		goto skip_full_check;

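	/* For device-bound (offloaded) programs, give the offload driver a
	 * chance to prepare its own verification state before the main walk.
	 */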
	if (bpf_prog_is_dev_bound(env->prog->aux)) {
		ret = bpf_prog_offload_verifier_prep(env->prog);
		if (ret)
			goto skip_full_check;
	}

	ret = check_cfg(env);
	if (ret < 0)
		goto skip_full_check;

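	/* The main walk: global-function subprograms are verified
	 * independently of their callers, then all paths of the main
	 * program (including any static subprograms it calls) are explored.
	 */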
	ret = do_check_subprogs(env);
	ret = ret ?: do_check_main(env);

	if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux))
		ret = bpf_prog_offload_finalize(env);

skip_full_check:
	kvfree(env->explored_states);

	if (ret == 0)
		ret = check_max_stack_depth(env);

	/* instruction rewrites happen after this point */
	if (is_priv) {
		if (ret == 0)
			opt_hard_wire_dead_code_branches(env);
		if (ret == 0)
			ret = opt_remove_dead_code(env);
		if (ret == 0)
			ret = opt_remove_nops(env);
	} else {
		if (ret == 0)
			sanitize_dead_code(env);
	}

	if (ret == 0)
		/* program is valid, convert *(u32*)(ctx + off) accesses */
		ret = convert_ctx_accesses(env);

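	/* fixup_bpf_calls() rewrites helper calls: it resolves helper ids to
	 * the actual helper functions and, among other things, inlines or
	 * specializes selected helpers and adds runtime checks (e.g. around
	 * divisions and tail calls).
	 */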
	if (ret == 0)
		ret = fixup_bpf_calls(env);

	/* do 32-bit optimization after insn patching is done so those
	 * patched insns can be handled correctly.
	 */
	if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
		ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
		env->prog->aux->verifier_zext =
			bpf_jit_needs_zext() ? !ret : false;
	}

	if (ret == 0)
		ret = fixup_call_args(env);

	env->verification_time = ktime_get_ns() - start_time;
	print_verification_stats(env);

	if (log->level && bpf_verifier_log_full(log))
		ret = -ENOSPC;
	if (log->level && !log->ubuf) {
		ret = -EFAULT;
		goto err_release_maps;
	}

	if (ret == 0 && env->used_map_cnt) {
		/* if program passed verifier, update used_maps in bpf_prog_info */
		env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt,
							  sizeof(env->used_maps[0]),
							  GFP_KERNEL);

		if (!env->prog->aux->used_maps) {
			ret = -ENOMEM;
			goto err_release_maps;
		}

		memcpy(env->prog->aux->used_maps, env->used_maps,
		       sizeof(env->used_maps[0]) * env->used_map_cnt);
		env->prog->aux->used_map_cnt = env->used_map_cnt;

		/* program is valid. Convert pseudo bpf_ld_imm64 into generic
		 * bpf_ld_imm64 instructions
		 */
		convert_pseudo_ld_imm64(env);
	}

	if (ret == 0)
		adjust_btf_func(env);

err_release_maps:
	if (!env->prog->aux->used_maps)
		/* if we didn't copy map pointers into bpf_prog_info, release
		 * them now. Otherwise free_used_maps() will release them.
		 */
		release_maps(env);

	/* extension progs temporarily inherit the attach_type of their
	 * targets for verification purposes, so set it back to zero before
	 * returning
	 */
	if (env->prog->type == BPF_PROG_TYPE_EXT)
		env->prog->expected_attach_type = 0;

	*prog = env->prog;
err_unlock:
	if (!is_priv)
		mutex_unlock(&bpf_verifier_lock);
	vfree(env->insn_aux_data);
err_free_env:
	kfree(env);
	return ret;
}