1// SPDX-License-Identifier: GPL-2.0-only
2/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com
3 * Copyright (c) 2016 Facebook
4 * Copyright (c) 2018 Covalent IO, Inc. http://covalent.io
5 */
6#include <uapi/linux/btf.h>
7#include <linux/kernel.h>
8#include <linux/types.h>
9#include <linux/slab.h>
10#include <linux/bpf.h>
11#include <linux/btf.h>
12#include <linux/bpf_verifier.h>
13#include <linux/filter.h>
14#include <net/netlink.h>
15#include <linux/file.h>
16#include <linux/vmalloc.h>
17#include <linux/stringify.h>
18#include <linux/bsearch.h>
19#include <linux/sort.h>
20#include <linux/perf_event.h>
21#include <linux/ctype.h>
22#include <linux/error-injection.h>
23#include <linux/bpf_lsm.h>
24#include <linux/btf_ids.h>
25
26#include "disasm.h"
27
28static const struct bpf_verifier_ops *const bpf_verifier_ops[] = {
29#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) [_id] = &_name##_verifier_ops,
30#define BPF_MAP_TYPE(_id, _ops)
31#define BPF_LINK_TYPE(_id, _name)
32#include <linux/bpf_types.h>
33#undef BPF_PROG_TYPE
34#undef BPF_MAP_TYPE
35#undef BPF_LINK_TYPE
36};
37
38/* bpf_check() is a static code analyzer that walks eBPF program
39 * instruction by instruction and updates register/stack state.
40 * All paths of conditional branches are analyzed until 'bpf_exit' insn.
41 *
42 * The first pass is depth-first-search to check that the program is a DAG.
43 * It rejects the following programs:
44 * - larger than BPF_MAXINSNS insns
45 * - a loop is present (detected via a back-edge)
46 * - unreachable insns exist (shouldn't be a forest. program = one function)
47 * - out of bounds or malformed jumps
48 * The second pass descends all possible paths from the 1st insn.
49 * Since it's analyzing all paths through the program, the length of the
50 * analysis is limited to 64k insn, which may be hit even if the total number of
51 * insn is less than 4K, but there are too many branches that change stack/regs.
52 * Number of 'branches to be analyzed' is limited to 1k
53 *
54 * On entry to each instruction, each register has a type, and the instruction
55 * changes the types of the registers depending on instruction semantics.
56 * If instruction is BPF_MOV64_REG(BPF_REG_1, BPF_REG_5), then type of R5 is
57 * copied to R1.
58 *
59 * All registers are 64-bit.
60 * R0 - return register
61 * R1-R5 argument passing registers
62 * R6-R9 callee saved registers
63 * R10 - frame pointer read-only
64 *
65 * At the start of BPF program the register R1 contains a pointer to bpf_context
66 * and has type PTR_TO_CTX.
67 *
68 * Verifier tracks arithmetic operations on pointers in case:
69 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
70 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -20),
71 * 1st insn copies R10 (which has FRAME_PTR type) into R1
72 * and the 2nd arithmetic instruction is pattern matched to recognize
73 * that it wants to construct a pointer to some element within stack.
74 * So after 2nd insn, the register R1 has type PTR_TO_STACK
75 * (and -20 constant is saved for further stack bounds checking).
76 * Meaning that this reg is a pointer to stack plus known immediate constant.
77 *
78 * Most of the time the registers have SCALAR_VALUE type, which
79 * means the register has some value, but it's not a valid pointer.
80 * (like pointer plus pointer becomes SCALAR_VALUE type)
81 *
82 * When verifier sees load or store instructions the type of base register
83 * can be: PTR_TO_MAP_VALUE, PTR_TO_CTX, PTR_TO_STACK, PTR_TO_SOCKET. These are
84 * four pointer types recognized by check_mem_access() function.
85 *
86 * PTR_TO_MAP_VALUE means that this register is pointing to 'map element value'
87 * and the range of [ptr, ptr + map's value_size) is accessible.
88 *
89 * Registers used to pass values to function calls are checked against
90 * function argument constraints.
91 *
92 * ARG_PTR_TO_MAP_KEY is one of such argument constraints.
93 * It means that the register type passed to this function must be
94 * PTR_TO_STACK and it will be used inside the function as
95 * 'pointer to map element key'
96 *
97 * For example the argument constraints for bpf_map_lookup_elem():
98 *   .ret_type = RET_PTR_TO_MAP_VALUE_OR_NULL,
99 *   .arg1_type = ARG_CONST_MAP_PTR,
100 *   .arg2_type = ARG_PTR_TO_MAP_KEY,
101 *
102 * ret_type says that this function returns 'pointer to map elem value or null'.
103 * The function expects the 1st argument to be a const pointer to 'struct bpf_map' and
104 * the 2nd argument to be a pointer to stack, which will be used inside
105 * the helper function as a pointer to map element key.
106 *
107 * On the kernel side the helper function looks like:
108 * u64 bpf_map_lookup_elem(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5)
109 * {
110 *    struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
111 *    void *key = (void *) (unsigned long) r2;
112 *    void *value;
113 *
114 *    here kernel can access 'key' and 'map' pointers safely, knowing that
115 *    [key, key + map->key_size) bytes are valid and were initialized on
116 *    the stack of eBPF program.
117 * }
118 *
119 * Corresponding eBPF program may look like:
120 *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),  // after this insn R2 type is FRAME_PTR
121 *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), // after this insn R2 type is PTR_TO_STACK
122 *    BPF_LD_MAP_FD(BPF_REG_1, map_fd),      // after this insn R1 type is CONST_PTR_TO_MAP
123 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
124 * here verifier looks at prototype of map_lookup_elem() and sees:
125 * .arg1_type == ARG_CONST_MAP_PTR and R1->type == CONST_PTR_TO_MAP, which is ok,
126 * Now verifier knows that this map has key of R1->map_ptr->key_size bytes
127 *
128 * Then .arg2_type == ARG_PTR_TO_MAP_KEY and R2->type == PTR_TO_STACK, ok so far,
129 * Now verifier checks that [R2, R2 + map's key_size) are within stack limits
130 * and were initialized prior to this call.
131 * If it's ok, then verifier allows this BPF_CALL insn and looks at
132 * .ret_type which is RET_PTR_TO_MAP_VALUE_OR_NULL, so it sets
133 * R0->type = PTR_TO_MAP_VALUE_OR_NULL, which means the bpf_map_lookup_elem() function
134 * returns either a pointer to the map value or NULL.
135 *
136 * When type PTR_TO_MAP_VALUE_OR_NULL passes through 'if (reg != 0) goto +off'
137 * insn, the register holding that pointer in the true branch changes state to
138 * PTR_TO_MAP_VALUE and the same register changes state to CONST_IMM in the false
139 * branch. See check_cond_jmp_op().
140 *
141 * After the call R0 is set to return type of the function and registers R1-R5
142 * are set to NOT_INIT to indicate that they are no longer readable.
143 *
144 * The following reference types represent a potential reference to a kernel
145 * resource which, after first being allocated, must be checked and freed by
146 * the BPF program:
147 * - PTR_TO_SOCKET_OR_NULL, PTR_TO_SOCKET
148 *
149 * When the verifier sees a helper call return a reference type, it allocates a
150 * pointer id for the reference and stores it in the current function state.
151 * Similar to the way that PTR_TO_MAP_VALUE_OR_NULL is converted into
152 * PTR_TO_MAP_VALUE, PTR_TO_SOCKET_OR_NULL becomes PTR_TO_SOCKET when the type
153 * passes through a NULL-check conditional. For the branch wherein the state is
154 * changed to CONST_IMM, the verifier releases the reference.
155 *
156 * For each helper function that allocates a reference, such as
157 * bpf_sk_lookup_tcp(), there is a corresponding release function, such as
158 * bpf_sk_release(). When a reference type passes into the release function,
159 * the verifier also releases the reference. If any unchecked or unreleased
160 * reference remains at the end of the program, the verifier rejects it.
161 */
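
/* For illustration, a hedged sketch (a hypothetical instruction sequence in the
 * same style as the examples above; argument setup for the lookup call is
 * omitted) of how an acquired reference must be handled:
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_lookup_tcp),
 *                                           // R0 is PTR_TO_SOCKET_OR_NULL and
 *                                           // a new reference id is recorded
 *    BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), // if R0 == NULL, skip the release;
 *                                           // on the fall-through branch R0
 *                                           // becomes PTR_TO_SOCKET
 *    BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 *    BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_sk_release),
 *    BPF_EXIT_INSN(),
 * Dropping the bpf_sk_release() call on the non-NULL path would leave an
 * unreleased reference at BPF_EXIT and the verifier would reject the program.
 */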
162
163/* verifier_state + insn_idx are pushed to stack when branch is encountered */
164struct bpf_verifier_stack_elem {
165    /* verifier state is 'st'
166     * before processing instruction 'insn_idx'
167     * and after processing instruction 'prev_insn_idx'
168     */
169    struct bpf_verifier_state st;
170    int insn_idx;
171    int prev_insn_idx;
172    struct bpf_verifier_stack_elem *next;
173    /* length of verifier log at the time this state was pushed on stack */
174    u32 log_pos;
175};
176
177#define BPF_COMPLEXITY_LIMIT_JMP_SEQ 8192
178#define BPF_COMPLEXITY_LIMIT_STATES 64
179
180#define BPF_MAP_KEY_POISON (1ULL << 63)
181#define BPF_MAP_KEY_SEEN (1ULL << 62)
182
183#define BPF_MAP_PTR_UNPRIV 1UL
184#define BPF_MAP_PTR_POISON ((void *)((0xeB9FUL << 1) + POISON_POINTER_DELTA))
185#define BPF_MAP_PTR(X) ((struct bpf_map *)((X) & ~BPF_MAP_PTR_UNPRIV))
186
187#define VERIFIER_TWO 2
188#define VERIFIER_THREE 3
189#define VERIFIER_FOUR 4
190#define VERIFIER_EIGHT 8
191#define VERIFIER_SIXTEEN 16
192#define VERIFIER_THIRTYONE 31
193#define VERIFIER_THIRTYTWO 32
194#define VERIFIER_SIXTYTHREE 63
195#define VERIFIER_SIXTYFOUR 64
196#define VERIFIER_ONEHUNDREDTWENTYEIGHT 128
197#define VERIFIER_TWOHUNDREDFIFTYSIX 256
198#define VERIFIER_ONETHOUSAND 1000
199
200static bool bpf_map_ptr_poisoned(const struct bpf_insn_aux_data *aux)
201{
202    return BPF_MAP_PTR(aux->map_ptr_state) == BPF_MAP_PTR_POISON;
203}
204
205static bool bpf_map_ptr_unpriv(const struct bpf_insn_aux_data *aux)
206{
207    return aux->map_ptr_state & BPF_MAP_PTR_UNPRIV;
208}
209
210static void bpf_map_ptr_store(struct bpf_insn_aux_data *aux, const struct bpf_map *map, bool unpriv)
211{
212    BUILD_BUG_ON((unsigned long)BPF_MAP_PTR_POISON & BPF_MAP_PTR_UNPRIV);
213    unpriv |= bpf_map_ptr_unpriv(aux);
214    aux->map_ptr_state = (unsigned long)map | (unpriv ? BPF_MAP_PTR_UNPRIV : 0UL);
215}
216
217static bool bpf_map_key_poisoned(const struct bpf_insn_aux_data *aux)
218{
219    return aux->map_key_state & BPF_MAP_KEY_POISON;
220}
221
222static bool bpf_map_key_unseen(const struct bpf_insn_aux_data *aux)
223{
224    return !(aux->map_key_state & BPF_MAP_KEY_SEEN);
225}
226
227static u64 bpf_map_key_immediate(const struct bpf_insn_aux_data *aux)
228{
229    return aux->map_key_state & ~(BPF_MAP_KEY_SEEN | BPF_MAP_KEY_POISON);
230}
231
232static void bpf_map_key_store(struct bpf_insn_aux_data *aux, u64 state)
233{
234    bool poisoned = bpf_map_key_poisoned(aux);
235
236    aux->map_key_state = state | BPF_MAP_KEY_SEEN | (poisoned ? BPF_MAP_KEY_POISON : 0ULL);
237}
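
/* A hedged usage sketch (the call sites are outside this excerpt, so the exact
 * pattern is assumed): when a given instruction always passes the constant
 * key/index 7, the verifier records it once; a non-constant or conflicting
 * value poisons the state instead:
 *
 *     if (bpf_map_key_unseen(aux))
 *         bpf_map_key_store(aux, 7);
 *     else if (!bpf_map_key_poisoned(aux) && bpf_map_key_immediate(aux) != 7)
 *         bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
 *
 * Once poisoned, later optimizations can no longer assume a single known
 * constant for this instruction.
 */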
238
239struct bpf_call_arg_meta {
240    struct bpf_map *map_ptr;
241    bool raw_mode;
242    bool pkt_access;
243    int regno;
244    int access_size;
245    int mem_size;
246    u64 msize_max_value;
247    int ref_obj_id;
248    int func_id;
249    u32 btf_id;
250    u32 ret_btf_id;
251};
252
253struct btf *btf_vmlinux;
254
255static DEFINE_MUTEX(bpf_verifier_lock);
256
257static const struct bpf_line_info *find_linfo(const struct bpf_verifier_env *env, u32 insn_off)
258{
259    const struct bpf_line_info *linfo;
260    const struct bpf_prog *prog;
261    u32 i, nr_linfo;
262
263    prog = env->prog;
264    nr_linfo = prog->aux->nr_linfo;
265
266    if (!nr_linfo || insn_off >= prog->len) {
267        return NULL;
268    }
269
270    linfo = prog->aux->linfo;
271    for (i = 1; i < nr_linfo; i++) {
272        if (insn_off < linfo[i].insn_off) {
273            break;
274        }
275    }
276
277    return &linfo[i - 1];
278}
279
280void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt, va_list args)
281{
282    unsigned int n;
283
284    n = vscnprintf(log->kbuf, BPF_VERIFIER_TMP_LOG_SIZE, fmt, args);
285
286    WARN_ONCE(n >= BPF_VERIFIER_TMP_LOG_SIZE - 1, "verifier log line truncated - local buffer too short\n");
287
288    n = min(log->len_total - log->len_used - 1, n);
289    log->kbuf[n] = '\0';
290
291    if (log->level == BPF_LOG_KERNEL) {
292        pr_err("BPF:%s\n", log->kbuf);
293        return;
294    }
295    if (!copy_to_user(log->ubuf + log->len_used, log->kbuf, n + 1)) {
296        log->len_used += n;
297    } else {
298        log->ubuf = NULL;
299    }
300}
301
302static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
303{
304    char zero = 0;
305
306    if (!bpf_verifier_log_needed(log)) {
307        return;
308    }
309
310    log->len_used = new_pos;
311    if (put_user(zero, log->ubuf + new_pos)) {
312        log->ubuf = NULL;
313    }
314}
315
316/* log_level controls verbosity level of eBPF verifier.
317 * bpf_verifier_log_write() is used to dump the verification trace to the log,
318 * so the user can figure out what's wrong with the program
319 */
320__printf(2, 3) void bpf_verifier_log_write(struct bpf_verifier_env *env, const char *fmt, ...)
321{
322    va_list args;
323
324    if (!bpf_verifier_log_needed(&env->log)) {
325        return;
326    }
327
328    va_start(args, fmt);
329    bpf_verifier_vlog(&env->log, fmt, args);
330    va_end(args);
331}
332EXPORT_SYMBOL_GPL(bpf_verifier_log_write);
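
/* Minimal usage sketch (hypothetical caller, e.g. an offload driver that was
 * handed the same verifier env; 'insn_idx' is an assumed local variable):
 *
 *     bpf_verifier_log_write(env, "unsupported insn at %d\n", insn_idx);
 *
 * The line is appended to the user-supplied log buffer; if copying to user
 * space fails, log->ubuf is cleared and further output is silently dropped.
 */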
333
334__printf(2, 3) static void verbose(void *private_data, const char *fmt, ...)
335{
336    struct bpf_verifier_env *env = private_data;
337    va_list args;
338
339    if (!bpf_verifier_log_needed(&env->log)) {
340        return;
341    }
342
343    va_start(args, fmt);
344    bpf_verifier_vlog(&env->log, fmt, args);
345    va_end(args);
346}
347
348__printf(2, 3) void bpf_log(struct bpf_verifier_log *log, const char *fmt, ...)
349{
350    va_list args;
351
352    if (!bpf_verifier_log_needed(log)) {
353        return;
354    }
355
356    va_start(args, fmt);
357    bpf_verifier_vlog(log, fmt, args);
358    va_end(args);
359}
360
361static const char *ltrim(const char *s)
362{
363    while (isspace(*s)) {
364        s++;
365    }
366
367    return s;
368}
369
370__printf(3, 4) static void verbose_linfo(struct bpf_verifier_env *env, u32 insn_off, const char *prefix_fmt, ...)
371{
372    const struct bpf_line_info *linfo;
373
374    if (!bpf_verifier_log_needed(&env->log)) {
375        return;
376    }
377
378    linfo = find_linfo(env, insn_off);
379    if (!linfo || linfo == env->prev_linfo) {
380        return;
381    }
382
383    if (prefix_fmt) {
384        va_list args;
385
386        va_start(args, prefix_fmt);
387        bpf_verifier_vlog(&env->log, prefix_fmt, args);
388        va_end(args);
389    }
390
391    verbose(env, "%s\n", ltrim(btf_name_by_offset(env->prog->aux->btf, linfo->line_off)));
392
393    env->prev_linfo = linfo;
394}
395
396static bool type_is_pkt_pointer(enum bpf_reg_type type)
397{
398    return type == PTR_TO_PACKET || type == PTR_TO_PACKET_META;
399}
400
401static bool type_is_sk_pointer(enum bpf_reg_type type)
402{
403    return type == PTR_TO_SOCKET || type == PTR_TO_SOCK_COMMON || type == PTR_TO_TCP_SOCK || type == PTR_TO_XDP_SOCK;
404}
405
406static bool reg_type_not_null(enum bpf_reg_type type)
407{
408    return type == PTR_TO_SOCKET || type == PTR_TO_TCP_SOCK || type == PTR_TO_MAP_VALUE || type == PTR_TO_SOCK_COMMON;
409}
410
411static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
412{
413    return reg->type == PTR_TO_MAP_VALUE && map_value_has_spin_lock(reg->map_ptr);
414}
415
416static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
417{
418    return base_type(type) == PTR_TO_SOCKET || base_type(type) == PTR_TO_TCP_SOCK || base_type(type) == PTR_TO_MEM;
419}
420
421static bool type_is_rdonly_mem(u32 type)
422{
423    return type & MEM_RDONLY;
424}
425
426static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
427{
428    return type == ARG_PTR_TO_SOCK_COMMON;
429}
430
431static bool type_may_be_null(u32 type)
432{
433    return type & PTR_MAYBE_NULL;
434}
435
436/* Determine whether the function releases some resources allocated by another
437 * function call. The first reference type argument will be assumed to be
438 * released by release_reference().
439 */
440static bool is_release_function(enum bpf_func_id func_id)
441{
442    return func_id == BPF_FUNC_sk_release || func_id == BPF_FUNC_ringbuf_submit || func_id == BPF_FUNC_ringbuf_discard;
443}
444
445static bool may_be_acquire_function(enum bpf_func_id func_id)
446{
447    return func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp ||
448           func_id == BPF_FUNC_skc_lookup_tcp || func_id == BPF_FUNC_map_lookup_elem ||
449           func_id == BPF_FUNC_ringbuf_reserve;
450}
451
452static bool is_acquire_function(enum bpf_func_id func_id, const struct bpf_map *map)
453{
454    enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
455
456    if (func_id == BPF_FUNC_sk_lookup_tcp || func_id == BPF_FUNC_sk_lookup_udp || func_id == BPF_FUNC_skc_lookup_tcp ||
457        func_id == BPF_FUNC_ringbuf_reserve) {
458        return true;
459    }
460
461    if (func_id == BPF_FUNC_map_lookup_elem &&
462        (map_type == BPF_MAP_TYPE_SOCKMAP || map_type == BPF_MAP_TYPE_SOCKHASH)) {
463        return true;
464    }
465
466    return false;
467}
468
469static bool is_ptr_cast_function(enum bpf_func_id func_id)
470{
471    return func_id == BPF_FUNC_tcp_sock || func_id == BPF_FUNC_sk_fullsock || func_id == BPF_FUNC_skc_to_tcp_sock ||
472           func_id == BPF_FUNC_skc_to_tcp6_sock || func_id == BPF_FUNC_skc_to_udp6_sock ||
473           func_id == BPF_FUNC_skc_to_tcp_timewait_sock || func_id == BPF_FUNC_skc_to_tcp_request_sock;
474}
475
476/* string representation of 'enum bpf_reg_type'
477 *
478 * Note that reg_type_str() cannot appear more than once in a single verbose()
479 * statement.
480 */
481static const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type)
482{
483    char postfix[VERIFIER_SIXTEEN] = {0}, prefix[VERIFIER_SIXTEEN] = {0};
484    static const char *const str[] = {
485        [NOT_INIT] = "?",
486        [SCALAR_VALUE] = "inv",
487        [PTR_TO_CTX] = "ctx",
488        [CONST_PTR_TO_MAP] = "map_ptr",
489        [PTR_TO_MAP_VALUE] = "map_value",
490        [PTR_TO_STACK] = "fp",
491        [PTR_TO_PACKET] = "pkt",
492        [PTR_TO_PACKET_META] = "pkt_meta",
493        [PTR_TO_PACKET_END] = "pkt_end",
494        [PTR_TO_FLOW_KEYS] = "flow_keys",
495        [PTR_TO_SOCKET] = "sock",
496        [PTR_TO_SOCK_COMMON] = "sock_common",
497        [PTR_TO_TCP_SOCK] = "tcp_sock",
498        [PTR_TO_TP_BUFFER] = "tp_buffer",
499        [PTR_TO_XDP_SOCK] = "xdp_sock",
500        [PTR_TO_BTF_ID] = "ptr_",
501        [PTR_TO_PERCPU_BTF_ID] = "percpu_ptr_",
502        [PTR_TO_MEM] = "mem",
503        [PTR_TO_BUF] = "buf",
504    };
505
506    if (type & PTR_MAYBE_NULL) {
507        if (base_type(type) == PTR_TO_BTF_ID || base_type(type) == PTR_TO_PERCPU_BTF_ID) {
508            strncpy(postfix, "or_null_", VERIFIER_SIXTEEN);
509        } else {
510            strncpy(postfix, "_or_null", VERIFIER_SIXTEEN);
511        }
512    }
513
514    if (type & MEM_RDONLY) {
515        strncpy(prefix, "rdonly_", VERIFIER_SIXTEEN);
516    }
517    if (type & MEM_ALLOC) {
518        strncpy(prefix, "alloc_", VERIFIER_SIXTEEN);
519    }
520
521    (void)snprintf(env->type_str_buf, TYPE_STR_BUF_LEN, "%s%s%s", prefix, str[base_type(type)], postfix);
522    return env->type_str_buf;
523}
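
/* Examples of the strings produced above: PTR_TO_SOCKET | PTR_MAYBE_NULL
 * prints as "sock_or_null", PTR_TO_MEM | MEM_RDONLY as "rdonly_mem", and
 * PTR_TO_BTF_ID | PTR_MAYBE_NULL as "ptr_or_null_" (callers such as
 * print_verifier_state() append the BTF type name right after it).
 */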
524
525static char slot_type_char[] = {
526    [STACK_INVALID] = '?',
527    [STACK_SPILL] = 'r',
528    [STACK_MISC] = 'm',
529    [STACK_ZERO] = '0',
530};
531
532static void print_liveness(struct bpf_verifier_env *env, enum bpf_reg_liveness live)
533{
534    if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN | REG_LIVE_DONE)) {
535        verbose(env, "_");
536    }
537    if (live & REG_LIVE_READ) {
538        verbose(env, "r");
539    }
540    if (live & REG_LIVE_WRITTEN) {
541        verbose(env, "w");
542    }
543    if (live & REG_LIVE_DONE) {
544        verbose(env, "D");
545    }
546}
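
/* Example: "R6_rw=..." in a state dump means R6 was both read and written in
 * the current state, while "R1_w=..." means R1 was written but not (yet) read.
 * The leading '_' is printed whenever any liveness bit is set, and 'D' marks
 * REG_LIVE_DONE, i.e. liveness for that register will no longer be updated.
 */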
547
548static struct bpf_func_state *func(struct bpf_verifier_env *env, const struct bpf_reg_state *reg)
549{
550    struct bpf_verifier_state *cur = env->cur_state;
551
552    return cur->frame[reg->frameno];
553}
554
555const char *kernel_type_name(u32 id)
556{
557    return btf_name_by_offset(btf_vmlinux, btf_type_by_id(btf_vmlinux, id)->name_off);
558}
559
560static void print_verifier_state(struct bpf_verifier_env *env, const struct bpf_func_state *state)
561{
562    const struct bpf_reg_state *reg;
563    enum bpf_reg_type t;
564    int i;
565
566    if (state->frameno) {
567        verbose(env, " frame%d:", state->frameno);
568    }
569    for (i = 0; i < MAX_BPF_REG; i++) {
570        reg = &state->regs[i];
571        t = reg->type;
572        if (t == NOT_INIT) {
573            continue;
574        }
575        verbose(env, " R%d", i);
576        print_liveness(env, reg->live);
577        verbose(env, "=%s", reg_type_str(env, t));
578        if (t == SCALAR_VALUE && reg->precise) {
579            verbose(env, "P");
580        }
581        if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) {
582            /* reg->off should be 0 for SCALAR_VALUE */
583            verbose(env, "%lld", reg->var_off.value + reg->off);
584        } else {
585            if (base_type(t) == PTR_TO_BTF_ID || base_type(t) == PTR_TO_PERCPU_BTF_ID) {
586                verbose(env, "%s", kernel_type_name(reg->btf_id));
587            }
588            verbose(env, "(id=%d", reg->id);
589            if (reg_type_may_be_refcounted_or_null(t)) {
590                verbose(env, ",ref_obj_id=%d", reg->ref_obj_id);
591            }
592            if (t != SCALAR_VALUE) {
593                verbose(env, ",off=%d", reg->off);
594            }
595            if (type_is_pkt_pointer(t)) {
596                verbose(env, ",r=%d", reg->range);
597            } else if (base_type(t) == CONST_PTR_TO_MAP || base_type(t) == PTR_TO_MAP_VALUE) {
598                verbose(env, ",ks=%d,vs=%d", reg->map_ptr->key_size, reg->map_ptr->value_size);
599            }
600            if (tnum_is_const(reg->var_off)) {
601                /* Typically an immediate SCALAR_VALUE, but
602                 * could be a pointer whose offset is too big
603                 * for reg->off
604                 */
605                verbose(env, ",imm=%llx", reg->var_off.value);
606            } else {
607                if (reg->smin_value != reg->umin_value && reg->smin_value != S64_MIN) {
608                    verbose(env, ",smin_value=%lld", (long long)reg->smin_value);
609                }
610                if (reg->smax_value != reg->umax_value && reg->smax_value != S64_MAX) {
611                    verbose(env, ",smax_value=%lld", (long long)reg->smax_value);
612                }
613                if (reg->umin_value != 0) {
614                    verbose(env, ",umin_value=%llu", (unsigned long long)reg->umin_value);
615                }
616                if (reg->umax_value != U64_MAX) {
617                    verbose(env, ",umax_value=%llu", (unsigned long long)reg->umax_value);
618                }
619                if (!tnum_is_unknown(reg->var_off)) {
620                    char tn_buf[48];
621
622                    tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
623                    verbose(env, ",var_off=%s", tn_buf);
624                }
625                if (reg->s32_min_value != reg->smin_value && reg->s32_min_value != S32_MIN) {
626                    verbose(env, ",s32_min_value=%d", (int)(reg->s32_min_value));
627                }
628                if (reg->s32_max_value != reg->smax_value && reg->s32_max_value != S32_MAX) {
629                    verbose(env, ",s32_max_value=%d", (int)(reg->s32_max_value));
630                }
631                if (reg->u32_min_value != reg->umin_value && reg->u32_min_value != U32_MIN) {
632                    verbose(env, ",u32_min_value=%d", (int)(reg->u32_min_value));
633                }
634                if (reg->u32_max_value != reg->umax_value && reg->u32_max_value != U32_MAX) {
635                    verbose(env, ",u32_max_value=%d", (int)(reg->u32_max_value));
636                }
637            }
638            verbose(env, ")");
639        }
640    }
641    for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
642        char types_buf[BPF_REG_SIZE + 1];
643        bool valid = false;
644        int j;
645
646        for (j = 0; j < BPF_REG_SIZE; j++) {
647            if (state->stack[i].slot_type[j] != STACK_INVALID) {
648                valid = true;
649            }
650            types_buf[j] = slot_type_char[state->stack[i].slot_type[j]];
651        }
652        types_buf[BPF_REG_SIZE] = 0;
653        if (!valid) {
654            continue;
655        }
656        verbose(env, " fp%d", (-i - 1) * BPF_REG_SIZE);
657        print_liveness(env, state->stack[i].spilled_ptr.live);
658        if (state->stack[i].slot_type[0] == STACK_SPILL) {
659            reg = &state->stack[i].spilled_ptr;
660            t = reg->type;
661            verbose(env, "=%s", reg_type_str(env, t));
662            if (t == SCALAR_VALUE && reg->precise) {
663                verbose(env, "P");
664            }
665            if (t == SCALAR_VALUE && tnum_is_const(reg->var_off)) {
666                verbose(env, "%lld", reg->var_off.value + reg->off);
667            }
668        } else {
669            verbose(env, "=%s", types_buf);
670        }
671    }
672    if (state->acquired_refs && state->refs[0].id) {
673        verbose(env, " refs=%d", state->refs[0].id);
674        for (i = 1; i < state->acquired_refs; i++) {
675            if (state->refs[i].id) {
676                verbose(env, ",%d", state->refs[i].id);
677            }
678        }
679    }
680    verbose(env, "\n");
681}
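
/* A simplified sample of the resulting log line, assuming one frame with an
 * initialized context pointer and 8 bytes of miscellaneous stack data:
 *
 *     R1_w=ctx(id=0,off=0,imm=0) R10=fp0 fp-8_w=mmmmmmmm
 *
 * i.e. registers first, then the occupied stack slots of the current frame,
 * then (if any) the list of acquired reference ids.
 */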
682
683#define COPY_STATE_FN(NAME, COUNT, FIELD, SIZE)                                                                        \
684    static int copy_##NAME##_state(struct bpf_func_state *dst, const struct bpf_func_state *src)                       \
685    {                                                                                                                  \
686        if (!src->FIELD)                                                                                               \
687            return 0;                                                                                                  \
688        if (WARN_ON_ONCE(dst->COUNT < src->COUNT)) {                                                                   \
689            /* internal bug, make state invalid to reject the program */                                               \
690            memset(dst, 0, sizeof(*dst));                                                                              \
691            return -EFAULT;                                                                                            \
692        }                                                                                                              \
693        memcpy(dst->FIELD, src->FIELD, sizeof(*src->FIELD) * (src->COUNT / (SIZE)));                                   \
694        return 0;                                                                                                      \
695    }
696/* copy_reference_state() */
697COPY_STATE_FN(reference, acquired_refs, refs, 1)
698/* copy_stack_state() */
699COPY_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
700#undef COPY_STATE_FN
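
/* For readability, copy_reference_state() above expands to (roughly):
 *
 *     static int copy_reference_state(struct bpf_func_state *dst,
 *                                     const struct bpf_func_state *src)
 *     {
 *         if (!src->refs)
 *             return 0;
 *         if (WARN_ON_ONCE(dst->acquired_refs < src->acquired_refs)) {
 *             memset(dst, 0, sizeof(*dst));
 *             return -EFAULT;
 *         }
 *         memcpy(dst->refs, src->refs, sizeof(*src->refs) * src->acquired_refs);
 *         return 0;
 *     }
 */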
701
702#define REALLOC_STATE_FN(NAME, COUNT, FIELD, SIZE)                                                                     \
703    static int realloc_##NAME##_state(struct bpf_func_state *state, int size, bool copy_old)                           \
704    {                                                                                                                  \
705        u32 old_size = state->COUNT;                                                                                   \
706        struct bpf_##NAME##_state *new_##FIELD;                                                                        \
707        int slot = size / (SIZE);                                                                                      \
708                                                                                                                       \
709        if (size <= old_size || !size) {                                                                               \
710            if (copy_old)                                                                                              \
711                return 0;                                                                                              \
712            state->COUNT = slot * (SIZE);                                                                              \
713            if (!size && old_size) {                                                                                   \
714                kfree(state->FIELD);                                                                                   \
715                state->FIELD = NULL;                                                                                   \
716            }                                                                                                          \
717            return 0;                                                                                                  \
718        }                                                                                                              \
719        new_##FIELD = kmalloc_array(slot, sizeof(struct bpf_##NAME##_state), GFP_KERNEL);                              \
720        if (!new_##FIELD)                                                                                              \
721            return -ENOMEM;                                                                                            \
722        if (copy_old) {                                                                                                \
723            if (state->FIELD)                                                                                          \
724                memcpy(new_##FIELD, state->FIELD, sizeof(*new_##FIELD) * (old_size / (SIZE)));                         \
725            memset(new_##FIELD + old_size / (SIZE), 0, sizeof(*new_##FIELD) * (size - old_size) / (SIZE));             \
726        }                                                                                                              \
727        state->COUNT = slot * (SIZE);                                                                                  \
728        kfree(state->FIELD);                                                                                           \
729        state->FIELD = new_##FIELD;                                                                                    \
730        return 0;                                                                                                      \
731    }
732/* realloc_reference_state() */
733REALLOC_STATE_FN(reference, acquired_refs, refs, 1)
734/* realloc_stack_state() */
735REALLOC_STATE_FN(stack, allocated_stack, stack, BPF_REG_SIZE)
736#undef REALLOC_STATE_FN
737
738/* do_check() starts with zero-sized stack in struct bpf_verifier_state to
739 * make it consume a minimal amount of memory. When the program writes to the
740 * stack, check_stack_write() calls into realloc_func_state() to grow the stack size.
741 * Note there is a non-zero 'parent' pointer inside bpf_verifier_state
742 * which realloc_stack_state() copies over. It points to previous
743 * bpf_verifier_state which is never reallocated.
744 */
745static int realloc_func_state(struct bpf_func_state *state, int stack_size, int refs_size, bool copy_old)
746{
747    int err = realloc_reference_state(state, refs_size, copy_old);
748    if (err) {
749        return err;
750    }
751    return realloc_stack_state(state, stack_size, copy_old);
752}
753
754/* Acquire a pointer id from the env and update the state->refs to include
755 * this new pointer reference.
756 * On success, returns a valid pointer id to associate with the register.
757 * On failure, returns a negative errno.
758 */
759static int acquire_reference_state(struct bpf_verifier_env *env, int insn_idx)
760{
761    struct bpf_func_state *state = cur_func(env);
762    int new_ofs = state->acquired_refs;
763    int id, err;
764
765    err = realloc_reference_state(state, state->acquired_refs + 1, true);
766    if (err) {
767        return err;
768    }
769    id = ++env->id_gen;
770    state->refs[new_ofs].id = id;
771    state->refs[new_ofs].insn_idx = insn_idx;
772
773    return id;
774}
775
776/* release function corresponding to acquire_reference_state(). Idempotent. */
777static int release_reference_state(struct bpf_func_state *state, int ptr_id)
778{
779    int i, last_idx;
780
781    last_idx = state->acquired_refs - 1;
782    for (i = 0; i < state->acquired_refs; i++) {
783        if (state->refs[i].id == ptr_id) {
784            if (last_idx && i != last_idx) {
785                memcpy(&state->refs[i], &state->refs[last_idx], sizeof(*state->refs));
786            }
787            memset(&state->refs[last_idx], 0, sizeof(*state->refs));
788            state->acquired_refs--;
789            return 0;
790        }
791    }
792    return -EINVAL;
793}
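
/* Worked example of the compaction above: with refs holding ids {3, 5, 7}
 * and acquired_refs == 3, releasing id 5 copies the last entry over slot 1
 * and clears the tail, leaving {3, 7} with acquired_refs == 2.  Releasing an
 * id that is not present returns -EINVAL, which callers report as a
 * "reference has not been acquired before" style error.
 */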
794
795static int transfer_reference_state(struct bpf_func_state *dst, struct bpf_func_state *src)
796{
797    int err = realloc_reference_state(dst, src->acquired_refs, false);
798    if (err) {
799        return err;
800    }
801    err = copy_reference_state(dst, src);
802    if (err) {
803        return err;
804    }
805    return 0;
806}
807
808static void free_func_state(struct bpf_func_state *state)
809{
810    if (!state) {
811        return;
812    }
813    kfree(state->refs);
814    kfree(state->stack);
815    kfree(state);
816}
817
818static void clear_jmp_history(struct bpf_verifier_state *state)
819{
820    kfree(state->jmp_history);
821    state->jmp_history = NULL;
822    state->jmp_history_cnt = 0;
823}
824
825static void free_verifier_state(struct bpf_verifier_state *state, bool free_self)
826{
827    int i;
828
829    for (i = 0; i <= state->curframe; i++) {
830        free_func_state(state->frame[i]);
831        state->frame[i] = NULL;
832    }
833    clear_jmp_history(state);
834    if (free_self) {
835        kfree(state);
836    }
837}
838
839/* copy verifier state from src to dst, growing dst stack space
840 * when necessary to accommodate a larger src stack
841 */
842static int copy_func_state(struct bpf_func_state *dst, const struct bpf_func_state *src)
843{
844    int err;
845
846    err = realloc_func_state(dst, src->allocated_stack, src->acquired_refs, false);
847    if (err) {
848        return err;
849    }
850    memcpy(dst, src, offsetof(struct bpf_func_state, acquired_refs));
851    err = copy_reference_state(dst, src);
852    if (err) {
853        return err;
854    }
855    return copy_stack_state(dst, src);
856}
857
858static int copy_verifier_state(struct bpf_verifier_state *dst_state, const struct bpf_verifier_state *src)
859{
860    struct bpf_func_state *dst;
861    u32 jmp_sz = sizeof(struct bpf_idx_pair) * src->jmp_history_cnt;
862    int i, err;
863
864    if (dst_state->jmp_history_cnt < src->jmp_history_cnt) {
865        kfree(dst_state->jmp_history);
866        dst_state->jmp_history = kmalloc(jmp_sz, GFP_USER);
867        if (!dst_state->jmp_history) {
868            return -ENOMEM;
869        }
870    }
871    memcpy(dst_state->jmp_history, src->jmp_history, jmp_sz);
872    dst_state->jmp_history_cnt = src->jmp_history_cnt;
873
874    /* if dst has more stack frames than src, free them */
875    for (i = src->curframe + 1; i <= dst_state->curframe; i++) {
876        free_func_state(dst_state->frame[i]);
877        dst_state->frame[i] = NULL;
878    }
879    dst_state->speculative = src->speculative;
880    dst_state->curframe = src->curframe;
881    dst_state->active_spin_lock = src->active_spin_lock;
882    dst_state->branches = src->branches;
883    dst_state->parent = src->parent;
884    dst_state->first_insn_idx = src->first_insn_idx;
885    dst_state->last_insn_idx = src->last_insn_idx;
886    for (i = 0; i <= src->curframe; i++) {
887        dst = dst_state->frame[i];
888        if (!dst) {
889            dst = kzalloc(sizeof(*dst), GFP_KERNEL);
890            if (!dst) {
891                return -ENOMEM;
892            }
893            dst_state->frame[i] = dst;
894        }
895        err = copy_func_state(dst, src->frame[i]);
896        if (err) {
897            return err;
898        }
899    }
900    return 0;
901}
902
903static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
904{
905    while (st) {
906        u32 br = --st->branches;
907
908        /* WARN_ON(br > 1) technically makes sense here,
909         * but see comment in push_stack(), hence:
910         */
911        WARN_ONCE((int)br < 0, "BUG update_branch_counts:branches_to_explore=%d\n", br);
912        if (br) {
913            break;
914        }
915        st = st->parent;
916    }
917}
918
919static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx, int *insn_idx, bool pop_log)
920{
921    struct bpf_verifier_state *cur = env->cur_state;
922    struct bpf_verifier_stack_elem *elem, *head = env->head;
923    int err;
924
925    if (env->head == NULL) {
926        return -ENOENT;
927    }
928
929    if (cur) {
930        err = copy_verifier_state(cur, &head->st);
931        if (err) {
932            return err;
933        }
934    }
935    if (pop_log) {
936        bpf_vlog_reset(&env->log, head->log_pos);
937    }
938    if (insn_idx) {
939        *insn_idx = head->insn_idx;
940    }
941    if (prev_insn_idx) {
942        *prev_insn_idx = head->prev_insn_idx;
943    }
944    elem = head->next;
945    free_verifier_state(&head->st, false);
946    kfree(head);
947    env->head = elem;
948    env->stack_size--;
949    return 0;
950}
951
952static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx,
953                                             bool speculative)
954{
955    struct bpf_verifier_state *cur = env->cur_state;
956    struct bpf_verifier_stack_elem *elem;
957    int err;
958
959    elem = kzalloc(sizeof(struct bpf_verifier_stack_elem), GFP_KERNEL);
960    if (!elem) {
961        goto err;
962    }
963
964    elem->insn_idx = insn_idx;
965    elem->prev_insn_idx = prev_insn_idx;
966    elem->next = env->head;
967    elem->log_pos = env->log.len_used;
968    env->head = elem;
969    env->stack_size++;
970    err = copy_verifier_state(&elem->st, cur);
971    if (err) {
972        goto err;
973    }
974    elem->st.speculative |= speculative;
975    if (env->stack_size > BPF_COMPLEXITY_LIMIT_JMP_SEQ) {
976        verbose(env, "The sequence of %d jumps is too complex.\n", env->stack_size);
977        goto err;
978    }
979    if (elem->st.parent) {
980        ++elem->st.parent->branches;
981        /* WARN_ON(branches > 2) technically makes sense here,
982         * but
983         * 1. speculative states will bump 'branches' for non-branch
984         * instructions
985         * 2. is_state_visited() heuristics may decide not to create
986         * a new state for a sequence of branches and all such current
987         * and cloned states will be pointing to a single parent state
988         * which might have large 'branches' count.
989         */
990    }
991    return &elem->st;
992err:
993    free_verifier_state(env->cur_state, true);
994    env->cur_state = NULL;
995    /* pop all elements and return */
996    while (!pop_stack(env, NULL, NULL, false)) {
997        ;
998    }
999    return NULL;
1000}
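
/* Sketch of how the two functions above drive branch exploration (the callers,
 * check_cond_jmp_op()/do_check(), are outside this excerpt): on a conditional
 * jump where both outcomes are possible, one successor state is pushed and the
 * other is analyzed immediately:
 *
 *     other_branch = push_stack(env, insn_idx + insn->off + 1, insn_idx, false);
 *     if (!other_branch)
 *         return -EFAULT;
 *     // ... continue with the fall-through path; the pushed state is
 *     // revisited later via pop_stack() once this path reaches BPF_EXIT.
 */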
1001
1002#define CALLER_SAVED_REGS 6
1003static const int caller_saved[CALLER_SAVED_REGS] = {BPF_REG_0, BPF_REG_1, BPF_REG_2, BPF_REG_3, BPF_REG_4, BPF_REG_5};
1004
1005static void verifier_mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg);
1006
1007/* This helper doesn't clear reg->id */
1008static void verifier2_mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1009{
1010    reg->var_off = tnum_const(imm);
1011    reg->smin_value = (s64)imm;
1012    reg->smax_value = (s64)imm;
1013    reg->umin_value = imm;
1014    reg->umax_value = imm;
1015
1016    reg->s32_min_value = (s32)imm;
1017    reg->s32_max_value = (s32)imm;
1018    reg->u32_min_value = (u32)imm;
1019    reg->u32_max_value = (u32)imm;
1020}
1021
1022/* Mark the unknown part of a register (variable offset or scalar value) as
1023 * known to have the value @imm.
1024 */
1025static void verifier_mark_reg_known(struct bpf_reg_state *reg, u64 imm)
1026{
1027    /* Clear id, off, and union(map_ptr, range) */
1028    memset(((u8 *)reg) + sizeof(reg->type), 0, offsetof(struct bpf_reg_state, var_off) - sizeof(reg->type));
1029    verifier2_mark_reg_known(reg, imm);
1030}
1031
1032static void verifier_mark_reg32_known(struct bpf_reg_state *reg, u64 imm)
1033{
1034    reg->var_off = tnum_const_subreg(reg->var_off, imm);
1035    reg->s32_min_value = (s32)imm;
1036    reg->s32_max_value = (s32)imm;
1037    reg->u32_min_value = (u32)imm;
1038    reg->u32_max_value = (u32)imm;
1039}
1040
1041/* Mark the 'variable offset' part of a register as zero.  This should be
1042 * used only on registers holding a pointer type.
1043 */
1044static void verifier_mark_reg_known_zero(struct bpf_reg_state *reg)
1045{
1046    verifier_mark_reg_known(reg, 0);
1047}
1048
1049static void verifier_mark_reg_const_zero(struct bpf_reg_state *reg)
1050{
1051    verifier_mark_reg_known(reg, 0);
1052    reg->type = SCALAR_VALUE;
1053}
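
/* Worked example: after verifier_mark_reg_known(reg, 5) the register is a
 * fully-known constant: var_off = (value=5, mask=0), smin/smax and umin/umax
 * are all 5, and the 32-bit bounds are 5 as well.  verifier_mark_reg_known_zero()
 * is the same with imm == 0 and is meant for pointers that have no variable
 * offset yet.
 */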
1054
1055static void mark_reg_known_zero(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1056{
1057    if (WARN_ON(regno >= MAX_BPF_REG)) {
1058        verbose(env, "mark_reg_known_zero(regs, %u)\n", regno);
1059        /* Something bad happened, let's kill all regs */
1060        for (regno = 0; regno < MAX_BPF_REG; regno++) {
1061            verifier_mark_reg_not_init(env, regs + regno);
1062        }
1063        return;
1064    }
1065    verifier_mark_reg_known_zero(regs + regno);
1066}
1067
1068static bool reg_is_pkt_pointer(const struct bpf_reg_state *reg)
1069{
1070    return type_is_pkt_pointer(reg->type);
1071}
1072
1073static bool reg_is_pkt_pointer_any(const struct bpf_reg_state *reg)
1074{
1075    return reg_is_pkt_pointer(reg) || reg->type == PTR_TO_PACKET_END;
1076}
1077
1078/* Unmodified PTR_TO_PACKET[_META,_END] register from ctx access. */
1079static bool reg_is_init_pkt_pointer(const struct bpf_reg_state *reg, enum bpf_reg_type which)
1080{
1081    /* The register can already have a range from prior markings.
1082     * This is fine as long as it hasn't been advanced from its
1083     * origin.
1084     */
1085    return reg->type == which && reg->id == 0 && reg->off == 0 && tnum_equals_const(reg->var_off, 0);
1086}
1087
1088/* Reset the min/max bounds of a register */
1089static void verifier_mark_reg_unbounded(struct bpf_reg_state *reg)
1090{
1091    reg->smin_value = S64_MIN;
1092    reg->smax_value = S64_MAX;
1093    reg->umin_value = 0;
1094    reg->umax_value = U64_MAX;
1095
1096    reg->s32_min_value = S32_MIN;
1097    reg->s32_max_value = S32_MAX;
1098    reg->u32_min_value = 0;
1099    reg->u32_max_value = U32_MAX;
1100}
1101
1102static void verifier_mark_reg64_unbounded(struct bpf_reg_state *reg)
1103{
1104    reg->smin_value = S64_MIN;
1105    reg->smax_value = S64_MAX;
1106    reg->umin_value = 0;
1107    reg->umax_value = U64_MAX;
1108}
1109
1110static void verifier_mark_reg32_unbounded(struct bpf_reg_state *reg)
1111{
1112    reg->s32_min_value = S32_MIN;
1113    reg->s32_max_value = S32_MAX;
1114    reg->u32_min_value = 0;
1115    reg->u32_max_value = U32_MAX;
1116}
1117
1118static void verifier_update_reg32_bounds(struct bpf_reg_state *reg)
1119{
1120    struct tnum var32_off = tnum_subreg(reg->var_off);
1121
1122    /* min signed is max(sign bit) | min(other bits) */
1123    reg->s32_min_value = max_t(s32, reg->s32_min_value, var32_off.value | (var32_off.mask & S32_MIN));
1124    /* max signed is min(sign bit) | max(other bits) */
1125    reg->s32_max_value = min_t(s32, reg->s32_max_value, var32_off.value | (var32_off.mask & S32_MAX));
1126    reg->u32_min_value = max_t(u32, reg->u32_min_value, (u32)var32_off.value);
1127    reg->u32_max_value = min(reg->u32_max_value, (u32)(var32_off.value | var32_off.mask));
1128}
1129
1130static void verifier_update_reg64_bounds(struct bpf_reg_state *reg)
1131{
1132    /* min signed is max(sign bit) | min(other bits) */
1133    reg->smin_value = max_t(s64, reg->smin_value, reg->var_off.value | (reg->var_off.mask & S64_MIN));
1134    /* max signed is min(sign bit) | max(other bits) */
1135    reg->smax_value = min_t(s64, reg->smax_value, reg->var_off.value | (reg->var_off.mask & S64_MAX));
1136    reg->umin_value = max(reg->umin_value, reg->var_off.value);
1137    reg->umax_value = min(reg->umax_value, reg->var_off.value | reg->var_off.mask);
1138}
1139
1140static void verifier_update_reg_bounds(struct bpf_reg_state *reg)
1141{
1142    verifier_update_reg32_bounds(reg);
1143    verifier_update_reg64_bounds(reg);
1144}
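
/* Worked example for the 64-bit case above: if var_off = (value=0x10, mask=0x3),
 * i.e. bit 4 known set and bits 0-1 unknown, then umin_value is raised to at
 * least 0x10 and umax_value is lowered to at most 0x13 (value | mask).  The
 * signed bounds are tightened the same way, using the sign bit of the mask.
 */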
1145
1146/* Uses signed min/max values to inform unsigned, and vice-versa */
1147static void verifier_reg32_deduce_bounds(struct bpf_reg_state *reg)
1148{
1149    /* Learn sign from signed bounds.
1150     * If we cannot cross the sign boundary, then signed and unsigned bounds
1151     * are the same, so combine.  This works even in the negative case, e.g.
1152     * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1153     */
1154    if (reg->s32_min_value >= 0 || reg->s32_max_value < 0) {
1155        reg->s32_min_value = reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
1156        reg->s32_max_value = reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
1157        return;
1158    }
1159    /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1160     * boundary, so we must be careful.
1161     */
1162    if ((s32)reg->u32_max_value >= 0) {
1163        /* Positive.  We can't learn anything from the smin, but smax
1164         * is positive, hence safe.
1165         */
1166        reg->s32_min_value = reg->u32_min_value;
1167        reg->s32_max_value = reg->u32_max_value = min_t(u32, reg->s32_max_value, reg->u32_max_value);
1168    } else if ((s32)reg->u32_min_value < 0) {
1169        /* Negative.  We can't learn anything from the smax, but smin
1170         * is negative, hence safe.
1171         */
1172        reg->s32_min_value = reg->u32_min_value = max_t(u32, reg->s32_min_value, reg->u32_min_value);
1173        reg->s32_max_value = reg->u32_max_value;
1174    }
1175}
1176
1177static void verifier_reg64_deduce_bounds(struct bpf_reg_state *reg)
1178{
1179    /* Learn sign from signed bounds.
1180     * If we cannot cross the sign boundary, then signed and unsigned bounds
1181     * are the same, so combine.  This works even in the negative case, e.g.
1182     * -3 s<= x s<= -1 implies 0xf...fd u<= x u<= 0xf...ff.
1183     */
1184    if (reg->smin_value >= 0 || reg->smax_value < 0) {
1185        reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
1186        reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
1187        return;
1188    }
1189    /* Learn sign from unsigned bounds.  Signed bounds cross the sign
1190     * boundary, so we must be careful.
1191     */
1192    if ((s64)reg->umax_value >= 0) {
1193        /* Positive.  We can't learn anything from the smin, but smax
1194         * is positive, hence safe.
1195         */
1196        reg->smin_value = reg->umin_value;
1197        reg->smax_value = reg->umax_value = min_t(u64, reg->smax_value, reg->umax_value);
1198    } else if ((s64)reg->umin_value < 0) {
1199        /* Negative.  We can't learn anything from the smax, but smin
1200         * is negative, hence safe.
1201         */
1202        reg->smin_value = reg->umin_value = max_t(u64, reg->smin_value, reg->umin_value);
1203        reg->smax_value = reg->umax_value;
1204    }
1205}
1206
1207static void verifier_reg_deduce_bounds(struct bpf_reg_state *reg)
1208{
1209    verifier_reg32_deduce_bounds(reg);
1210    verifier_reg64_deduce_bounds(reg);
1211}
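
/* Worked example for the 64-bit case: suppose smin_value = -5 and
 * smax_value = 10 (the signed range crosses zero), but umax_value = 100.
 * Since (s64)100 >= 0 the value cannot actually be negative, so smin_value
 * is set to umin_value and smax_value/umax_value both collapse to
 * min(10, 100) = 10.
 */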
1212
1213/* Attempts to improve var_off based on unsigned min/max information */
1214static void verifier_reg_bound_offset(struct bpf_reg_state *reg)
1215{
1216    struct tnum var64_off = tnum_intersect(reg->var_off, tnum_range(reg->umin_value, reg->umax_value));
1217    struct tnum var32_off =
1218        tnum_intersect(tnum_subreg(reg->var_off), tnum_range(reg->u32_min_value, reg->u32_max_value));
1219
1220    reg->var_off = tnum_or(tnum_clear_subreg(var64_off), var32_off);
1221}
1222
1223static void reg_bounds_sync(struct bpf_reg_state *reg)
1224{
1225    /* We might have learned new bounds from the var_off. */
1226    verifier_update_reg_bounds(reg);
1227    /* We might have learned something about the sign bit. */
1228    verifier_reg_deduce_bounds(reg);
1229    /* We might have learned some bits from the bounds. */
1230    verifier_reg_bound_offset(reg);
1231    /* Intersecting with the old var_off might have improved our bounds
1232     * slightly, e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1233     * then new var_off is (0; 0x7f...fc) which improves our umax.
1234     */
1235    verifier_update_reg_bounds(reg);
1236}
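
/* End-to-end sketch of the pipeline above: starting from umax_value = 0x7f
 * and var_off = (0; 0xff), the first update pass keeps umax at 0x7f, the
 * deduction pass learns the value is non-negative, the bound/offset pass
 * intersects var_off with tnum_range(0, 0x7f) yielding (0; 0x7f), and the
 * final update pass re-derives the bounds from that tighter tnum.
 */
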
1237static bool verifier_reg32_bound_s64(s32 a)
1238{
1239    return a >= 0 && a <= S32_MAX;
1240}
1241
1242static void verifier_reg_assign_32_into_64(struct bpf_reg_state *reg)
1243{
1244    reg->umin_value = reg->u32_min_value;
1245    reg->umax_value = reg->u32_max_value;
1246
1247    /* Attempt to pull the 32-bit signed bounds into the 64-bit bounds; they must
1248     * be positive, otherwise set to worst-case bounds and refine later
1249     * from the tnum.
1250     */
1251    if (verifier_reg32_bound_s64(reg->s32_min_value) && verifier_reg32_bound_s64(reg->s32_max_value)) {
1252        reg->smin_value = reg->s32_min_value;
1253        reg->smax_value = reg->s32_max_value;
1254    } else {
1255        reg->smin_value = 0;
1256        reg->smax_value = U32_MAX;
1257    }
1258}
1259
1260static void verifier_reg_combine_32_into_64(struct bpf_reg_state *reg)
1261{
1262    /* special case when the 64-bit register has its upper 32 bits
1263     * zeroed. Typically happens after a zext or <<32, >>32 sequence,
1264     * allowing us to use the 32-bit bounds directly.
1265     */
1266    if (tnum_equals_const(tnum_clear_subreg(reg->var_off), 0)) {
1267        verifier_reg_assign_32_into_64(reg);
1268    } else {
1269        /* Otherwise the best we can do is push the lower 32-bit known and
1270         * unknown bits into the register (var_off set from jmp logic)
1271         * then learn as much as possible from the 64-bit tnum
1272         * known and unknown bits. The previous smin/smax bounds are
1273         * invalid here because of jmp32 compare so mark them unknown
1274         * so they do not impact tnum bounds calculation.
1275         */
1276        verifier_mark_reg64_unbounded(reg);
1277        verifier_update_reg_bounds(reg);
1278    }
1279
1280    /* Intersecting with the old var_off might have improved our bounds
1281     * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1282     * then new var_off is (0; 0x7f...fc) which improves our umax.
1283     */
1284    reg_bounds_sync(reg);
1285}
1286
1287static bool verifier_reg64_bound_s32(s64 a)
1288{
1289    return a > S32_MIN && a < S32_MAX;
1290}
1291
1292static bool verifier_reg64_bound_u32(u64 a)
1293{
1294    return a > U32_MIN && a < U32_MAX;
1295}
1296
1297static void __reg_combine_64_into_32(struct bpf_reg_state *reg)
1298{
1299    verifier_mark_reg32_unbounded(reg);
1300
1301    if (verifier_reg64_bound_s32(reg->smin_value) && verifier_reg64_bound_s32(reg->smax_value)) {
1302        reg->s32_min_value = (s32)reg->smin_value;
1303        reg->s32_max_value = (s32)reg->smax_value;
1304    }
1305    if (verifier_reg64_bound_u32(reg->umin_value) && verifier_reg64_bound_u32(reg->umax_value)) {
1306        reg->u32_min_value = (u32)reg->umin_value;
1307        reg->u32_max_value = (u32)reg->umax_value;
1308    }
1309
1310    /* Intersecting with the old var_off might have improved our bounds
1311     * slightly.  e.g. if umax was 0x7f...f and var_off was (0; 0xf...fc),
1312     * then new var_off is (0; 0x7f...fc) which improves our umax.
1313     */
1314    reg_bounds_sync(reg);
1315}
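
/* Worked example: a 64-bit signed range of [-5, 5] fits within s32, so the
 * 32-bit signed bounds become [-5, 5] as well; a 64-bit unsigned range of
 * [1, 100] likewise propagates to the u32 bounds.  A range that does not fit
 * (e.g. umax_value at or above U32_MAX) leaves the corresponding 32-bit
 * bounds at their unbounded defaults, to be tightened only via
 * reg_bounds_sync() and var_off.
 */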
1316
1317/* Mark a register as having a completely unknown (scalar) value. */
1318static void __mark_reg_unknown(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1319{
1320    /*
1321     * Clear type, id, off, and union(map_ptr, range) and
1322     * padding between 'type' and union
1323     */
1324    memset(reg, 0, offsetof(struct bpf_reg_state, var_off));
1325    reg->type = SCALAR_VALUE;
1326    reg->var_off = tnum_unknown;
1327    reg->frameno = 0;
1328    reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
1329    verifier_mark_reg_unbounded(reg);
1330}
1331
1332static void mark_reg_unknown(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1333{
1334    if (WARN_ON(regno >= MAX_BPF_REG)) {
1335        verbose(env, "mark_reg_unknown(regs, %u)\n", regno);
1336        /* Something bad happened, let's kill all regs except FP */
1337        for (regno = 0; regno < BPF_REG_FP; regno++) {
1338            verifier_mark_reg_not_init(env, regs + regno);
1339        }
1340        return;
1341    }
1342    __mark_reg_unknown(env, regs + regno);
1343}
1344
1345static void verifier_mark_reg_not_init(const struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1346{
1347    __mark_reg_unknown(env, reg);
1348    reg->type = NOT_INIT;
1349}
1350
1351static void mark_reg_not_init(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno)
1352{
1353    if (WARN_ON(regno >= MAX_BPF_REG)) {
1354        verbose(env, "mark_reg_not_init(regs, %u)\n", regno);
1355        /* Something bad happened, let's kill all regs except FP */
1356        for (regno = 0; regno < BPF_REG_FP; regno++) {
1357            verifier_mark_reg_not_init(env, regs + regno);
1358        }
1359        return;
1360    }
1361    verifier_mark_reg_not_init(env, regs + regno);
1362}
1363
1364static void mark_btf_ld_reg(struct bpf_verifier_env *env, struct bpf_reg_state *regs, u32 regno,
1365                            enum bpf_reg_type reg_type, u32 btf_id)
1366{
1367    if (reg_type == SCALAR_VALUE) {
1368        mark_reg_unknown(env, regs, regno);
1369        return;
1370    }
1371    mark_reg_known_zero(env, regs, regno);
1372    regs[regno].type = PTR_TO_BTF_ID;
1373    regs[regno].btf_id = btf_id;
1374}
1375
1376#define DEF_NOT_SUBREG (0)
1377static void init_reg_state(struct bpf_verifier_env *env, struct bpf_func_state *state)
1378{
1379    struct bpf_reg_state *regs = state->regs;
1380    int i;
1381
1382    for (i = 0; i < MAX_BPF_REG; i++) {
1383        mark_reg_not_init(env, regs, i);
1384        regs[i].live = REG_LIVE_NONE;
1385        regs[i].parent = NULL;
1386        regs[i].subreg_def = DEF_NOT_SUBREG;
1387    }
1388
1389    /* frame pointer */
1390    regs[BPF_REG_FP].type = PTR_TO_STACK;
1391    mark_reg_known_zero(env, regs, BPF_REG_FP);
1392    regs[BPF_REG_FP].frameno = state->frameno;
1393}
1394
1395#define BPF_MAIN_FUNC (-1)
1396static void init_func_state(struct bpf_verifier_env *env, struct bpf_func_state *state, int callsite, int frameno,
1397                            int subprogno)
1398{
1399    state->callsite = callsite;
1400    state->frameno = frameno;
1401    state->subprogno = subprogno;
1402    init_reg_state(env, state);
1403}
1404
1405enum reg_arg_type {
1406    SRC_OP,        /* register is used as source operand */
1407    DST_OP,        /* register is used as destination operand */
1408    DST_OP_NO_MARK /* same as above, check only, don't mark */
1409};
1410
1411static int cmp_subprogs(const void *a, const void *b)
1412{
1413    return ((struct bpf_subprog_info *)a)->start - ((struct bpf_subprog_info *)b)->start;
1414}
1415
1416static int find_subprog(struct bpf_verifier_env *env, int off)
1417{
1418    struct bpf_subprog_info *p;
1419
1420    p = bsearch(&off, env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs);
1421    if (!p) {
1422        return -ENOENT;
1423    }
1424    return p - env->subprog_info;
1425}
1426
1427static int add_subprog(struct bpf_verifier_env *env, int off)
1428{
1429    int insn_cnt = env->prog->len;
1430    int ret;
1431
1432    if (off >= insn_cnt || off < 0) {
1433        verbose(env, "call to invalid destination\n");
1434        return -EINVAL;
1435    }
1436    ret = find_subprog(env, off);
1437    if (ret >= 0) {
1438        return 0;
1439    }
1440    if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
1441        verbose(env, "too many subprograms\n");
1442        return -E2BIG;
1443    }
1444    env->subprog_info[env->subprog_cnt++].start = off;
1445    sort(env->subprog_info, env->subprog_cnt, sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
1446    return 0;
1447}
1448
1449static int check_subprogs(struct bpf_verifier_env *env)
1450{
1451    int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
1452    struct bpf_subprog_info *subprog = env->subprog_info;
1453    struct bpf_insn *insn = env->prog->insnsi;
1454    int insn_cnt = env->prog->len;
1455
1456    /* Add entry function. */
1457    ret = add_subprog(env, 0);
1458    if (ret < 0) {
1459        return ret;
1460    }
1461
1462    /* determine subprog starts. The end is one before the next starts */
1463    for (i = 0; i < insn_cnt; i++) {
1464        if (insn[i].code != (BPF_JMP | BPF_CALL)) {
1465            continue;
1466        }
1467        if (insn[i].src_reg != BPF_PSEUDO_CALL) {
1468            continue;
1469        }
1470        if (!env->bpf_capable) {
1471            verbose(env, "function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
1472            return -EPERM;
1473        }
1474        ret = add_subprog(env, i + insn[i].imm + 1);
1475        if (ret < 0) {
1476            return ret;
1477        }
1478    }
1479
1480    /* Add a fake 'exit' subprog which could simplify subprog iteration
1481     * logic. 'subprog_cnt' should not be increased.
1482     */
1483    subprog[env->subprog_cnt].start = insn_cnt;
1484
1485    if (env->log.level & BPF_LOG_LEVEL2) {
1486        for (i = 0; i < env->subprog_cnt; i++) {
1487            verbose(env, "func#%d @%d\n", i, subprog[i].start);
1488        }
1489    }
1490
1491    /* now check that all jumps are within the same subprog */
1492    subprog_start = subprog[cur_subprog].start;
1493    subprog_end = subprog[cur_subprog + 1].start;
1494    for (i = 0; i < insn_cnt; i++) {
1495        u8 code = insn[i].code;
1496
1497        if (code == (BPF_JMP | BPF_CALL) && insn[i].imm == BPF_FUNC_tail_call && insn[i].src_reg != BPF_PSEUDO_CALL) {
1498            subprog[cur_subprog].has_tail_call = true;
1499        }
1500        if (BPF_CLASS(code) == BPF_LD && (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) {
1501            subprog[cur_subprog].has_ld_abs = true;
1502        }
1503        if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) {
1504            goto next;
1505        }
1506        if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) {
1507            goto next;
1508        }
1509        off = i + insn[i].off + 1;
1510        if (off < subprog_start || off >= subprog_end) {
1511            verbose(env, "jump out of range from insn %d to %d\n", i, off);
1512            return -EINVAL;
1513        }
1514    next:
1515        if (i == subprog_end - 1) {
1516            /* to avoid fall-through from one subprog into another
1517             * the last insn of the subprog should be either exit
1518             * or unconditional jump back
1519             */
1520            if (code != (BPF_JMP | BPF_EXIT) && code != (BPF_JMP | BPF_JA)) {
1521                verbose(env, "last insn is not an exit or jmp\n");
1522                return -EINVAL;
1523            }
1524            subprog_start = subprog_end;
1525            cur_subprog++;
1526            if (cur_subprog < env->subprog_cnt) {
1527                subprog_end = subprog[cur_subprog + 1].start;
1528            }
1529        }
1530    }
1531    return 0;
1532}
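
/* Illustrative (hypothetical) program layout for check_subprogs():
 *   0: r1 = 1
 *   1: call pc+2        // BPF_PSEUDO_CALL, target = 1 + 2 + 1 = insn 4
 *   2: r0 = 0
 *   3: exit
 *   4: r0 = r1
 *   5: exit
 * yields subprog starts {0, 4} plus the fake exit subprog starting at 6,
 * so subprog #0 spans insns [0, 4) and #1 spans [4, 6).  A jump from
 * inside subprog #0 that lands at insn 4 or later is rejected with
 * "jump out of range", and each subprog must end in 'exit' or 'ja'.
 */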
1533
1534/* Parentage chain of this register (or stack slot) should take care of all
1535 * issues like callee-saved registers, stack slot allocation time, etc.
1536 */
1537static int mark_reg_read(struct bpf_verifier_env *env, const struct bpf_reg_state *state, struct bpf_reg_state *parent,
1538                         u8 flag)
1539{
1540    bool writes = parent == state->parent; /* Observe write marks */
1541    int cnt = 0;
1542
1543    while (parent) {
1544        /* if read wasn't screened by an earlier write ... */
1545        if (writes && (state->live & REG_LIVE_WRITTEN)) {
1546            break;
1547        }
1548        if (parent->live & REG_LIVE_DONE) {
1549            verbose(env, "verifier BUG type %s var_off %lld off %d\n", reg_type_str(env, parent->type),
1550                    parent->var_off.value, parent->off);
1551            return -EFAULT;
1552        }
        /* The first condition is more likely to be true than the
         * second, so it is checked first.
         */
1556        if ((parent->live & REG_LIVE_READ) == flag || (parent->live & REG_LIVE_READ64)) {
1557            /* The parentage chain never changes and
1558             * this parent was already marked as LIVE_READ.
1559             * There is no need to keep walking the chain again and
1560             * keep re-marking all parents as LIVE_READ.
1561             * This case happens when the same register is read
1562             * multiple times without writes into it in-between.
1563             * Also, if parent has the stronger REG_LIVE_READ64 set,
1564             * then no need to set the weak REG_LIVE_READ32.
1565             */
1566            break;
1567        }
1568        /* ... then we depend on parent's value */
1569        parent->live |= flag;
1570        /* REG_LIVE_READ64 overrides REG_LIVE_READ32. */
1571        if (flag == REG_LIVE_READ64) {
1572            parent->live &= ~REG_LIVE_READ32;
1573        }
1574        state = parent;
1575        parent = state->parent;
1576        writes = true;
1577        cnt++;
1578    }
1579
1580    if (env->longest_mark_read_walk < cnt) {
1581        env->longest_mark_read_walk = cnt;
1582    }
1583    return 0;
1584}
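
/* Sketch of how the read marks above propagate (simplified, hypothetical
 * example): suppose a parent verifier state S was recorded before
 *   1: r6 += 1
 * is verified.  Reading r6 at insn 1 walks r6's parentage chain and ORs
 * REG_LIVE_READ64 into S's copy of r6 (and its ancestors).  The walk stops
 * early when a parent copy already carries an equal or stronger read mark,
 * or when the read is screened by a REG_LIVE_WRITTEN mark.  These marks
 * tell state pruning which registers the explored paths actually depended
 * on.
 */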
1585
/* This function is supposed to be used by the following 32-bit optimization
 * code only. It returns TRUE if the source or destination register operates
 * on 64 bits, otherwise it returns FALSE.
 */
1590static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn, u32 regno, struct bpf_reg_state *reg,
1591                     enum reg_arg_type t)
1592{
1593    u8 code, class, op;
1594
1595    code = insn->code;
1596    class = BPF_CLASS(code);
1597    op = BPF_OP(code);
1598    if (class == BPF_JMP) {
1599        /* BPF_EXIT for "main" will reach here. Return TRUE
1600         * conservatively.
1601         */
1602        if (op == BPF_EXIT) {
1603            return true;
1604        }
1605        if (op == BPF_CALL) {
            /* BPF to BPF call will reach here because caller-saved
             * clobbers are marked with DST_OP_NO_MARK, for which we
             * don't care about the register def since they are
             * already marked as NOT_INIT.
             */
1611            if (insn->src_reg == BPF_PSEUDO_CALL) {
1612                return false;
1613            }
1614            /* Helper call will reach here because of arg type
1615             * check, conservatively return TRUE.
1616             */
1617            if (t == SRC_OP) {
1618                return true;
1619            }
1620
1621            return false;
1622        }
1623    }
1624
1625    if (class == BPF_ALU64 || class == BPF_JMP ||
1626        /* BPF_END always use BPF_ALU class. */
1627        (class == BPF_ALU && op == BPF_END && insn->imm == VERIFIER_SIXTYFOUR)) {
1628        return true;
1629    }
1630
1631    if (class == BPF_ALU || class == BPF_JMP32) {
1632        return false;
1633    }
1634
1635    if (class == BPF_LDX) {
1636        if (t != SRC_OP) {
1637            return BPF_SIZE(code) == BPF_DW;
1638        }
1639        /* LDX source must be ptr. */
1640        return true;
1641    }
1642
1643    if (class == BPF_STX) {
1644        if (reg->type != SCALAR_VALUE) {
1645            return true;
1646        }
1647        return BPF_SIZE(code) == BPF_DW;
1648    }
1649
1650    if (class == BPF_LD) {
1651        u8 mode = BPF_MODE(code);
1652        /* LD_IMM64 */
1653        if (mode == BPF_IMM) {
1654            return true;
1655        }
1656
1657        /* Both LD_IND and LD_ABS return 32-bit data. */
1658        if (t != SRC_OP) {
1659            return false;
1660        }
1661
1662        /* Implicit ctx ptr. */
1663        if (regno == BPF_REG_6) {
1664            return true;
1665        }
1666
1667        /* Explicit source could be any width. */
1668        return true;
1669    }
1670
1671    if (class == BPF_ST) {
1672        /* The only source register for BPF_ST is a ptr. */
1673        return true;
1674    }
1675
1676    /* Conservatively return true at default. */
1677    return true;
1678}
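
/* A few concrete cases for is_reg64(), for illustration (using the insn
 * macros from filter.h):
 *   BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1)        -> true  (ALU64 class)
 *   BPF_ALU32_IMM(BPF_MOV, BPF_REG_1, 1)        -> false (32-bit ALU)
 *   BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_2, 0) -> false for the dst def
 *                                                  (32-bit load), true for
 *                                                  the src use (pointer)
 *   BPF_LD_IMM64(BPF_REG_1, 0)                  -> true  (64-bit immediate)
 */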
1679
/* Return TRUE if INSN doesn't define an explicit value. */
1681static bool insn_no_def(struct bpf_insn *insn)
1682{
1683    u8 class = BPF_CLASS(insn->code);
1684
1685    return (class == BPF_JMP || class == BPF_JMP32 || class == BPF_STX || class == BPF_ST);
1686}
1687
1688/* Return TRUE if INSN has defined any 32-bit value explicitly. */
1689static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
1690{
1691    if (insn_no_def(insn)) {
1692        return false;
1693    }
1694
1695    return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
1696}
1697
1698static void mark_insn_zext(struct bpf_verifier_env *env, struct bpf_reg_state *reg)
1699{
1700    s32 def_idx = reg->subreg_def;
1701
1702    if (def_idx == DEF_NOT_SUBREG) {
1703        return;
1704    }
1705
1706    env->insn_aux_data[def_idx - 1].zext_dst = true;
1707    /* The dst will be zero extended, so won't be sub-register anymore. */
1708    reg->subreg_def = DEF_NOT_SUBREG;
1709}
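
/* Example of the subreg_def / zext_dst interplay (hypothetical insns):
 *   5: w1 = w2      // 32-bit def: r1->subreg_def = 6 (insn_idx + 1)
 *   ...
 *   9: r3 += r1     // 64-bit read of r1: mark_insn_zext() sets
 *                   // insn_aux_data[5].zext_dst, so the 32-bit def at
 *                   // insn 5 later gets an explicit zero extension.
 */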
1710
1711static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, enum reg_arg_type t)
1712{
1713    struct bpf_verifier_state *vstate = env->cur_state;
1714    struct bpf_func_state *state = vstate->frame[vstate->curframe];
1715    struct bpf_insn *insn = env->prog->insnsi + env->insn_idx;
1716    struct bpf_reg_state *reg, *regs = state->regs;
1717    bool rw64;
1718
1719    if (regno >= MAX_BPF_REG) {
1720        verbose(env, "R%d is invalid\n", regno);
1721        return -EINVAL;
1722    }
1723
1724    reg = &regs[regno];
1725    rw64 = is_reg64(env, insn, regno, reg, t);
1726    if (t == SRC_OP) {
1727        /* check whether register used as source operand can be read */
1728        if (reg->type == NOT_INIT) {
1729            verbose(env, "R%d !read_ok\n", regno);
1730            return -EACCES;
1731        }
1732        /* We don't need to worry about FP liveness because it's read-only */
1733        if (regno == BPF_REG_FP) {
1734            return 0;
1735        }
1736
1737        if (rw64) {
1738            mark_insn_zext(env, reg);
1739        }
1740
1741        return mark_reg_read(env, reg, reg->parent, rw64 ? REG_LIVE_READ64 : REG_LIVE_READ32);
1742    } else {
1743        /* check whether register used as dest operand can be written to */
1744        if (regno == BPF_REG_FP) {
1745            verbose(env, "frame pointer is read only\n");
1746            return -EACCES;
1747        }
1748        reg->live |= REG_LIVE_WRITTEN;
1749        reg->subreg_def = rw64 ? DEF_NOT_SUBREG : env->insn_idx + 1;
1750        if (t == DST_OP) {
1751            mark_reg_unknown(env, regs, regno);
1752        }
1753    }
1754    return 0;
1755}
1756
1757/* for any branch, call, exit record the history of jmps in the given state */
1758static int push_jmp_history(struct bpf_verifier_env *env, struct bpf_verifier_state *cur)
1759{
1760    u32 cnt = cur->jmp_history_cnt;
1761    struct bpf_idx_pair *p;
1762
1763    cnt++;
1764    p = krealloc(cur->jmp_history, cnt * sizeof(*p), GFP_USER);
1765    if (!p) {
1766        return -ENOMEM;
1767    }
1768    p[cnt - 1].idx = env->insn_idx;
1769    p[cnt - 1].prev_idx = env->prev_insn_idx;
1770    cur->jmp_history = p;
1771    cur->jmp_history_cnt = cnt;
1772    return 0;
1773}
1774
1775/* Backtrack one insn at a time. If idx is not at the top of recorded
1776 * history then previous instruction came from straight line execution.
1777 */
1778static int get_prev_insn_idx(struct bpf_verifier_state *st, int i, u32 *history)
1779{
1780    u32 cnt = *history;
1781
1782    if (cnt && st->jmp_history[cnt - 1].idx == i) {
1783        i = st->jmp_history[cnt - 1].prev_idx;
1784        (*history)--;
1785    } else {
1786        i--;
1787    }
1788    return i;
1789}
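
/* Example walk (hypothetical history): with
 *   jmp_history = [{idx = 7, prev_idx = 3}]
 * and i == 7, the previous insn is 3 (the recorded jump that led here) and
 * the history counter drops to 0; for any other i the previous insn is
 * simply i - 1, i.e. straight-line execution.
 */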
1790
/* For a given verifier state backtrack_insn() is called from the last insn to
 * the first insn. Its purpose is to compute a bitmask of registers and
 * stack slots that need precision in the parent verifier state.
 */
1795static int backtrack_insn(struct bpf_verifier_env *env, int idx, u32 *reg_mask, u64 *stack_mask)
1796{
1797    const struct bpf_insn_cbs cbs = {
1798        .cb_print = verbose,
1799        .private_data = env,
1800    };
1801    struct bpf_insn *insn = env->prog->insnsi + idx;
1802    u8 class = BPF_CLASS(insn->code);
1803    u8 opcode = BPF_OP(insn->code);
1804    u8 mode = BPF_MODE(insn->code);
1805    u32 dreg = 1u << insn->dst_reg;
1806    u32 sreg = 1u << insn->src_reg;
1807    u32 spi;
1808
1809    if (insn->code == 0) {
1810        return 0;
1811    }
1812    if (env->log.level & BPF_LOG_LEVEL) {
1813        verbose(env, "regs=%x stack=%llx before ", *reg_mask, *stack_mask);
1814        verbose(env, "%d: ", idx);
1815        print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
1816    }
1817
1818    if (class == BPF_ALU || class == BPF_ALU64) {
1819        if (!(*reg_mask & dreg)) {
1820            return 0;
1821        }
1822        if (opcode == BPF_MOV) {
1823            if (BPF_SRC(insn->code) == BPF_X) {
1824                /* dreg = sreg
1825                 * dreg needs precision after this insn
1826                 * sreg needs precision before this insn
1827                 */
1828                *reg_mask &= ~dreg;
1829                *reg_mask |= sreg;
1830            } else {
1831                /* dreg = K
1832                 * dreg needs precision after this insn.
1833                 * Corresponding register is already marked
1834                 * as precise=true in this verifier state.
1835                 * No further markings in parent are necessary
1836                 */
1837                *reg_mask &= ~dreg;
1838            }
1839        } else {
1840            if (BPF_SRC(insn->code) == BPF_X) {
1841                /* dreg += sreg
1842                 * both dreg and sreg need precision
1843                 * before this insn
1844                 */
1845                *reg_mask |= sreg;
1846            }
1847            /* else dreg += K
1848             * dreg still needs precision before this insn
1849             */
1850        }
1851    } else if (class == BPF_LDX) {
1852        if (!(*reg_mask & dreg)) {
1853            return 0;
1854        }
1855        *reg_mask &= ~dreg;
1856
1857        /* scalars can only be spilled into stack w/o losing precision.
1858         * Load from any other memory can be zero extended.
1859         * The desire to keep that precision is already indicated
1860         * by 'precise' mark in corresponding register of this state.
1861         * No further tracking necessary.
1862         */
1863        if (insn->src_reg != BPF_REG_FP) {
1864            return 0;
1865        }
1866        if (BPF_SIZE(insn->code) != BPF_DW) {
1867            return 0;
1868        }
1869
1870        /* dreg = *(u64 *)[fp - off] was a fill from the stack.
1871         * that [fp - off] slot contains scalar that needs to be
1872         * tracked with precision
1873         */
1874        spi = (-insn->off - 1) / BPF_REG_SIZE;
1875        if (spi >= VERIFIER_SIXTYFOUR) {
1876            verbose(env, "BUG spi %d\n", spi);
1877            WARN_ONCE(1, "verifier backtracking bug");
1878            return -EFAULT;
1879        }
1880        *stack_mask |= 1ull << spi;
1881    } else if (class == BPF_STX || class == BPF_ST) {
1882        if (*reg_mask & dreg) {
1883            /* stx & st shouldn't be using _scalar_ dst_reg
1884             * to access memory. It means backtracking
1885             * encountered a case of pointer subtraction.
1886             */
1887            return -ENOTSUPP;
1888        }
1889        /* scalars can only be spilled into stack */
1890        if (insn->dst_reg != BPF_REG_FP) {
1891            return 0;
1892        }
1893        if (BPF_SIZE(insn->code) != BPF_DW) {
1894            return 0;
1895        }
1896        spi = (-insn->off - 1) / BPF_REG_SIZE;
1897        if (spi >= VERIFIER_SIXTYFOUR) {
1898            verbose(env, "BUG spi %d\n", spi);
1899            WARN_ONCE(1, "verifier backtracking bug");
1900            return -EFAULT;
1901        }
1902        if (!(*stack_mask & (1ull << spi))) {
1903            return 0;
1904        }
1905        *stack_mask &= ~(1ull << spi);
1906        if (class == BPF_STX) {
1907            *reg_mask |= sreg;
1908        }
1909    } else if (class == BPF_JMP || class == BPF_JMP32) {
1910        if (opcode == BPF_CALL) {
1911            if (insn->src_reg == BPF_PSEUDO_CALL) {
1912                return -ENOTSUPP;
1913            }
1914            /* regular helper call sets R0 */
1915            *reg_mask &= ~1;
1916            if (*reg_mask & 0x3f) {
1917                /* if backtracing was looking for registers R1-R5
1918                 * they should have been found already.
1919                 */
1920                verbose(env, "BUG regs %x\n", *reg_mask);
1921                WARN_ONCE(1, "verifier backtracking bug");
1922                return -EFAULT;
1923            }
1924        } else if (opcode == BPF_EXIT) {
1925            return -ENOTSUPP;
1926        }
1927    } else if (class == BPF_LD) {
1928        if (!(*reg_mask & dreg)) {
1929            return 0;
1930        }
1931        *reg_mask &= ~dreg;
1932        /* It's ld_imm64 or ld_abs or ld_ind.
1933         * For ld_imm64 no further tracking of precision
1934         * into parent is necessary
1935         */
1936        if (mode == BPF_IND || mode == BPF_ABS) {
1937            /* to be analyzed */
1938            return -ENOTSUPP;
1939        }
1940    }
1941    return 0;
1942}
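
/* Worked example for backtrack_insn() (made-up insn stream), starting with
 * r2 in reg_mask and walking backwards:
 *   7: r2 = r6                  // MOV X: drop r2 from reg_mask, add r6
 *   6: r6 += r7                 // ADD X: r6 stays, r7 is added
 *   5: r6 = *(u64 *)(r10 - 8)   // fill from fp-8: drop r6, set the bit
 *                               // for stack slot fp-8 in stack_mask
 * so precision continues to be tracked for r7 and for the fp-8 slot in the
 * parent states.
 */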
1943
1944/* the scalar precision tracking algorithm:
1945 * . at the start all registers have precise=false.
1946 * . scalar ranges are tracked as normal through alu and jmp insns.
1947 * . once precise value of the scalar register is used in:
1948 *   .  ptr + scalar alu
1949 *   . if (scalar cond K|scalar)
1950 *   .  helper_call(.., scalar, ...) where ARG_CONST is expected
 *   backtrack through the verifier states and mark as precise all
 *   registers and stack slots with spilled constants that these
 *   scalar registers were derived from.
1954 * . during state pruning two registers (or spilled stack slots)
1955 *   are equivalent if both are not precise.
1956 *
1957 * Note the verifier cannot simply walk register parentage chain,
1958 * since many different registers and stack slots could have been
1959 * used to compute single precise scalar.
1960 *
 * The approach of starting with precise=true for all registers and then
 * backtracking to mark a register as not precise when the verifier detects
 * that the program doesn't care about the specific value (e.g., when a helper
 * takes the register as an ARG_ANYTHING parameter) is not safe.
1965 *
1966 * It's ok to walk single parentage chain of the verifier states.
1967 * It's possible that this backtracking will go all the way till 1st insn.
1968 * All other branches will be explored for needing precision later.
1969 *
1970 * The backtracking needs to deal with cases like:
1971 *   R8=map_value(id=0,off=0,ks=4,vs=1952,imm=0) R9_w=map_value(id=0,off=40,ks=4,vs=1952,imm=0)
1972 * r9 -= r8
1973 * r5 = r9
1974 * if r5 > 0x79f goto pc+7
1975 *    R5_w=inv(id=0,umax_value=1951,var_off=(0x0; 0x7ff))
1976 * r5 += 1
1977 * ...
1978 * call bpf_perf_event_output#25
1979 *   where .arg5_type = ARG_CONST_SIZE_OR_ZERO
1980 *
1981 * and this case:
1982 * r6 = 1
1983 * call foo // uses callee's r6 inside to compute r0
1984 * r0 += r6
1985 * if r0 == 0 goto
1986 *
 * To track the above, reg_mask/stack_mask need to be independent for each frame.
1988 *
1989 * Also if parent's curframe > frame where backtracking started,
 * the verifier needs to mark registers in both frames, otherwise callees
1991 * may incorrectly prune callers. This is similar to
1992 * commit 7640ead93924 ("bpf: verifier: make sure callees don't prune with caller differences")
1993 *
1994 * For now backtracking falls back into conservative marking.
1995 */
1996static void mark_all_scalars_precise(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
1997{
1998    struct bpf_func_state *func;
1999    struct bpf_reg_state *reg;
2000    int i, j;
2001
2002    /* big hammer: mark all scalars precise in this path.
2003     * pop_stack may still get !precise scalars.
2004     */
2005    for (; st; st = st->parent) {
2006        for (i = 0; i <= st->curframe; i++) {
2007            func = st->frame[i];
2008            for (j = 0; j < BPF_REG_FP; j++) {
2009                reg = &func->regs[j];
2010                if (reg->type != SCALAR_VALUE) {
2011                    continue;
2012                }
2013                reg->precise = true;
2014            }
2015            for (j = 0; j < func->allocated_stack / BPF_REG_SIZE; j++) {
2016                if (func->stack[j].slot_type[0] != STACK_SPILL) {
2017                    continue;
2018                }
2019                reg = &func->stack[j].spilled_ptr;
2020                if (reg->type != SCALAR_VALUE) {
2021                    continue;
2022                }
2023                reg->precise = true;
2024            }
2025        }
2026    }
2027}
2028
2029static int __mark_chain_precision(struct bpf_verifier_env *env, int regno, int spi)
2030{
2031    struct bpf_verifier_state *st = env->cur_state;
2032    int first_idx = st->first_insn_idx;
2033    int last_idx = env->insn_idx;
2034    struct bpf_func_state *func;
2035    struct bpf_reg_state *reg;
2036    u32 reg_mask = regno >= 0 ? 1u << regno : 0;
2037    u64 stack_mask = spi >= 0 ? 1ull << spi : 0;
2038    bool skip_first = true;
2039    bool new_marks = false;
2040    int i, err;
2041
2042    if (!env->bpf_capable) {
2043        return 0;
2044    }
2045
2046    func = st->frame[st->curframe];
2047    if (regno >= 0) {
2048        reg = &func->regs[regno];
2049        if (reg->type != SCALAR_VALUE) {
2050            WARN_ONCE(1, "backtracing misuse");
2051            return -EFAULT;
2052        }
2053        if (!reg->precise) {
2054            new_marks = true;
2055        } else {
2056            reg_mask = 0;
2057        }
2058        reg->precise = true;
2059    }
2060
2061    while (spi >= 0) {
2062        if (func->stack[spi].slot_type[0] != STACK_SPILL) {
2063            stack_mask = 0;
2064            break;
2065        }
2066        reg = &func->stack[spi].spilled_ptr;
2067        if (reg->type != SCALAR_VALUE) {
2068            stack_mask = 0;
2069            break;
2070        }
2071        if (!reg->precise) {
2072            new_marks = true;
2073        } else {
2074            stack_mask = 0;
2075        }
2076        reg->precise = true;
2077        break;
2078    }
2079
2080    if (!new_marks) {
2081        return 0;
2082    }
2083    if (!reg_mask && !stack_mask) {
2084        return 0;
2085    }
2086    for (;;) {
2087        DECLARE_BITMAP(mask, VERIFIER_SIXTYFOUR);
2088        u32 history = st->jmp_history_cnt;
2089
2090        if (env->log.level & BPF_LOG_LEVEL) {
2091            verbose(env, "last_idx %d first_idx %d\n", last_idx, first_idx);
2092        }
2093        for (i = last_idx;;) {
2094            if (skip_first) {
2095                err = 0;
2096                skip_first = false;
2097            } else {
2098                err = backtrack_insn(env, i, &reg_mask, &stack_mask);
2099            }
2100            if (err == -ENOTSUPP) {
2101                mark_all_scalars_precise(env, st);
2102                return 0;
2103            } else if (err) {
2104                return err;
2105            }
2106            if (!reg_mask && !stack_mask) {
2107                /* Found assignment(s) into tracked register in this state.
2108                 * Since this state is already marked, just return.
2109                 * Nothing to be tracked further in the parent state.
2110                 */
2111                return 0;
2112            }
2113            if (i == first_idx) {
2114                break;
2115            }
2116            i = get_prev_insn_idx(st, i, &history);
2117            if (i >= env->prog->len) {
2118                /* This can happen if backtracking reached insn 0
2119                 * and there are still reg_mask or stack_mask
2120                 * to backtrack.
2121                 * It means the backtracking missed the spot where
2122                 * particular register was initialized with a constant.
2123                 */
2124                verbose(env, "BUG backtracking idx %d\n", i);
2125                WARN_ONCE(1, "verifier backtracking bug");
2126                return -EFAULT;
2127            }
2128        }
2129        st = st->parent;
2130        if (!st) {
2131            break;
2132        }
2133
2134        new_marks = false;
2135        func = st->frame[st->curframe];
2136        bitmap_from_u64(mask, reg_mask);
        for_each_set_bit(i, mask, 0x20) {
2139            reg = &func->regs[i];
2140            if (reg->type != SCALAR_VALUE) {
2141                reg_mask &= ~(1u << i);
2142                continue;
2143            }
2144            if (!reg->precise) {
2145                new_marks = true;
2146            }
2147            reg->precise = true;
2148        }
2149
2150        bitmap_from_u64(mask, stack_mask);
        for_each_set_bit(i, mask, VERIFIER_SIXTYFOUR) {
2153            if (i >= func->allocated_stack / BPF_REG_SIZE) {
2154                /* the sequence of instructions:
2155                 * 2: (bf) r3 = r10
2156                 * 3: (7b) *(u64 *)(r3 -8) = r0
2157                 * 4: (79) r4 = *(u64 *)(r10 -8)
2158                 * doesn't contain jmps. It's backtracked
2159                 * as a single block.
2160                 * During backtracking insn 3 is not recognized as
2161                 * stack access, so at the end of backtracking
2162                 * stack slot fp-8 is still marked in stack_mask.
2163                 * However the parent state may not have accessed
2164                 * fp-8 and it's "unallocated" stack space.
2165                 * In such case fallback to conservative.
2166                 */
2167                mark_all_scalars_precise(env, st);
2168                return 0;
2169            }
2170
2171            if (func->stack[i].slot_type[0] != STACK_SPILL) {
2172                stack_mask &= ~(1ull << i);
2173                continue;
2174            }
2175            reg = &func->stack[i].spilled_ptr;
2176            if (reg->type != SCALAR_VALUE) {
2177                stack_mask &= ~(1ull << i);
2178                continue;
2179            }
2180            if (!reg->precise) {
2181                new_marks = true;
2182            }
2183            reg->precise = true;
2184        }
2185        if (env->log.level & BPF_LOG_LEVEL) {
2186            print_verifier_state(env, func);
2187            verbose(env, "parent %s regs=%x stack=%llx marks\n", new_marks ? "didn't have" : "already had", reg_mask,
2188                    stack_mask);
2189        }
2190
2191        if (!reg_mask && !stack_mask) {
2192            break;
2193        }
2194        if (!new_marks) {
2195            break;
2196        }
2197
2198        last_idx = st->last_insn_idx;
2199        first_idx = st->first_insn_idx;
2200    }
2201    return 0;
2202}
2203
2204static int mark_chain_precision(struct bpf_verifier_env *env, int regno)
2205{
2206    return __mark_chain_precision(env, regno, -1);
2207}
2208
2209static int mark_chain_precision_stack(struct bpf_verifier_env *env, int spi)
2210{
2211    return __mark_chain_precision(env, -1, spi);
2212}
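
/* Usage sketch for mark_chain_precision() (illustrative): after
 *   4: r2 = *(u64 *)(r10 - 8)
 *   5: if r2 > 0x1f goto +7
 * the verification result depends on r2's exact range, so the verifier
 * requests precision for r2.  That marks r2 precise in the current state
 * and backtracks through jmp_history and the parent states, marking the
 * fp-8 stack slot (and whatever produced its value) precise as well, so
 * that state pruning does not treat states with different r2 values as
 * equivalent.
 */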
2213
2214static bool is_spillable_regtype(enum bpf_reg_type type)
2215{
2216    switch (base_type(type)) {
2217        case PTR_TO_MAP_VALUE:
2218        case PTR_TO_STACK:
2219        case PTR_TO_CTX:
2220        case PTR_TO_PACKET:
2221        case PTR_TO_PACKET_META:
2222        case PTR_TO_PACKET_END:
2223        case PTR_TO_FLOW_KEYS:
2224        case CONST_PTR_TO_MAP:
2225        case PTR_TO_SOCKET:
2226        case PTR_TO_SOCK_COMMON:
2227        case PTR_TO_TCP_SOCK:
2228        case PTR_TO_XDP_SOCK:
2229        case PTR_TO_BTF_ID:
2230        case PTR_TO_BUF:
2231        case PTR_TO_PERCPU_BTF_ID:
2232        case PTR_TO_MEM:
2233            return true;
2234        default:
2235            return false;
2236    }
2237}
2238
2239/* Does this register contain a constant zero? */
2240static bool register_is_null(struct bpf_reg_state *reg)
2241{
2242    return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0);
2243}
2244
2245static bool register_is_const(struct bpf_reg_state *reg)
2246{
2247    return reg->type == SCALAR_VALUE && tnum_is_const(reg->var_off);
2248}
2249
2250static bool __is_scalar_unbounded(struct bpf_reg_state *reg)
2251{
2252    return tnum_is_unknown(reg->var_off) && reg->smin_value == S64_MIN && reg->smax_value == S64_MAX &&
2253           reg->umin_value == 0 && reg->umax_value == U64_MAX && reg->s32_min_value == S32_MIN &&
2254           reg->s32_max_value == S32_MAX && reg->u32_min_value == 0 && reg->u32_max_value == U32_MAX;
2255}
2256
2257static bool register_is_bounded(struct bpf_reg_state *reg)
2258{
2259    return reg->type == SCALAR_VALUE && !__is_scalar_unbounded(reg);
2260}
2261
2262static bool __is_pointer_value(bool allow_ptr_leaks, const struct bpf_reg_state *reg)
2263{
2264    if (allow_ptr_leaks) {
2265        return false;
2266    }
2267
2268    return reg->type != SCALAR_VALUE;
2269}
2270
2271static void save_register_state(struct bpf_func_state *state, int spi, struct bpf_reg_state *reg)
2272{
2273    int i;
2274
2275    state->stack[spi].spilled_ptr = *reg;
2276    state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2277
2278    for (i = 0; i < BPF_REG_SIZE; i++) {
2279        state->stack[spi].slot_type[i] = STACK_SPILL;
2280    }
2281}
2282
2283/* check_stack_{read,write}_fixed_off functions track spill/fill of registers,
2284 * stack boundary and alignment are checked in check_mem_access()
2285 */
2286static int check_stack_write_fixed_off(struct bpf_verifier_env *env,
2287                                       /* stack frame we're writing to */
2288                                       struct bpf_func_state *state, int off, int size, int value_regno, int insn_idx)
2289{
2290    struct bpf_func_state *cur; /* state of the current function */
2291    int i, slot = -off - 1, spi = slot / BPF_REG_SIZE, err;
2292    u32 dst_reg = env->prog->insnsi[insn_idx].dst_reg;
2293    struct bpf_reg_state *reg = NULL;
2294
2295    err = realloc_func_state(state, round_up(slot + 1, BPF_REG_SIZE), state->acquired_refs, true);
2296    if (err) {
2297        return err;
2298    }
2299    /* caller checked that off % size == 0 and -MAX_BPF_STACK <= off < 0,
2300     * so it's aligned access and [off, off + size) are within stack limits
2301     */
2302    if (!env->allow_ptr_leaks && state->stack[spi].slot_type[0] == STACK_SPILL && size != BPF_REG_SIZE) {
2303        verbose(env, "attempt to corrupt spilled pointer on stack\n");
2304        return -EACCES;
2305    }
2306
2307    cur = env->cur_state->frame[env->cur_state->curframe];
2308    if (value_regno >= 0) {
2309        reg = &cur->regs[value_regno];
2310    }
2311    if (!env->bypass_spec_v4) {
2312        bool sanitize = reg && is_spillable_regtype(reg->type);
2313
2314        for (i = 0; i < size; i++) {
2315            if (state->stack[spi].slot_type[i] == STACK_INVALID) {
2316                sanitize = true;
2317                break;
2318            }
2319        }
2320
2321        if (sanitize) {
2322            env->insn_aux_data[insn_idx].sanitize_stack_spill = true;
2323        }
2324    }
2325
2326    if (reg && size == BPF_REG_SIZE && register_is_bounded(reg) && !register_is_null(reg) && env->bpf_capable) {
2327        if (dst_reg != BPF_REG_FP) {
            /* The backtracking logic can only recognize explicit
             * stack slot addresses like [fp - 8]. Other spills of a
             * scalar via a different register have to be conservative.
             * Backtrack from here and mark as precise all registers
             * that contributed to 'reg' being a constant.
             */
2334            err = mark_chain_precision(env, value_regno);
2335            if (err) {
2336                return err;
2337            }
2338        }
2339        save_register_state(state, spi, reg);
2340    } else if (reg && is_spillable_regtype(reg->type)) {
2341        /* register containing pointer is being spilled into stack */
2342        if (size != BPF_REG_SIZE) {
2343            verbose_linfo(env, insn_idx, "; ");
2344            verbose(env, "invalid size of register spill\n");
2345            return -EACCES;
2346        }
2347        if (state != cur && reg->type == PTR_TO_STACK) {
2348            verbose(env, "cannot spill pointers to stack into stack frame of the caller\n");
2349            return -EINVAL;
2350        }
2351        save_register_state(state, spi, reg);
2352    } else {
2353        u8 type = STACK_MISC;
2354
2355        /* regular write of data into stack destroys any spilled ptr */
2356        state->stack[spi].spilled_ptr.type = NOT_INIT;
2357        /* Mark slots as STACK_MISC if they belonged to spilled ptr. */
2358        if (state->stack[spi].slot_type[0] == STACK_SPILL) {
2359            for (i = 0; i < BPF_REG_SIZE; i++) {
2360                state->stack[spi].slot_type[i] = STACK_MISC;
2361            }
2362        }
2363
        /* only mark the slot as written if all 8 bytes were written,
         * otherwise read propagation may incorrectly stop too soon
         * when stack slots are partially written.
         * This heuristic means that read propagation will be
         * conservative, since it will add reg_live_read marks
         * to stack slots all the way to the first state when the program
         * writes+reads less than 8 bytes.
         */
2372        if (size == BPF_REG_SIZE) {
2373            state->stack[spi].spilled_ptr.live |= REG_LIVE_WRITTEN;
2374        }
2375
2376        /* when we zero initialize stack slots mark them as such */
2377        if (reg && register_is_null(reg)) {
2378            /* backtracking doesn't work for STACK_ZERO yet. */
2379            err = mark_chain_precision(env, value_regno);
2380            if (err) {
2381                return err;
2382            }
2383            type = STACK_ZERO;
2384        }
2385
2386        /* Mark slots affected by this stack write. */
2387        for (i = 0; i < size; i++) {
2388            state->stack[spi].slot_type[(slot - i) % BPF_REG_SIZE] = type;
2389        }
2390    }
2391    return 0;
2392}
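
/* Example outcomes for check_stack_write_fixed_off() (illustrative):
 *   *(u64 *)(r10 - 8) = r1    // r1 is a spillable pointer or, for
 *                             // bpf_capable programs, a bounded scalar:
 *                             // its full state is kept in the fp-8 slot
 *                             // (STACK_SPILL)
 *   *(u32 *)(r10 - 16) = r1   // partial write into a non-spilled slot:
 *                             // the touched bytes become STACK_MISC and
 *                             // no register state is kept
 *   r2 = 0
 *   *(u64 *)(r10 - 24) = r2   // storing a known zero marks the slot bytes
 *                             // as STACK_ZERO
 */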
2393
2394/* Write the stack: 'stack[ptr_regno + off] = value_regno'. 'ptr_regno' is
2395 * known to contain a variable offset.
2396 * This function checks whether the write is permitted and conservatively
2397 * tracks the effects of the write, considering that each stack slot in the
2398 * dynamic range is potentially written to.
2399 *
2400 * 'off' includes 'regno->off'.
2401 * 'value_regno' can be -1, meaning that an unknown value is being written to
2402 * the stack.
2403 *
2404 * Spilled pointers in range are not marked as written because we don't know
2405 * what's going to be actually written. This means that read propagation for
2406 * future reads cannot be terminated by this write.
2407 *
2408 * For privileged programs, uninitialized stack slots are considered
2409 * initialized by this write (even though we don't know exactly what offsets
2410 * are going to be written to). The idea is that we don't want the verifier to
2411 * reject future reads that access slots written to through variable offsets.
2412 */
2413static int check_stack_write_var_off(struct bpf_verifier_env *env,
2414                                     /* func where register points to */
2415                                     struct bpf_func_state *state, int ptr_regno, int off, int size, int value_regno,
2416                                     int insn_idx)
2417{
2418    struct bpf_func_state *cur; /* state of the current function */
2419    int min_off, max_off;
2420    int i, err;
2421    struct bpf_reg_state *ptr_reg = NULL, *value_reg = NULL;
2422    bool writing_zero = false;
2423    /* set if the fact that we're writing a zero is used to let any
2424     * stack slots remain STACK_ZERO
2425     */
2426    bool zero_used = false;
2427
2428    cur = env->cur_state->frame[env->cur_state->curframe];
2429    ptr_reg = &cur->regs[ptr_regno];
2430    min_off = ptr_reg->smin_value + off;
2431    max_off = ptr_reg->smax_value + off + size;
2432    if (value_regno >= 0) {
2433        value_reg = &cur->regs[value_regno];
2434    }
2435    if (value_reg && register_is_null(value_reg)) {
2436        writing_zero = true;
2437    }
2438
2439    err = realloc_func_state(state, round_up(-min_off, BPF_REG_SIZE), state->acquired_refs, true);
2440    if (err) {
2441        return err;
2442    }
2443
2444    /* Variable offset writes destroy any spilled pointers in range. */
2445    for (i = min_off; i < max_off; i++) {
2446        u8 new_type, *stype;
2447        int slot, spi;
2448
2449        slot = -i - 1;
2450        spi = slot / BPF_REG_SIZE;
2451        stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
2452
2453        if (!env->allow_ptr_leaks && *stype != NOT_INIT && *stype != SCALAR_VALUE) {
            /* Reject the write if there are spilled pointers in
             * range. If we didn't reject here, the ptr status
             * would be erased below (even though not all slots are
             * actually overwritten), possibly opening the door to
             * leaks.
             */
2460            verbose(env, "spilled ptr in range of var-offset stack write; insn %d, ptr off: %d", insn_idx, i);
2461            return -EINVAL;
2462        }
2463
2464        /* Erase all spilled pointers. */
2465        state->stack[spi].spilled_ptr.type = NOT_INIT;
2466
2467        /* Update the slot type. */
2468        new_type = STACK_MISC;
2469        if (writing_zero && *stype == STACK_ZERO) {
2470            new_type = STACK_ZERO;
2471            zero_used = true;
2472        }
        /* If the slot is STACK_INVALID, we check whether it's OK to
         * pretend that it will be initialized by this write. The slot
         * might not actually be written to, and so if we mark it as
         * initialized, future reads might leak uninitialized memory.
         * For privileged programs, we accept such reads to slots that
         * may or may not be written because, if we were to reject
         * them, the error would be too confusing.
         */
2481        if (*stype == STACK_INVALID && !env->allow_uninit_stack) {
2482            verbose(env, "uninit stack in range of var-offset write prohibited for !root; insn %d, off: %d", insn_idx,
2483                    i);
2484            return -EINVAL;
2485        }
2486        *stype = new_type;
2487    }
2488    if (zero_used) {
2489        /* backtracking doesn't work for STACK_ZERO yet. */
2490        err = mark_chain_precision(env, value_regno);
2491        if (err) {
2492            return err;
2493        }
2494    }
2495    return 0;
2496}
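
/* Example for check_stack_write_var_off() (illustrative):
 *   r3 = r10
 *   r3 += r4                  // scalar r4 known to be in [-16, -9]
 *   *(u8 *)(r3 + 0) = r5
 * every byte the store could reach (fp-16 .. fp-9) is treated as
 * potentially written: a spilled pointer anywhere in that range is
 * rejected for unprivileged programs, surviving bytes become STACK_MISC
 * (or stay STACK_ZERO when r5 is a known zero), and nothing is marked
 * REG_LIVE_WRITTEN because we don't know which byte was actually hit.
 */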
2497
2498/* When register 'dst_regno' is assigned some values from stack[min_off,
2499 * max_off), we set the register's type according to the types of the
2500 * respective stack slots. If all the stack values are known to be zeros, then
2501 * so is the destination reg. Otherwise, the register is considered to be
2502 * SCALAR. This function does not deal with register filling; the caller must
2503 * ensure that all spilled registers in the stack range have been marked as
2504 * read.
2505 */
2506static void mark_reg_stack_read(struct bpf_verifier_env *env,
2507                                /* func where src register points to */
2508                                struct bpf_func_state *ptr_state, int min_off, int max_off, int dst_regno)
2509{
2510    struct bpf_verifier_state *vstate = env->cur_state;
2511    struct bpf_func_state *state = vstate->frame[vstate->curframe];
2512    int i, slot, spi;
2513    u8 *stype;
2514    int zeros = 0;
2515
2516    for (i = min_off; i < max_off; i++) {
2517        slot = -i - 1;
2518        spi = slot / BPF_REG_SIZE;
2519        stype = ptr_state->stack[spi].slot_type;
2520        if (stype[slot % BPF_REG_SIZE] != STACK_ZERO) {
2521            break;
2522        }
2523        zeros++;
2524    }
2525    if (zeros == max_off - min_off) {
2526        /* any access_size read into register is zero extended,
2527         * so the whole register == const_zero
2528         */
2529        verifier_mark_reg_const_zero(&state->regs[dst_regno]);
2530        /* backtracking doesn't support STACK_ZERO yet,
2531         * so mark it precise here, so that later
2532         * backtracking can stop here.
2533         * Backtracking may not need this if this register
2534         * doesn't participate in pointer adjustment.
2535         * Forward propagation of precise flag is not
2536         * necessary either. This mark is only to stop
2537         * backtracking. Any register that contributed
2538         * to const 0 was marked precise before spill.
2539         */
2540        state->regs[dst_regno].precise = true;
2541    } else {
2542        /* have read misc data from the stack */
2543        mark_reg_unknown(env, state->regs, dst_regno);
2544    }
2545    state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2546}
2547
2548/* Read the stack at 'off' and put the results into the register indicated by
2549 * 'dst_regno'. It handles reg filling if the addressed stack slot is a
2550 * spilled reg.
2551 *
2552 * 'dst_regno' can be -1, meaning that the read value is not going to a
2553 * register.
2554 *
2555 * The access is assumed to be within the current stack bounds.
2556 */
2557static int check_stack_read_fixed_off(struct bpf_verifier_env *env,
2558                                      /* func where src register points to */
2559                                      struct bpf_func_state *reg_state, int off, int size, int dst_regno)
2560{
2561    struct bpf_verifier_state *vstate = env->cur_state;
2562    struct bpf_func_state *state = vstate->frame[vstate->curframe];
2563    int i, slot = -off - 1, spi = slot / BPF_REG_SIZE;
2564    struct bpf_reg_state *reg;
2565    u8 *stype;
2566
2567    stype = reg_state->stack[spi].slot_type;
2568    reg = &reg_state->stack[spi].spilled_ptr;
2569
2570    if (stype[0] == STACK_SPILL) {
2571        if (size != BPF_REG_SIZE) {
2572            if (reg->type != SCALAR_VALUE) {
2573                verbose_linfo(env, env->insn_idx, "; ");
2574                verbose(env, "invalid size of register fill\n");
2575                return -EACCES;
2576            }
2577            if (dst_regno >= 0) {
2578                mark_reg_unknown(env, state->regs, dst_regno);
2579                state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2580            }
2581            mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2582            return 0;
2583        }
2584        for (i = 1; i < BPF_REG_SIZE; i++) {
2585            if (stype[(slot - i) % BPF_REG_SIZE] != STACK_SPILL) {
2586                verbose(env, "corrupted spill memory\n");
2587                return -EACCES;
2588            }
2589        }
2590
2591        if (dst_regno >= 0) {
2592            /* restore register state from stack */
2593            state->regs[dst_regno] = *reg;
2594            /* mark reg as written since spilled pointer state likely
2595             * has its liveness marks cleared by is_state_visited()
2596             * which resets stack/reg liveness for state transitions
2597             */
2598            state->regs[dst_regno].live |= REG_LIVE_WRITTEN;
2599        } else if (__is_pointer_value(env->allow_ptr_leaks, reg)) {
2600            /* If dst_regno==-1, the caller is asking us whether
2601             * it is acceptable to use this value as a SCALAR_VALUE
2602             * (e.g. for XADD).
2603             * We must not allow unprivileged callers to do that
2604             * with spilled pointers.
2605             */
2606            verbose(env, "leaking pointer from stack off %d\n", off);
2607            return -EACCES;
2608        }
2609        mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2610    } else {
2611        u8 type;
2612
2613        for (i = 0; i < size; i++) {
2614            type = stype[(slot - i) % BPF_REG_SIZE];
2615            if (type == STACK_MISC) {
2616                continue;
2617            }
2618            if (type == STACK_ZERO) {
2619                continue;
2620            }
2621            verbose(env, "invalid read from stack off %d+%d size %d\n", off, i, size);
2622            return -EACCES;
2623        }
2624        mark_reg_read(env, reg, reg->parent, REG_LIVE_READ64);
2625        if (dst_regno >= 0) {
2626            mark_reg_stack_read(env, reg_state, off, off + size, dst_regno);
2627        }
2628    }
2629    return 0;
2630}
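
/* Example for check_stack_read_fixed_off() (illustrative):
 *   *(u64 *)(r10 - 8) = r6    // r6 = PTR_TO_MAP_VALUE spilled to fp-8
 *   ...
 *   r7 = *(u64 *)(r10 - 8)    // full-width fill: r7 inherits r6's saved
 *                             // register state
 *   r8 = *(u32 *)(r10 - 8)    // narrow fill of a spilled pointer is
 *                             // rejected: "invalid size of register fill"
 * Reads of STACK_MISC/STACK_ZERO bytes instead go through
 * mark_reg_stack_read() and yield an unknown scalar or a constant zero.
 */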
2631
2632enum stack_access_src {
2633    ACCESS_DIRECT = 1, /* the access is performed by an instruction */
2634    ACCESS_HELPER = 2, /* the access is performed by a helper */
2635};
2636
2637static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size,
2638                                         bool zero_size_allowed, enum stack_access_src type,
2639                                         struct bpf_call_arg_meta *meta);
2640
2641static struct bpf_reg_state *reg_state(struct bpf_verifier_env *env, int regno)
2642{
2643    return cur_regs(env) + regno;
2644}
2645
2646/* Read the stack at 'ptr_regno + off' and put the result into the register
2647 * 'dst_regno'.
 * 'off' includes the pointer register's fixed offset (i.e. 'ptr_regno.off'),
2649 * but not its variable offset.
2650 * 'size' is assumed to be <= reg size and the access is assumed to be aligned.
2651 *
2652 * As opposed to check_stack_read_fixed_off, this function doesn't deal with
2653 * filling registers (i.e. reads of spilled register cannot be detected when
2654 * the offset is not fixed). We conservatively mark 'dst_regno' as containing
2655 * SCALAR_VALUE. That's why we assert that the 'ptr_regno' has a variable
2656 * offset; for a fixed offset check_stack_read_fixed_off should be used
2657 * instead.
2658 */
2659static int check_stack_read_var_off(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int dst_regno)
2660{
2661    /* The state of the source register. */
2662    struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2663    struct bpf_func_state *ptr_state = func(env, reg);
2664    int err;
2665    int min_off, max_off;
2666
2667    /* Note that we pass a NULL meta, so raw access will not be permitted.
2668     */
2669    err = check_stack_range_initialized(env, ptr_regno, off, size, false, ACCESS_DIRECT, NULL);
2670    if (err) {
2671        return err;
2672    }
2673
2674    min_off = reg->smin_value + off;
2675    max_off = reg->smax_value + off;
2676    mark_reg_stack_read(env, ptr_state, min_off, max_off + size, dst_regno);
2677    return 0;
2678}
2679
2680/* check_stack_read dispatches to check_stack_read_fixed_off or
2681 * check_stack_read_var_off.
2682 *
2683 * The caller must ensure that the offset falls within the allocated stack
2684 * bounds.
2685 *
2686 * 'dst_regno' is a register which will receive the value from the stack. It
2687 * can be -1, meaning that the read value is not going to a register.
2688 */
2689static int check_stack_read(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int dst_regno)
2690{
2691    struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2692    struct bpf_func_state *state = func(env, reg);
2693    int err;
2694    /* Some accesses are only permitted with a static offset. */
2695    bool var_off = !tnum_is_const(reg->var_off);
2696    /* The offset is required to be static when reads don't go to a
2697     * register, in order to not leak pointers (see
2698     * check_stack_read_fixed_off).
2699     */
2700    if (dst_regno < 0 && var_off) {
2701        char tn_buf[48];
2702
2703        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2704        verbose(env, "variable offset stack pointer cannot be passed into helper function; var_off=%s off=%d size=%d\n",
2705                tn_buf, off, size);
2706        return -EACCES;
2707    }
2708    /* Variable offset is prohibited for unprivileged mode for simplicity
2709     * since it requires corresponding support in Spectre masking for stack
2710     * ALU. See also retrieve_ptr_limit().
2711     */
2712    if (!env->bypass_spec_v1 && var_off) {
2713        char tn_buf[48];
2714
2715        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
2716        verbose(env, "R%d variable offset stack access prohibited for !root, var_off=%s\n", ptr_regno, tn_buf);
2717        return -EACCES;
2718    }
2719
2720    if (!var_off) {
2721        off += reg->var_off.value;
2722        err = check_stack_read_fixed_off(env, state, off, size, dst_regno);
2723    } else {
2724        /* Variable offset stack reads need more conservative handling
2725         * than fixed offset ones. Note that dst_regno >= 0 on this
2726         * branch.
2727         */
2728        err = check_stack_read_var_off(env, ptr_regno, off, size, dst_regno);
2729    }
2730    return err;
2731}
2732
2733/* check_stack_write dispatches to check_stack_write_fixed_off or
2734 * check_stack_write_var_off.
2735 *
2736 * 'ptr_regno' is the register used as a pointer into the stack.
2737 * 'off' includes 'ptr_regno->off', but not its variable offset (if any).
2738 * 'value_regno' is the register whose value we're writing to the stack. It can
2739 * be -1, meaning that we're not writing from a register.
2740 *
2741 * The caller must ensure that the offset falls within the maximum stack size.
2742 */
2743static int check_stack_write(struct bpf_verifier_env *env, int ptr_regno, int off, int size, int value_regno,
2744                             int insn_idx)
2745{
2746    struct bpf_reg_state *reg = reg_state(env, ptr_regno);
2747    struct bpf_func_state *state = func(env, reg);
2748    int err;
2749
2750    if (tnum_is_const(reg->var_off)) {
2751        off += reg->var_off.value;
2752        err = check_stack_write_fixed_off(env, state, off, size, value_regno, insn_idx);
        /* Variable offset stack writes need more conservative handling
         * than fixed offset ones.
         */
2756         */
2757        err = check_stack_write_var_off(env, state, ptr_regno, off, size, value_regno, insn_idx);
2758    }
2759    return err;
2760}
2761
2762static int check_map_access_type(struct bpf_verifier_env *env, u32 regno, int off, int size, enum bpf_access_type type)
2763{
2764    struct bpf_reg_state *regs = cur_regs(env);
2765    struct bpf_map *map = regs[regno].map_ptr;
2766    u32 cap = bpf_map_flags_to_cap(map);
2767    if (type == BPF_WRITE && !(cap & BPF_MAP_CAN_WRITE)) {
2768        verbose(env, "write into map forbidden, value_size=%d off=%d size=%d\n", map->value_size, off, size);
2769        return -EACCES;
2770    }
2771    if (type == BPF_READ && !(cap & BPF_MAP_CAN_READ)) {
2772        verbose(env, "read from map forbidden, value_size=%d off=%d size=%d\n", map->value_size, off, size);
2773        return -EACCES;
2774    }
2775
2776    return 0;
2777}
2778
2779/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
2780static int __check_mem_access(struct bpf_verifier_env *env, int regno, int off, int size, u32 mem_size,
2781                              bool zero_size_allowed)
2782{
2783    bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
2784    struct bpf_reg_state *reg;
2785
2786    if (off >= 0 && size_ok && (u64)off + size <= mem_size) {
2787        return 0;
2788    }
2789
2790    reg = &cur_regs(env)[regno];
2791    switch (reg->type) {
2792        case PTR_TO_MAP_VALUE:
2793            verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n", mem_size, off, size);
2794            break;
2795        case PTR_TO_PACKET:
2796        case PTR_TO_PACKET_META:
2797        case PTR_TO_PACKET_END:
2798            verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", off, size, regno,
2799                    reg->id, off, mem_size);
2800            break;
2801        case PTR_TO_MEM:
2802        default:
2803            verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n", mem_size, off, size);
2804    }
2805
2806    return -EACCES;
2807}
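
/* Quick sanity example for __check_mem_access() (illustrative): with
 * mem_size = 64, an access at off = 60 with size = 4 passes
 * (60 + 4 <= 64), while off = 61 with size = 4, any negative off, or
 * size = 0 without zero_size_allowed fails with the type-specific
 * "invalid access" message above.
 */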
2808
2809/* check read/write into a memory region with possible variable offset */
2810static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno, int off, int size, u32 mem_size,
2811                                   bool zero_size_allowed)
2812{
2813    struct bpf_verifier_state *vstate = env->cur_state;
2814    struct bpf_func_state *state = vstate->frame[vstate->curframe];
2815    struct bpf_reg_state *reg = &state->regs[regno];
2816    int err;
2817
2818    /* We may have adjusted the register pointing to memory region, so we
2819     * need to try adding each of min_value and max_value to off
2820     * to make sure our theoretical access will be safe.
2821     */
2822    if (env->log.level & BPF_LOG_LEVEL) {
2823        print_verifier_state(env, state);
2824    }
2825
    /* The minimum value is only important with signed
     * comparisons where we can't assume the floor of a
     * value is 0.  If we are using signed variables for our
     * indexes, we need to make sure that whatever we use
     * will have a set floor within our range.
     */
2832    if (reg->smin_value < 0 &&
2833        (reg->smin_value == S64_MIN || (off + reg->smin_value != (s64)(s32)(off + reg->smin_value)) ||
2834         reg->smin_value + off < 0)) {
2835        verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
2836        return -EACCES;
2837    }
2838    err = __check_mem_access(env, regno, reg->smin_value + off, size, mem_size, zero_size_allowed);
2839    if (err) {
2840        verbose(env, "R%d min value is outside of the allowed memory range\n", regno);
2841        return err;
2842    }
2843
2844    /* If we haven't set a max value then we need to bail since we can't be
2845     * sure we won't do bad things.
2846     * If reg->umax_value + off could overflow, treat that as unbounded too.
2847     */
2848    if (reg->umax_value >= BPF_MAX_VAR_OFF) {
2849        verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n", regno);
2850        return -EACCES;
2851    }
2852    err = __check_mem_access(env, regno, reg->umax_value + off, size, mem_size, zero_size_allowed);
2853    if (err) {
2854        verbose(env, "R%d max value is outside of the allowed memory range\n", regno);
2855        return err;
2856    }
2857
2858    return 0;
2859}
2860
2861/* check read/write into a map element with possible variable offset */
2862static int check_map_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed)
2863{
2864    struct bpf_verifier_state *vstate = env->cur_state;
2865    struct bpf_func_state *state = vstate->frame[vstate->curframe];
2866    struct bpf_reg_state *reg = &state->regs[regno];
2867    struct bpf_map *map = reg->map_ptr;
2868    int err;
2869
2870    err = check_mem_region_access(env, regno, off, size, map->value_size, zero_size_allowed);
2871    if (err) {
2872        return err;
2873    }
2874
2875    if (map_value_has_spin_lock(map)) {
2876        u32 lock = map->spin_lock_off;
2877
2878        /* if any part of struct bpf_spin_lock can be touched by
2879         * load/store reject this program.
2880         * To check that [x1, x2) overlaps with [y1, y2)
2881         * it is sufficient to check x1 < y2 && y1 < x2.
2882         */
2883        if (reg->smin_value + off < lock + sizeof(struct bpf_spin_lock) && lock < reg->umax_value + off + size) {
2884            verbose(env, "bpf_spin_lock cannot be accessed directly by load/store\n");
2885            return -EACCES;
2886        }
2887    }
2888    return err;
2889}
2890
2891#define MAX_PACKET_OFF 0xffff
2892
2893static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
2894{
2895    return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
2896}
2897
2898static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta,
2899                                       enum bpf_access_type t)
2900{
2901    enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
2902
2903    switch (prog_type) {
2904        /* Program types only with direct read access go here! */
2905        case BPF_PROG_TYPE_LWT_IN:
2906        case BPF_PROG_TYPE_LWT_OUT:
2907        case BPF_PROG_TYPE_LWT_SEG6LOCAL:
2908        case BPF_PROG_TYPE_SK_REUSEPORT:
2909        case BPF_PROG_TYPE_FLOW_DISSECTOR:
2910        case BPF_PROG_TYPE_CGROUP_SKB:
2911            if (t == BPF_WRITE) {
2912                return false;
2913            }
2914            fallthrough;
2915
2916        /* Program types with direct read + write access go here! */
2917        case BPF_PROG_TYPE_SCHED_CLS:
2918        case BPF_PROG_TYPE_SCHED_ACT:
2919        case BPF_PROG_TYPE_XDP:
2920        case BPF_PROG_TYPE_LWT_XMIT:
2921        case BPF_PROG_TYPE_SK_SKB:
2922        case BPF_PROG_TYPE_SK_MSG:
2923            if (meta) {
2924                return meta->pkt_access;
2925            }
2926
2927            env->seen_direct_write = true;
2928            return true;
2929
2930        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
2931            if (t == BPF_WRITE) {
2932                env->seen_direct_write = true;
2933            }
2934
2935            return true;
2936
2937        default:
2938            return false;
2939    }
2940}
2941
2942static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off, int size, bool zero_size_allowed)
2943{
2944    struct bpf_reg_state *regs = cur_regs(env);
2945    struct bpf_reg_state *reg = &regs[regno];
2946    int err;
2947
2948    /* We may have added a variable offset to the packet pointer; but any
2949     * reg->range we have comes after that.  We are only checking the fixed
2950     * offset.
2951     */
2952
2953    /* We don't allow negative numbers, because we aren't tracking enough
2954     * detail to prove they're safe.
2955     */
2956    if (reg->smin_value < 0) {
2957        verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
2958        return -EACCES;
2959    }
2960    err = __check_mem_access(env, regno, off, size, reg->range, zero_size_allowed);
2961    if (err) {
2962        verbose(env, "R%d offset is outside of the packet\n", regno);
2963        return err;
2964    }
2965
    /* __check_mem_access has made sure "off + size - 1" is within u16.
     * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
     * otherwise find_good_pkt_pointers would have refused to set a range
     * that __check_mem_access would then have rejected for this pkt access.
     * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
     */
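    /* For example (illustrative numbers): off=2, size=4 and
     * reg->umax_value=0xffff make the expression tracked into
     * max_pkt_offset evaluate to 0x10004, comfortably within u32.
     */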
2972    env->prog->aux->max_pkt_offset = max_t(u32, env->prog->aux->max_pkt_offset, off + reg->umax_value + size - 1);
2973
2974    return err;
2975}
2976
2977/* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
2978static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size, enum bpf_access_type t,
2979                            enum bpf_reg_type *reg_type, u32 *btf_id)
2980{
2981    struct bpf_insn_access_aux info = {
2982        .reg_type = *reg_type,
2983        .log = &env->log,
2984    };
2985
2986    if (env->ops->is_valid_access && env->ops->is_valid_access(off, size, t, env->prog, &info)) {
2987        /* A non zero info.ctx_field_size indicates that this field is a
2988         * candidate for later verifier transformation to load the whole
2989         * field and then apply a mask when accessed with a narrower
2990         * access than actual ctx access size. A zero info.ctx_field_size
2991         * will only allow for whole field access and rejects any other
2992         * type of narrower access.
2993         */
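        /* For instance (illustrative), a 1-byte read of a 4-byte context
         * field would later be rewritten as a 4-byte load plus a mask/shift,
         * which is only possible when ctx_field_size is recorded here.
         */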
2994        *reg_type = info.reg_type;
2995
2996        if (base_type(*reg_type) == PTR_TO_BTF_ID) {
2997            *btf_id = info.btf_id;
2998        } else {
2999            env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3000        }
3001        /* remember the offset of last byte accessed in ctx */
3002        if (env->prog->aux->max_ctx_offset < off + size) {
3003            env->prog->aux->max_ctx_offset = off + size;
3004        }
3005        return 0;
3006    }
3007
3008    verbose(env, "invalid bpf_context access off=%d size=%d\n", off, size);
3009    return -EACCES;
3010}
3011
3012static int check_flow_keys_access(struct bpf_verifier_env *env, int off, int size)
3013{
3014    if (size < 0 || off < 0 || (u64)off + size > sizeof(struct bpf_flow_keys)) {
3015        verbose(env, "invalid access to flow keys off=%d size=%d\n", off, size);
3016        return -EACCES;
3017    }
3018    return 0;
3019}
3020
3021static int check_sock_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int size,
3022                             enum bpf_access_type t)
3023{
3024    struct bpf_reg_state *regs = cur_regs(env);
3025    struct bpf_reg_state *reg = &regs[regno];
3026    struct bpf_insn_access_aux info = {};
3027    bool valid;
3028
3029    if (reg->smin_value < 0) {
3030        verbose(env, "R%d min value is negative, either use unsigned index or do a if (index >=0) check.\n", regno);
3031        return -EACCES;
3032    }
3033
3034    switch (reg->type) {
3035        case PTR_TO_SOCK_COMMON:
3036            valid = bpf_sock_common_is_valid_access(off, size, t, &info);
3037            break;
3038        case PTR_TO_SOCKET:
3039            valid = bpf_sock_is_valid_access(off, size, t, &info);
3040            break;
3041        case PTR_TO_TCP_SOCK:
3042            valid = bpf_tcp_sock_is_valid_access(off, size, t, &info);
3043            break;
3044        case PTR_TO_XDP_SOCK:
3045            valid = bpf_xdp_sock_is_valid_access(off, size, t, &info);
3046            break;
3047        default:
3048            valid = false;
3049    }
3050
3051    if (valid) {
3052        env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
3053        return 0;
3054    }
3055
3056    verbose(env, "R%d invalid %s access off=%d size=%d\n", regno, reg_type_str(env, reg->type), off, size);
3057
3058    return -EACCES;
3059}
3060
3061static bool is_pointer_value(struct bpf_verifier_env *env, int regno)
3062{
3063    return __is_pointer_value(env->allow_ptr_leaks, reg_state(env, regno));
3064}
3065
3066static bool is_ctx_reg(struct bpf_verifier_env *env, int regno)
3067{
3068    const struct bpf_reg_state *reg = reg_state(env, regno);
3069
3070    return reg->type == PTR_TO_CTX;
3071}
3072
3073static bool is_sk_reg(struct bpf_verifier_env *env, int regno)
3074{
3075    const struct bpf_reg_state *reg = reg_state(env, regno);
3076
3077    return type_is_sk_pointer(reg->type);
3078}
3079
3080static bool is_pkt_reg(struct bpf_verifier_env *env, int regno)
3081{
3082    const struct bpf_reg_state *reg = reg_state(env, regno);
3083
3084    return type_is_pkt_pointer(reg->type);
3085}
3086
3087static bool is_flow_key_reg(struct bpf_verifier_env *env, int regno)
3088{
3089    const struct bpf_reg_state *reg = reg_state(env, regno);
3090
3091    /* Separate to is_ctx_reg() since we still want to allow BPF_ST here. */
3092    return reg->type == PTR_TO_FLOW_KEYS;
3093}
3094
3095static int check_pkt_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size,
3096                                   bool strict)
3097{
3098    struct tnum reg_off;
3099    int ip_align;
3100
3101    /* Byte size accesses are always allowed. */
3102    if (!strict || size == 1) {
3103        return 0;
3104    }
3105
3106    /* For platforms that do not have a Kconfig enabling
3107     * CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS the value of
3108     * NET_IP_ALIGN is universally set to '2'.  And on platforms
3109     * that do set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS, we get
3110     * to this code only in strict mode where we want to emulate
3111     * the NET_IP_ALIGN==2 checking.  Therefore use an
3112     * unconditional IP align value of '2'.
3113     */
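    /* Worked example (illustrative): with reg->off == 14 (e.g. just past an
     * Ethernet header), off == 0 and size == 4, the sum 2 + 14 + 0 = 16 is
     * 4-byte aligned and passes; with off == 1 the sum is 17 and the access
     * is rejected in strict mode.
     */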
3114    ip_align = 2;
3115
3116    reg_off = tnum_add(reg->var_off, tnum_const(ip_align + reg->off + off));
3117    if (!tnum_is_aligned(reg_off, size)) {
3118        char tn_buf[48];
3119
3120        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3121        verbose(env, "misaligned packet access off %d+%s+%d+%d size %d\n", ip_align, tn_buf, reg->off, off, size);
3122        return -EACCES;
3123    }
3124
3125    return 0;
3126}
3127
3128static int check_generic_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg,
3129                                       const char *pointer_desc, int off, int size, bool strict)
3130{
3131    struct tnum reg_off;
3132
3133    /* Byte size accesses are always allowed. */
3134    if (!strict || size == 1) {
3135        return 0;
3136    }
3137
3138    reg_off = tnum_add(reg->var_off, tnum_const(reg->off + off));
3139    if (!tnum_is_aligned(reg_off, size)) {
3140        char tn_buf[48];
3141
3142        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3143        verbose(env, "misaligned %saccess off %s+%d+%d size %d\n", pointer_desc, tn_buf, reg->off, off, size);
3144        return -EACCES;
3145    }
3146
3147    return 0;
3148}
3149
3150static int check_ptr_alignment(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int off, int size,
3151                               bool strict_alignment_once)
3152{
3153    bool strict = env->strict_alignment || strict_alignment_once;
3154    const char *pointer_desc = "";
3155
3156    switch (reg->type) {
3157        case PTR_TO_PACKET:
3158        case PTR_TO_PACKET_META:
3159            /* Special case, because of NET_IP_ALIGN. Given metadata sits
3160             * right in front, treat it the very same way.
3161             */
3162            return check_pkt_ptr_alignment(env, reg, off, size, strict);
3163        case PTR_TO_FLOW_KEYS:
3164            pointer_desc = "flow keys ";
3165            break;
3166        case PTR_TO_MAP_VALUE:
3167            pointer_desc = "value ";
3168            break;
3169        case PTR_TO_CTX:
3170            pointer_desc = "context ";
3171            break;
3172        case PTR_TO_STACK:
3173            pointer_desc = "stack ";
3174            /* The stack spill tracking logic in check_stack_write_fixed_off()
3175             * and check_stack_read_fixed_off() relies on stack accesses being
3176             * aligned.
3177             */
3178            strict = true;
3179            break;
3180        case PTR_TO_SOCKET:
3181            pointer_desc = "sock ";
3182            break;
3183        case PTR_TO_SOCK_COMMON:
3184            pointer_desc = "sock_common ";
3185            break;
3186        case PTR_TO_TCP_SOCK:
3187            pointer_desc = "tcp_sock ";
3188            break;
3189        case PTR_TO_XDP_SOCK:
3190            pointer_desc = "xdp_sock ";
3191            break;
3192        default:
3193            break;
3194    }
3195    return check_generic_ptr_alignment(env, reg, pointer_desc, off, size, strict);
3196}
3197
3198static int update_stack_depth(struct bpf_verifier_env *env, const struct bpf_func_state *func, int off)
3199{
3200    u16 stack = env->subprog_info[func->subprogno].stack_depth;
3201
3202    if (stack >= -off) {
3203        return 0;
3204    }
3205
3206    /* update known max for given subprogram */
3207    env->subprog_info[func->subprogno].stack_depth = -off;
3208    return 0;
3209}
3210
3211/* starting from main bpf function walk all instructions of the function
3212 * and recursively walk all callees that given function can call.
3213 * Ignore jump and exit insns.
3214 * Since recursion is prevented by check_cfg() this algorithm
3215 * only needs a local stack of MAX_CALL_FRAMES to remember callsites
3216 */
3217static int check_max_stack_depth(struct bpf_verifier_env *env)
3218{
3219    int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
3220    struct bpf_subprog_info *subprog = env->subprog_info;
3221    struct bpf_insn *insn = env->prog->insnsi;
3222    bool tail_call_reachable = false;
3223    int ret_insn[MAX_CALL_FRAMES];
3224    int ret_prog[MAX_CALL_FRAMES];
3225    int j;
3226    int process_flag = 0;
3227    int continue_flag = 0;
3228
3229    while (1) {
3230        if (process_flag == 0 && continue_flag == 0) {
3231            /* protect against potential stack overflow that might happen when
3232             * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
3233             * depth for such case down to 256 so that the worst case scenario
3234             * would result in 8k stack size (32 which is tailcall limit * 256 =
3235             * 8k).
3236             *
3237             * To get the idea what might happen, see an example:
3238             * func1 -> sub rsp, 128
3239             *  subfunc1 -> sub rsp, 256
3240             *  tailcall1 -> add rsp, 256
3241             *   func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
3242             *   subfunc2 -> sub rsp, 64
3243             *   subfunc22 -> sub rsp, 128
3244             *   tailcall2 -> add rsp, 128
3245             *    func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
3246             *
3247             * tailcall will unwind the current stack frame but it will not get rid
3248             * of caller's stack as shown on the example above.
3249             */
3250            if (idx && subprog[idx].has_tail_call && depth >= VERIFIER_TWOHUNDREDFIFTYSIX) {
3251                verbose(env, "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
3252                        depth);
3253                return -EACCES;
3254            }
3255            /* round up to 32-bytes, since this is granularity
3256             * of interpreter stack size
3257             */
3258            depth += round_up(max_t(u32, subprog[idx].stack_depth, 1), VERIFIER_THIRTYTWO);
3259            if (depth > MAX_BPF_STACK) {
3260                verbose(env, "combined stack size of %d calls is %d. Too large\n", frame + 1, depth);
3261                return -EACCES;
3262            }
3263        }
        continue_flag = 0;
        subprog_end = subprog[idx + 1].start;
        for (; i < subprog_end; i++) {
            if (insn[i].code != (BPF_JMP | BPF_CALL)) {
                continue;
            }
            if (insn[i].src_reg != BPF_PSEUDO_CALL) {
                continue;
            }
            /* remember insn and function to return to */
            ret_insn[frame] = i + 1;
            ret_prog[frame] = idx;

            /* find the callee */
            i = i + insn[i].imm + 1;
            idx = find_subprog(env, i);
            if (idx < 0) {
                WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", i);
                return -EFAULT;
            }

            if (subprog[idx].has_tail_call) {
                tail_call_reachable = true;
            }

            frame++;
            if (frame >= MAX_CALL_FRAMES) {
                verbose(env, "the call stack of %d frames is too deep !\n", frame);
                return -E2BIG;
            }
            /* descend into the callee on the next outer iteration */
            process_flag = 1;
            break;
        }
3302        if (process_flag == 1) {
3303            process_flag = 0;
3304            continue;
3305        }
3306        /* if tail call got detected across bpf2bpf calls then mark each of the
3307         * currently present subprog frames as tail call reachable subprogs;
3308         * this info will be utilized by JIT so that we will be preserving the
3309         * tail call counter throughout bpf2bpf calls combined with tailcalls
3310         */
3311        if (tail_call_reachable) {
3312            for (j = 0; j < frame; j++) {
3313                subprog[ret_prog[j]].tail_call_reachable = true;
3314            }
3315        }
3316        if (subprog[0].tail_call_reachable) {
3317            env->prog->aux->tail_call_reachable = true;
3318        }
3319
3320        /* end of for() loop means the last insn of the 'subprog'
3321         * was reached. Doesn't matter whether it was JA or EXIT
3322         */
3323        if (frame == 0) {
3324            return 0;
3325        }
3326        depth -= round_up(max_t(u32, subprog[idx].stack_depth, 1), VERIFIER_THIRTYTWO);
3327        frame--;
3328        i = ret_insn[frame];
3329        idx = ret_prog[frame];
3330        continue_flag = 1;
3331        continue;
3332    }
3333}
3334
3335#ifndef CONFIG_BPF_JIT_ALWAYS_ON
3336static int get_callee_stack_depth(struct bpf_verifier_env *env, const struct bpf_insn *insn, int idx)
3337{
3338    int start = idx + insn->imm + 1, subprog;
3339
3340    subprog = find_subprog(env, start);
3341    if (subprog < 0) {
3342        WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", start);
3343        return -EFAULT;
3344    }
3345    return env->subprog_info[subprog].stack_depth;
3346}
3347#endif
3348
3349static int __check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno,
3350                               bool fixed_off_ok)
3351{
3352    /* Access to this pointer-typed register or passing it to a helper
3353     * is only allowed in its original, unmodified form.
3354     */
3355
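    /* e.g. (illustrative) after
     *    BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
     *    BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
     * a PTR_TO_CTX in R2 carries off == 8 and is rejected below unless
     * fixed_off_ok is set.
     */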
3356    if (!fixed_off_ok && reg->off) {
3357        verbose(env, "dereference of modified %s ptr R%d off=%d disallowed\n", reg_type_str(env, reg->type), regno,
3358                reg->off);
3359        return -EACCES;
3360    }
3361
3362    if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3363        char tn_buf[48];
3364
3365        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3366        verbose(env, "variable %s access var_off=%s disallowed\n", reg_type_str(env, reg->type), tn_buf);
3367        return -EACCES;
3368    }
3369
3370    return 0;
3371}
3372
3373int check_ptr_off_reg(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno)
3374{
3375    return __check_ptr_off_reg(env, reg, regno, false);
3376}
3377
3378static int __check_buffer_access(struct bpf_verifier_env *env, const char *buf_info, const struct bpf_reg_state *reg,
3379                                 int regno, int off, int size)
3380{
3381    if (off < 0) {
3382        verbose(env, "R%d invalid %s buffer access: off=%d, size=%d\n", regno, buf_info, off, size);
3383        return -EACCES;
3384    }
3385    if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3386        char tn_buf[48];
3387
3388        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3389        verbose(env, "R%d invalid variable buffer offset: off=%d, var_off=%s\n", regno, off, tn_buf);
3390        return -EACCES;
3391    }
3392
3393    return 0;
3394}
3395
3396static int check_tp_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, int off,
3397                                  int size)
3398{
3399    int err;
3400
3401    err = __check_buffer_access(env, "tracepoint", reg, regno, off, size);
3402    if (err) {
3403        return err;
3404    }
3405
3406    if (off + size > env->prog->aux->max_tp_access) {
3407        env->prog->aux->max_tp_access = off + size;
3408    }
3409
3410    return 0;
3411}
3412
3413static int check_buffer_access(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, int regno, int off,
3414                               int size, bool zero_size_allowed, const char *buf_info, u32 *max_access)
3415{
3416    int err;
3417
3418    err = __check_buffer_access(env, buf_info, reg, regno, off, size);
3419    if (err) {
3420        return err;
3421    }
3422
3423    if (off + size > *max_access) {
3424        *max_access = off + size;
3425    }
3426
3427    return 0;
3428}
3429
/* BPF architecture zero extends alu32 ops into 64-bit registers */
3431static void zext_32_to_64(struct bpf_reg_state *reg)
3432{
3433    reg->var_off = tnum_subreg(reg->var_off);
3434    verifier_reg_assign_32_into_64(reg);
3435}
3436
3437/* truncate register to smaller size (in bytes)
3438 * must be called with size < BPF_REG_SIZE
3439 */
3440static void coerce_reg_to_size(struct bpf_reg_state *reg, int size)
3441{
3442    u64 mask;
3443
3444    /* clear high bits in bit representation */
3445    reg->var_off = tnum_cast(reg->var_off, size);
3446
3447    /* fix arithmetic bounds */
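    /* Sketch of the effect (illustrative numbers): for size == 2 the mask is
     * 0xffff; bounds [0x10003, 0x10007] share the same upper bits and become
     * [3, 7], whereas [0xfffe, 0x10002] do not and collapse to [0, 0xffff].
     */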
3448    mask = ((u64)1 << (size * VERIFIER_EIGHT)) - 1;
3449    if ((reg->umin_value & ~mask) == (reg->umax_value & ~mask)) {
3450        reg->umin_value &= mask;
3451        reg->umax_value &= mask;
3452    } else {
3453        reg->umin_value = 0;
3454        reg->umax_value = mask;
3455    }
3456    reg->smin_value = reg->umin_value;
3457    reg->smax_value = reg->umax_value;
3458
3459    /* If size is smaller than 32bit register the 32bit register
3460     * values are also truncated so we push 64-bit bounds into
3461     * 32-bit bounds. Above were truncated < 32-bits already.
3462     */
3463    if (size >= VERIFIER_FOUR) {
3464        return;
3465    }
3466    __reg_combine_64_into_32(reg);
3467}
3468
3469static bool bpf_map_is_rdonly(const struct bpf_map *map)
3470{
3471    return (map->map_flags & BPF_F_RDONLY_PROG) && map->frozen;
3472}
3473
3474static int bpf_map_direct_read(struct bpf_map *map, int off, int size, u64 *val)
3475{
3476    void *ptr;
3477    u64 addr;
3478    int err;
3479
3480    err = map->ops->map_direct_value_addr(map, &addr, off);
3481    if (err) {
3482        return err;
3483    }
3484    ptr = (void *)(long)addr + off;
3485
    switch (size) {
        case sizeof(u8):
            *val = (u64)*(u8 *)ptr;
            break;
        case sizeof(u16):
            *val = (u64)*(u16 *)ptr;
            break;
        case sizeof(u32):
            *val = (u64)*(u32 *)ptr;
            break;
        case sizeof(u64):
            *val = *(u64 *)ptr;
            break;
        default:
            return -EINVAL;
    }
3502    return 0;
3503}
3504
3505static int check_ptr_to_btf_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off,
3506                                   int size, enum bpf_access_type atype, int value_regno)
3507{
3508    struct bpf_reg_state *reg = regs + regno;
3509    const struct btf_type *t = btf_type_by_id(btf_vmlinux, reg->btf_id);
3510    const char *tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3511    u32 btf_id;
3512    int ret;
3513
3514    if (off < 0) {
3515        verbose(env, "R%d is ptr_%s invalid negative access: off=%d\n", regno, tname, off);
3516        return -EACCES;
3517    }
3518    if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
3519        char tn_buf[48];
3520
3521        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3522        verbose(env, "R%d is ptr_%s invalid variable offset: off=%d, var_off=%s\n", regno, tname, off, tn_buf);
3523        return -EACCES;
3524    }
3525
3526    if (env->ops->btf_struct_access) {
3527        ret = env->ops->btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3528    } else {
3529        if (atype != BPF_READ) {
3530            verbose(env, "only read is supported\n");
3531            return -EACCES;
3532        }
3533
3534        ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3535    }
3536
3537    if (ret < 0) {
3538        return ret;
3539    }
3540
3541    if (atype == BPF_READ && value_regno >= 0) {
3542        mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3543    }
3544
3545    return 0;
3546}
3547
3548static int check_ptr_to_map_access(struct bpf_verifier_env *env, struct bpf_reg_state *regs, int regno, int off,
3549                                   int size, enum bpf_access_type atype, int value_regno)
3550{
3551    struct bpf_reg_state *reg = regs + regno;
3552    struct bpf_map *map = reg->map_ptr;
3553    const struct btf_type *t;
3554    const char *tname;
3555    u32 btf_id;
3556    int ret;
3557
3558    if (!btf_vmlinux) {
3559        verbose(env, "map_ptr access not supported without CONFIG_DEBUG_INFO_BTF\n");
3560        return -ENOTSUPP;
3561    }
3562
3563    if (!map->ops->map_btf_id || !*map->ops->map_btf_id) {
3564        verbose(env, "map_ptr access not supported for map type %d\n", map->map_type);
3565        return -ENOTSUPP;
3566    }
3567
3568    t = btf_type_by_id(btf_vmlinux, *map->ops->map_btf_id);
3569    tname = btf_name_by_offset(btf_vmlinux, t->name_off);
3570
3571    if (!env->allow_ptr_to_map_access) {
3572        verbose(env, "%s access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN\n", tname);
3573        return -EPERM;
3574    }
3575
3576    if (off < 0) {
3577        verbose(env, "R%d is %s invalid negative access: off=%d\n", regno, tname, off);
3578        return -EACCES;
3579    }
3580
3581    if (atype != BPF_READ) {
3582        verbose(env, "only read from %s is supported\n", tname);
3583        return -EACCES;
3584    }
3585
3586    ret = btf_struct_access(&env->log, t, off, size, atype, &btf_id);
3587    if (ret < 0) {
3588        return ret;
3589    }
3590
3591    if (value_regno >= 0) {
3592        mark_btf_ld_reg(env, regs, value_regno, ret, btf_id);
3593    }
3594
3595    return 0;
3596}
3597
3598/* Check that the stack access at the given offset is within bounds. The
3599 * maximum valid offset is -1.
3600 *
3601 * The minimum valid offset is -MAX_BPF_STACK for writes, and
3602 * -state->allocated_stack for reads.
3603 */
3604static int check_stack_slot_within_bounds(int off, struct bpf_func_state *state, enum bpf_access_type t)
3605{
3606    int min_valid_off;
3607
3608    if (t == BPF_WRITE) {
3609        min_valid_off = -MAX_BPF_STACK;
3610    } else {
3611        min_valid_off = -state->allocated_stack;
3612    }
3613
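    /* For example (illustrative): a write at off == -8 is always within the
     * maximum stack, a read at off == -8 needs at least 8 bytes of already
     * allocated stack, and off == 0 (at or above the frame pointer) is
     * rejected in either case.
     */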
3614    if (off < min_valid_off || off > -1) {
3615        return -EACCES;
3616    }
3617    return 0;
3618}
3619
3620/* Check that the stack access at 'regno + off' falls within the maximum stack
3621 * bounds.
3622 *
3623 * 'off' includes `regno->offset`, but not its dynamic part (if any).
3624 */
3625static int check_stack_access_within_bounds(struct bpf_verifier_env *env, int regno, int off, int access_size,
3626                                            enum stack_access_src src, enum bpf_access_type type)
3627{
3628    struct bpf_reg_state *regs = cur_regs(env);
3629    struct bpf_reg_state *reg = regs + regno;
3630    struct bpf_func_state *state = func(env, reg);
3631    int min_off, max_off;
3632    int err;
3633    char *err_extra;
3634
3635    if (src == ACCESS_HELPER) {
3636        /* We don't know if helpers are reading or writing (or both). */
3637        err_extra = " indirect access to";
3638    } else if (type == BPF_READ) {
3639        err_extra = " read from";
3640    } else {
3641        err_extra = " write to";
3642    }
3643
3644    if (tnum_is_const(reg->var_off)) {
3645        min_off = reg->var_off.value + off;
3646        if (access_size > 0) {
3647            max_off = min_off + access_size - 1;
3648        } else {
3649            max_off = min_off;
3650        }
3651    } else {
3652        if (reg->smax_value >= BPF_MAX_VAR_OFF || reg->smin_value <= -BPF_MAX_VAR_OFF) {
3653            verbose(env, "invalid unbounded variable-offset%s stack R%d\n", err_extra, regno);
3654            return -EACCES;
3655        }
3656        min_off = reg->smin_value + off;
3657        if (access_size > 0) {
3658            max_off = reg->smax_value + off + access_size - 1;
3659        } else {
3660            max_off = min_off;
3661        }
3662    }
3663
3664    err = check_stack_slot_within_bounds(min_off, state, type);
3665    if (!err) {
3666        err = check_stack_slot_within_bounds(max_off, state, type);
3667    }
3668
3669    if (err) {
3670        if (tnum_is_const(reg->var_off)) {
3671            verbose(env, "invalid%s stack R%d off=%d size=%d\n", err_extra, regno, off, access_size);
3672        } else {
3673            char tn_buf[48];
3674
3675            tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3676            verbose(env, "invalid variable-offset%s stack R%d var_off=%s size=%d\n", err_extra, regno, tn_buf,
3677                    access_size);
3678        }
3679    }
3680    return err;
3681}
3682
3683/* check whether memory at (regno + off) is accessible for t = (read | write)
3684 * if t==write, value_regno is a register which value is stored into memory
3685 * if t==read, value_regno is a register which will receive the value from memory
3686 * if t==write && value_regno==-1, some unknown value is stored into memory
3687 * if t==read && value_regno==-1, don't care what we read from memory
3688 */
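/* Illustrative example (not from the original sources): for
 *    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 16),
 * this is called with regno=1, off=16, bpf_size=BPF_W, t=BPF_READ and
 * value_regno=0, so R0 receives the loaded value.
 */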
3689static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regno, int off, int bpf_size,
3690                            enum bpf_access_type t, int value_regno, bool strict_alignment_once)
3691{
3692    struct bpf_reg_state *regs = cur_regs(env);
3693    struct bpf_reg_state *reg = regs + regno;
3694    struct bpf_func_state *state;
3695    int size, err = 0;
3696
3697    size = bpf_size_to_bytes(bpf_size);
3698    if (size < 0) {
3699        return size;
3700    }
3701
3702    /* alignment checks will add in reg->off themselves */
3703    err = check_ptr_alignment(env, reg, off, size, strict_alignment_once);
3704    if (err) {
3705        return err;
3706    }
3707
3708    /* for access checks, reg->off is just part of off */
3709    off += reg->off;
3710
3711    if (reg->type == PTR_TO_MAP_VALUE) {
3712        if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3713            verbose(env, "R%d leaks addr into map\n", value_regno);
3714            return -EACCES;
3715        }
3716        err = check_map_access_type(env, regno, off, size, t);
3717        if (err) {
3718            return err;
3719        }
3720        err = check_map_access(env, regno, off, size, false);
3721        if (!err && t == BPF_READ && value_regno >= 0) {
3722            struct bpf_map *map = reg->map_ptr;
3723
3724            /* if map is read-only, track its contents as scalars */
3725            if (tnum_is_const(reg->var_off) && bpf_map_is_rdonly(map) && map->ops->map_direct_value_addr) {
3726                int map_off = off + reg->var_off.value;
3727                u64 val = 0;
3728
3729                err = bpf_map_direct_read(map, map_off, size, &val);
3730                if (err) {
3731                    return err;
3732                }
3733
3734                regs[value_regno].type = SCALAR_VALUE;
3735                verifier_mark_reg_known(&regs[value_regno], val);
3736            } else {
3737                mark_reg_unknown(env, regs, value_regno);
3738            }
3739        }
3740    } else if (base_type(reg->type) == PTR_TO_MEM) {
3741        bool rdonly_mem = type_is_rdonly_mem(reg->type);
3742
3743        if (type_may_be_null(reg->type)) {
3744            verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str(env, reg->type));
3745            return -EACCES;
3746        }
3747
3748        if (t == BPF_WRITE && rdonly_mem) {
3749            verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3750            return -EACCES;
3751        }
3752
3753        if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3754            verbose(env, "R%d leaks addr into mem\n", value_regno);
3755            return -EACCES;
3756        }
3757
3758        err = check_mem_region_access(env, regno, off, size, reg->mem_size, false);
3759        if (!err && value_regno >= 0 && (t == BPF_READ || rdonly_mem)) {
3760            mark_reg_unknown(env, regs, value_regno);
3761        }
3762    } else if (reg->type == PTR_TO_CTX) {
3763        enum bpf_reg_type reg_type = SCALAR_VALUE;
3764        u32 btf_id = 0;
3765
3766        if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3767            verbose(env, "R%d leaks addr into ctx\n", value_regno);
3768            return -EACCES;
3769        }
3770
3771        err = check_ptr_off_reg(env, reg, regno);
3772        if (err < 0) {
3773            return err;
3774        }
3775
3776        err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf_id);
3777        if (err) {
3778            verbose_linfo(env, insn_idx, "; ");
3779        }
3780        if (!err && t == BPF_READ && value_regno >= 0) {
3781            /* ctx access returns either a scalar, or a
3782             * PTR_TO_PACKET[_META,_END]. In the latter
3783             * case, we know the offset is zero.
3784             */
3785            if (reg_type == SCALAR_VALUE) {
3786                mark_reg_unknown(env, regs, value_regno);
3787            } else {
3788                mark_reg_known_zero(env, regs, value_regno);
3789                if (type_may_be_null(reg_type)) {
3790                    regs[value_regno].id = ++env->id_gen;
3791                }
                /* A load of a ctx field could have an actual load size
                 * different from the one encoded in the insn. When the
                 * dst is a PTR, it is for sure not a sub-register.
                 */
3797                regs[value_regno].subreg_def = DEF_NOT_SUBREG;
3798                if (base_type(reg_type) == PTR_TO_BTF_ID) {
3799                    regs[value_regno].btf_id = btf_id;
3800                }
3801            }
3802            regs[value_regno].type = reg_type;
3803        }
3804    } else if (reg->type == PTR_TO_STACK) {
3805        /* Basic bounds checks. */
3806        err = check_stack_access_within_bounds(env, regno, off, size, ACCESS_DIRECT, t);
3807        if (err) {
3808            return err;
3809        }
3810        state = func(env, reg);
3811        err = update_stack_depth(env, state, off);
3812        if (err) {
3813            return err;
3814        }
3815
3816        if (t == BPF_READ) {
3817            err = check_stack_read(env, regno, off, size, value_regno);
3818        } else {
3819            err = check_stack_write(env, regno, off, size, value_regno, insn_idx);
3820        }
3821    } else if (reg_is_pkt_pointer(reg)) {
3822        if (t == BPF_WRITE && !may_access_direct_pkt_data(env, NULL, t)) {
3823            verbose(env, "cannot write into packet\n");
3824            return -EACCES;
3825        }
3826        if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3827            verbose(env, "R%d leaks addr into packet\n", value_regno);
3828            return -EACCES;
3829        }
3830        err = check_packet_access(env, regno, off, size, false);
3831        if (!err && t == BPF_READ && value_regno >= 0) {
3832            mark_reg_unknown(env, regs, value_regno);
3833        }
3834    } else if (reg->type == PTR_TO_FLOW_KEYS) {
3835        if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) {
3836            verbose(env, "R%d leaks addr into flow keys\n", value_regno);
3837            return -EACCES;
3838        }
3839
3840        err = check_flow_keys_access(env, off, size);
3841        if (!err && t == BPF_READ && value_regno >= 0) {
3842            mark_reg_unknown(env, regs, value_regno);
3843        }
3844    } else if (type_is_sk_pointer(reg->type)) {
3845        if (t == BPF_WRITE) {
3846            verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3847            return -EACCES;
3848        }
3849        err = check_sock_access(env, insn_idx, regno, off, size, t);
3850        if (!err && value_regno >= 0) {
3851            mark_reg_unknown(env, regs, value_regno);
3852        }
3853    } else if (reg->type == PTR_TO_TP_BUFFER) {
3854        err = check_tp_buffer_access(env, reg, regno, off, size);
3855        if (!err && t == BPF_READ && value_regno >= 0) {
3856            mark_reg_unknown(env, regs, value_regno);
3857        }
3858    } else if (reg->type == PTR_TO_BTF_ID) {
3859        err = check_ptr_to_btf_access(env, regs, regno, off, size, t, value_regno);
3860    } else if (reg->type == CONST_PTR_TO_MAP) {
3861        err = check_ptr_to_map_access(env, regs, regno, off, size, t, value_regno);
3862    } else if (base_type(reg->type) == PTR_TO_BUF) {
3863        bool rdonly_mem = type_is_rdonly_mem(reg->type);
3864        const char *buf_info;
3865        u32 *max_access;
3866
3867        if (rdonly_mem) {
3868            if (t == BPF_WRITE) {
3869                verbose(env, "R%d cannot write into %s\n", regno, reg_type_str(env, reg->type));
3870                return -EACCES;
3871            }
3872            buf_info = "rdonly";
3873            max_access = &env->prog->aux->max_rdonly_access;
3874        } else {
3875            buf_info = "rdwr";
3876            max_access = &env->prog->aux->max_rdwr_access;
3877        }
3878
3879        err = check_buffer_access(env, reg, regno, off, size, false, buf_info, max_access);
3880        if (!err && value_regno >= 0 && (rdonly_mem || t == BPF_READ)) {
3881            mark_reg_unknown(env, regs, value_regno);
3882        }
3883    } else {
3884        verbose(env, "R%d invalid mem access '%s'\n", regno, reg_type_str(env, reg->type));
3885        return -EACCES;
3886    }
3887
3888    if (!err && size < BPF_REG_SIZE && value_regno >= 0 && t == BPF_READ && regs[value_regno].type == SCALAR_VALUE) {
3889        /* b/h/w load zero-extends, mark upper bits as known 0 */
3890        coerce_reg_to_size(&regs[value_regno], size);
3891    }
3892    return err;
3893}
3894
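/* Illustrative example: an atomic add such as
 *    BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8)
 * adds R0 to the 4-byte stack slot at fp-8; it is verified below as a read
 * followed by a write of the same memory.
 */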
3895static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_insn *insn)
3896{
3897    int err;
3898
3899    if ((BPF_SIZE(insn->code) != BPF_W && BPF_SIZE(insn->code) != BPF_DW) || insn->imm != 0) {
3900        verbose(env, "BPF_XADD uses reserved fields\n");
3901        return -EINVAL;
3902    }
3903
3904    /* check src1 operand */
3905    err = check_reg_arg(env, insn->src_reg, SRC_OP);
3906    if (err) {
3907        return err;
3908    }
3909
3910    /* check src2 operand */
3911    err = check_reg_arg(env, insn->dst_reg, SRC_OP);
3912    if (err) {
3913        return err;
3914    }
3915
3916    if (is_pointer_value(env, insn->src_reg)) {
3917        verbose(env, "R%d leaks addr into mem\n", insn->src_reg);
3918        return -EACCES;
3919    }
3920
3921    if (is_ctx_reg(env, insn->dst_reg) || is_pkt_reg(env, insn->dst_reg) || is_flow_key_reg(env, insn->dst_reg) ||
3922        is_sk_reg(env, insn->dst_reg)) {
3923        verbose(env, "BPF_XADD stores into R%d %s is not allowed\n", insn->dst_reg,
3924                reg_type_str(env, reg_state(env, insn->dst_reg)->type));
3925        return -EACCES;
3926    }
3927
3928    /* check whether atomic_add can read the memory */
3929    err = check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_READ, -1, true);
3930    if (err) {
3931        return err;
3932    }
3933
3934    /* check whether atomic_add can write into the same memory */
3935    return check_mem_access(env, insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1, true);
3936}
3937
3938/* When register 'regno' is used to read the stack (either directly or through
3939 * a helper function) make sure that it's within stack boundary and, depending
3940 * on the access type, that all elements of the stack are initialized.
3941 *
3942 * 'off' includes 'regno->off', but not its dynamic part (if any).
3943 *
3944 * All registers that have been spilled on the stack in the slots within the
3945 * read offsets are marked as read.
3946 */
3947static int check_stack_range_initialized(struct bpf_verifier_env *env, int regno, int off, int access_size,
3948                                         bool zero_size_allowed, enum stack_access_src type,
3949                                         struct bpf_call_arg_meta *meta)
3950{
3951    struct bpf_reg_state *reg = reg_state(env, regno);
3952    struct bpf_func_state *state = func(env, reg);
3953    int err, min_off, max_off, i, j, slot, spi;
3954    char *err_extra = type == ACCESS_HELPER ? " indirect" : "";
3955    enum bpf_access_type bounds_check_type;
3956    /* Some accesses can write anything into the stack, others are
3957     * read-only.
3958     */
3959    bool clobber = false;
3960
3961    if (access_size == 0 && !zero_size_allowed) {
3962        verbose(env, "invalid zero-sized read\n");
3963        return -EACCES;
3964    }
3965
3966    if (type == ACCESS_HELPER) {
3967        /* The bounds checks for writes are more permissive than for
3968         * reads. However, if raw_mode is not set, we'll do extra
3969         * checks below.
3970         */
3971        bounds_check_type = BPF_WRITE;
3972        clobber = true;
3973    } else {
3974        bounds_check_type = BPF_READ;
3975    }
3976    err = check_stack_access_within_bounds(env, regno, off, access_size, type, bounds_check_type);
3977    if (err) {
3978        return err;
3979    }
3980
3981    if (tnum_is_const(reg->var_off)) {
3982        min_off = max_off = reg->var_off.value + off;
3983    } else {
3984        /* Variable offset is prohibited for unprivileged mode for
3985         * simplicity since it requires corresponding support in
3986         * Spectre masking for stack ALU.
3987         * See also retrieve_ptr_limit().
3988         */
3989        if (!env->bypass_spec_v1) {
3990            char tn_buf[48];
3991
3992            tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
3993            verbose(env, "R%d%s variable offset stack access prohibited for !root, var_off=%s\n", regno, err_extra,
3994                    tn_buf);
3995            return -EACCES;
3996        }
        /* Only an initialized buffer on the stack may be accessed with a
         * variable offset. With an uninitialized buffer it's hard to
         * guarantee that the whole memory is marked as initialized on
         * helper return, since the exact bounds are unknown, which could
         * lead to uninitialized stack data leaking.
         */
4003        if (meta && meta->raw_mode) {
4004            meta = NULL;
4005        }
4006
4007        min_off = reg->smin_value + off;
4008        max_off = reg->smax_value + off;
4009    }
4010
4011    if (meta && meta->raw_mode) {
4012        meta->access_size = access_size;
4013        meta->regno = regno;
4014        return 0;
4015    }
4016
4017    for (i = min_off; i < max_off + access_size; i++) {
4018        u8 *stype;
4019
4020        slot = -i - 1;
4021        spi = slot / BPF_REG_SIZE;
4022        if (state->allocated_stack <= slot) {
4023            goto err;
4024        }
4025        stype = &state->stack[spi].slot_type[slot % BPF_REG_SIZE];
4026        if (*stype == STACK_MISC) {
4027            goto mark;
4028        }
4029        if (*stype == STACK_ZERO) {
4030            if (clobber) {
4031                /* helper can write anything into the stack */
4032                *stype = STACK_MISC;
4033            }
4034            goto mark;
4035        }
4036
4037        if (state->stack[spi].slot_type[0] == STACK_SPILL && state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID) {
4038            goto mark;
4039        }
4040
4041        if (state->stack[spi].slot_type[0] == STACK_SPILL &&
4042            (state->stack[spi].spilled_ptr.type == SCALAR_VALUE || env->allow_ptr_leaks)) {
4043            if (clobber) {
4044                __mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
4045                for (j = 0; j < BPF_REG_SIZE; j++) {
4046                    state->stack[spi].slot_type[j] = STACK_MISC;
4047                }
4048            }
4049            goto mark;
4050        }
4051
4052    err:
4053        if (tnum_is_const(reg->var_off)) {
4054            verbose(env, "invalid%s read from stack R%d off %d+%d size %d\n", err_extra, regno, min_off, i - min_off,
4055                    access_size);
4056        } else {
4057            char tn_buf[48];
4058
4059            tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
4060            verbose(env, "invalid%s read from stack R%d var_off %s+%d size %d\n", err_extra, regno, tn_buf, i - min_off,
4061                    access_size);
4062        }
4063        return -EACCES;
4064    mark:
4065        /* reading any byte out of 8-byte 'spill_slot' will cause
4066         * the whole slot to be marked as 'read'
4067         */
4068        mark_reg_read(env, &state->stack[spi].spilled_ptr, state->stack[spi].spilled_ptr.parent, REG_LIVE_READ64);
4069    }
4070    return update_stack_depth(env, state, min_off);
4071}
4072
4073static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed,
4074                                   struct bpf_call_arg_meta *meta)
4075{
4076    struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4077    const char *buf_info;
4078    u32 *max_access;
4079
4080    switch (base_type(reg->type)) {
4081        case PTR_TO_PACKET:
4082        case PTR_TO_PACKET_META:
4083            return check_packet_access(env, regno, reg->off, access_size, zero_size_allowed);
4084        case PTR_TO_MAP_VALUE:
4085            if (check_map_access_type(env, regno, reg->off, access_size,
4086                                      meta && meta->raw_mode ? BPF_WRITE : BPF_READ)) {
4087                return -EACCES;
4088            }
4089            return check_map_access(env, regno, reg->off, access_size, zero_size_allowed);
4090        case PTR_TO_MEM:
4091            return check_mem_region_access(env, regno, reg->off, access_size, reg->mem_size, zero_size_allowed);
4092        case PTR_TO_BUF:
4093            if (type_is_rdonly_mem(reg->type)) {
4094                if (meta && meta->raw_mode) {
4095                    return -EACCES;
4096                }
4097
4098                buf_info = "rdonly";
4099                max_access = &env->prog->aux->max_rdonly_access;
4100            } else {
4101                buf_info = "rdwr";
4102                max_access = &env->prog->aux->max_rdwr_access;
4103            }
4104            return check_buffer_access(env, reg, regno, reg->off, access_size, zero_size_allowed, buf_info, max_access);
4105        case PTR_TO_STACK:
4106            return check_stack_range_initialized(env, regno, reg->off, access_size, zero_size_allowed, ACCESS_HELPER,
4107                                                 meta);
4108        default: /* scalar_value or invalid ptr */
4109            /* Allow zero-byte read from NULL, regardless of pointer type */
4110            if (zero_size_allowed && access_size == 0 && register_is_null(reg)) {
4111                return 0;
4112            }
4113
4114            verbose(env, "R%d type=%s ", regno, reg_type_str(env, reg->type));
4115            verbose(env, "expected=%s\n", reg_type_str(env, PTR_TO_STACK));
4116            return -EACCES;
4117    }
4118}
4119
4120/* Implementation details:
4121 * bpf_map_lookup returns PTR_TO_MAP_VALUE_OR_NULL
4122 * Two bpf_map_lookups (even with the same key) will have different reg->id.
4123 * For traditional PTR_TO_MAP_VALUE the verifier clears reg->id after
4124 * value_or_null->value transition, since the verifier only cares about
4125 * the range of access to valid map value pointer and doesn't care about actual
4126 * address of the map element.
4127 * For maps with 'struct bpf_spin_lock' inside map value the verifier keeps
4128 * reg->id > 0 after value_or_null->value transition. By doing so
4129 * two bpf_map_lookups will be considered two different pointers that
4130 * point to different bpf_spin_locks.
4131 * The verifier allows taking only one bpf_spin_lock at a time to avoid
4132 * dead-locks.
4133 * Since only one bpf_spin_lock is allowed the checks are simpler than
4134 * reg_is_refcounted() logic. The verifier needs to remember only
4135 * one spin_lock instead of array of acquired_refs.
4136 * cur_state->active_spin_lock remembers which map value element got locked
4137 * and clears it after bpf_spin_unlock.
4138 */
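/* Illustrative sequence: after two bpf_map_lookup_elem() calls on a map whose
 * value contains a struct bpf_spin_lock, the two returned pointers carry
 * different reg->id values, so bpf_spin_lock() on one and bpf_spin_unlock()
 * on the other is rejected as "bpf_spin_unlock of different lock".
 */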
4139static int process_spin_lock(struct bpf_verifier_env *env, int regno, bool is_lock)
4140{
4141    struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4142    struct bpf_verifier_state *cur = env->cur_state;
4143    bool is_const = tnum_is_const(reg->var_off);
4144    struct bpf_map *map = reg->map_ptr;
4145    u64 val = reg->var_off.value;
4146
4147    if (!is_const) {
4148        verbose(env, "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", regno);
4149        return -EINVAL;
4150    }
4151    if (!map->btf) {
4152        verbose(env, "map '%s' has to have BTF in order to use bpf_spin_lock\n", map->name);
4153        return -EINVAL;
4154    }
4155    if (!map_value_has_spin_lock(map)) {
4156        if (map->spin_lock_off == -E2BIG) {
4157            verbose(env, "map '%s' has more than one 'struct bpf_spin_lock'\n", map->name);
4158        } else if (map->spin_lock_off == -ENOENT) {
4159            verbose(env, "map '%s' doesn't have 'struct bpf_spin_lock'\n", map->name);
4160        } else {
4161            verbose(env, "map '%s' is not a struct type or bpf_spin_lock is mangled\n", map->name);
4162        }
4163        return -EINVAL;
4164    }
4165    if (map->spin_lock_off != val + reg->off) {
4166        verbose(env, "off %lld doesn't point to 'struct bpf_spin_lock'\n", val + reg->off);
4167        return -EINVAL;
4168    }
4169    if (is_lock) {
4170        if (cur->active_spin_lock) {
4171            verbose(env, "Locking two bpf_spin_locks are not allowed\n");
4172            return -EINVAL;
4173        }
4174        cur->active_spin_lock = reg->id;
4175    } else {
4176        if (!cur->active_spin_lock) {
4177            verbose(env, "bpf_spin_unlock without taking a lock\n");
4178            return -EINVAL;
4179        }
4180        if (cur->active_spin_lock != reg->id) {
4181            verbose(env, "bpf_spin_unlock of different lock\n");
4182            return -EINVAL;
4183        }
4184        cur->active_spin_lock = 0;
4185    }
4186    return 0;
4187}
4188
4189static bool arg_type_is_mem_ptr(enum bpf_arg_type type)
4190{
4191    return base_type(type) == ARG_PTR_TO_MEM || base_type(type) == ARG_PTR_TO_UNINIT_MEM;
4192}
4193
4194static bool arg_type_is_mem_size(enum bpf_arg_type type)
4195{
4196    return type == ARG_CONST_SIZE || type == ARG_CONST_SIZE_OR_ZERO;
4197}
4198
4199static bool arg_type_is_alloc_size(enum bpf_arg_type type)
4200{
4201    return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
4202}
4203
4204static bool arg_type_is_int_ptr(enum bpf_arg_type type)
4205{
4206    return type == ARG_PTR_TO_INT || type == ARG_PTR_TO_LONG;
4207}
4208
4209static int int_ptr_type_to_size(enum bpf_arg_type type)
4210{
4211    if (type == ARG_PTR_TO_INT) {
4212        return sizeof(u32);
4213    } else if (type == ARG_PTR_TO_LONG) {
4214        return sizeof(u64);
4215    }
4216
4217    return -EINVAL;
4218}
4219
4220static int resolve_map_arg_type(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta,
4221                                enum bpf_arg_type *arg_type)
4222{
4223    if (!meta->map_ptr) {
4224        /* kernel subsystem misconfigured verifier */
4225        verbose(env, "invalid map_ptr to access map->type\n");
4226        return -EACCES;
4227    }
4228
4229    switch (meta->map_ptr->map_type) {
4230        case BPF_MAP_TYPE_SOCKMAP:
4231        case BPF_MAP_TYPE_SOCKHASH:
4232            if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
4233                *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
4234            } else {
4235                verbose(env, "invalid arg_type for sockmap/sockhash\n");
4236                return -EINVAL;
4237            }
4238            break;
4239
4240        default:
4241            break;
4242    }
4243    return 0;
4244}
4245
4246struct bpf_reg_types {
4247    const enum bpf_reg_type types[10];
4248    u32 *btf_id;
4249};
4250
4251static const struct bpf_reg_types map_key_value_types = {
4252    .types =
4253        {
4254            PTR_TO_STACK,
4255            PTR_TO_PACKET,
4256            PTR_TO_PACKET_META,
4257            PTR_TO_MAP_VALUE,
4258        },
4259};
4260
4261static const struct bpf_reg_types sock_types = {
4262    .types =
4263        {
4264            PTR_TO_SOCK_COMMON,
4265            PTR_TO_SOCKET,
4266            PTR_TO_TCP_SOCK,
4267            PTR_TO_XDP_SOCK,
4268        },
4269};
4270
4271#ifdef CONFIG_NET
4272static const struct bpf_reg_types btf_id_sock_common_types = {
4273    .types =
4274        {
4275            PTR_TO_SOCK_COMMON,
4276            PTR_TO_SOCKET,
4277            PTR_TO_TCP_SOCK,
4278            PTR_TO_XDP_SOCK,
4279            PTR_TO_BTF_ID,
4280        },
4281    .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
4282};
4283#endif
4284
4285static const struct bpf_reg_types mem_types = {
4286    .types =
4287        {
4288            PTR_TO_STACK,
4289            PTR_TO_PACKET,
4290            PTR_TO_PACKET_META,
4291            PTR_TO_MAP_VALUE,
4292            PTR_TO_MEM,
4293            PTR_TO_MEM | MEM_ALLOC,
4294            PTR_TO_BUF,
4295        },
4296};
4297
4298static const struct bpf_reg_types int_ptr_types = {
4299    .types =
4300        {
4301            PTR_TO_STACK,
4302            PTR_TO_PACKET,
4303            PTR_TO_PACKET_META,
4304            PTR_TO_MAP_VALUE,
4305        },
4306};
4307
4308static const struct bpf_reg_types fullsock_types = {.types = {PTR_TO_SOCKET}};
4309static const struct bpf_reg_types scalar_types = {.types = {SCALAR_VALUE}};
4310static const struct bpf_reg_types context_types = {.types = {PTR_TO_CTX}};
4311static const struct bpf_reg_types alloc_mem_types = {.types = {PTR_TO_MEM | MEM_ALLOC}};
4312static const struct bpf_reg_types const_map_ptr_types = {.types = {CONST_PTR_TO_MAP}};
4313static const struct bpf_reg_types btf_ptr_types = {.types = {PTR_TO_BTF_ID}};
4314static const struct bpf_reg_types spin_lock_types = {.types = {PTR_TO_MAP_VALUE}};
4315static const struct bpf_reg_types percpu_btf_ptr_types = {.types = {PTR_TO_PERCPU_BTF_ID}};
4316
4317static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
4318    [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
4319    [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
4320    [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
4321    [ARG_CONST_SIZE] = &scalar_types,
4322    [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
4323    [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
4324    [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
4325    [ARG_PTR_TO_CTX] = &context_types,
4326    [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
4327#ifdef CONFIG_NET
4328    [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
4329#endif
4330    [ARG_PTR_TO_SOCKET] = &fullsock_types,
4331    [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
4332    [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
4333    [ARG_PTR_TO_MEM] = &mem_types,
4334    [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
4335    [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
4336    [ARG_PTR_TO_INT] = &int_ptr_types,
4337    [ARG_PTR_TO_LONG] = &int_ptr_types,
4338    [ARG_PTR_TO_PERCPU_BTF_ID] = &percpu_btf_ptr_types,
4339};
4340
4341static int check_reg_type(struct bpf_verifier_env *env, u32 regno, enum bpf_arg_type arg_type, const u32 *arg_btf_id)
4342{
4343    struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4344    enum bpf_reg_type expected, type = reg->type;
4345    const struct bpf_reg_types *compatible;
4346    int i, j;
4347
4348    compatible = compatible_reg_types[base_type(arg_type)];
4349    if (!compatible) {
4350        verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
4351        return -EFAULT;
4352    }
4353
4354    /* ARG_PTR_TO_MEM + RDONLY is compatible with PTR_TO_MEM and PTR_TO_MEM + RDONLY,
4355     * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM and NOT with PTR_TO_MEM + RDONLY
4356     *
4357     * Same for MAYBE_NULL:
4358     *
4359     * ARG_PTR_TO_MEM + MAYBE_NULL is compatible with PTR_TO_MEM and PTR_TO_MEM + MAYBE_NULL,
4360     * but ARG_PTR_TO_MEM is compatible only with PTR_TO_MEM but NOT with PTR_TO_MEM + MAYBE_NULL
4361     *
4362     * Therefore we fold these flags depending on the arg_type before comparison.
4363     */
4364    if (arg_type & MEM_RDONLY) {
4365        type &= ~MEM_RDONLY;
4366    }
4367    if (arg_type & PTR_MAYBE_NULL) {
4368        type &= ~PTR_MAYBE_NULL;
4369    }
4370
4371    for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
4372        expected = compatible->types[i];
4373        if (expected == NOT_INIT) {
4374            break;
4375        }
4376
4377        if (type == expected) {
4378            goto found;
4379        }
4380    }
4381
4382    verbose(env, "R%d type=%s expected=", regno, reg_type_str(env, reg->type));
4383    for (j = 0; j + 1 < i; j++) {
4384        verbose(env, "%s, ", reg_type_str(env, compatible->types[j]));
4385    }
4386    verbose(env, "%s\n", reg_type_str(env, compatible->types[j]));
4387    return -EACCES;
4388
4389found:
4390    if (reg->type == PTR_TO_BTF_ID) {
4391        if (!arg_btf_id) {
4392            if (!compatible->btf_id) {
4393                verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
4394                return -EFAULT;
4395            }
4396            arg_btf_id = compatible->btf_id;
4397        }
4398
4399        if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, *arg_btf_id)) {
4400            verbose(env, "R%d is of type %s but %s is expected\n", regno, kernel_type_name(reg->btf_id),
4401                    kernel_type_name(*arg_btf_id));
4402            return -EACCES;
4403        }
4404    }
4405
4406    return 0;
4407}
4408
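/* Verify one helper argument: check the register against the expected
 * arg_type, enforce offset/reference rules, and collect per-call state
 * (map pointer, memory sizes, spin lock, ref_obj_id) in 'meta' for use
 * by later arguments and by the caller.
 */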
4409static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta,
4410                          const struct bpf_func_proto *fn)
4411{
4412    u32 regno = BPF_REG_1 + arg;
4413    struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
4414    enum bpf_arg_type arg_type = fn->arg_type[arg];
4415    enum bpf_reg_type type = reg->type;
4416    int err = 0;
4417
4418    if (arg_type == ARG_DONTCARE) {
4419        return 0;
4420    }
4421
4422    err = check_reg_arg(env, regno, SRC_OP);
4423    if (err) {
4424        return err;
4425    }
4426
4427    if (arg_type == ARG_ANYTHING) {
4428        if (is_pointer_value(env, regno)) {
4429            verbose(env, "R%d leaks addr into helper function\n", regno);
4430            return -EACCES;
4431        }
4432        return 0;
4433    }
4434
4435    if (type_is_pkt_pointer(type) && !may_access_direct_pkt_data(env, meta, BPF_READ)) {
4436        verbose(env, "helper access to the packet is not allowed\n");
4437        return -EACCES;
4438    }
4439
4440    if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4441        err = resolve_map_arg_type(env, meta, &arg_type);
4442        if (err) {
4443            return err;
4444        }
4445    }
4446
4447    if (register_is_null(reg) && type_may_be_null(arg_type)) {
4448        /* A NULL register has a SCALAR_VALUE type, so skip
4449         * type checking.
4450         */
4451        goto skip_type_check;
4452    }
4453
4454    err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
4455    if (err) {
4456        return err;
4457    }
4458
4459    switch ((u32)type) {
4460        case SCALAR_VALUE:
4461        /* Pointer types where reg offset is explicitly allowed: */
4462        case PTR_TO_PACKET:
4463        case PTR_TO_PACKET_META:
4464        case PTR_TO_MAP_VALUE:
4465        case PTR_TO_MEM:
4466        case PTR_TO_MEM | MEM_RDONLY:
4467        case PTR_TO_MEM | MEM_ALLOC:
4468        case PTR_TO_BUF:
4469        case PTR_TO_BUF | MEM_RDONLY:
4470        case PTR_TO_STACK:
4471            /* Some of the argument types nevertheless require a
4472             * zero register offset.
4473             */
4474            if (arg_type == ARG_PTR_TO_ALLOC_MEM) {
4475                goto force_off_check;
4476            }
4477            break;
4478        /* All the rest must be rejected: */
4479        default:
4480            force_off_check:
4481            err = __check_ptr_off_reg(env, reg, regno, type == PTR_TO_BTF_ID);
4482            if (err < 0) {
4483                return err;
4484            }
4485            break;
4486    }
4487
4488skip_type_check:
4489    if (reg->ref_obj_id) {
4490        if (meta->ref_obj_id) {
4491            verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno,
4492                    reg->ref_obj_id, meta->ref_obj_id);
4493            return -EFAULT;
4494        }
4495        meta->ref_obj_id = reg->ref_obj_id;
4496    }
4497
4498    if (arg_type == ARG_CONST_MAP_PTR) {
4499        /* bpf_map_xxx(map_ptr) call: remember that map_ptr */
4500        meta->map_ptr = reg->map_ptr;
4501    } else if (arg_type == ARG_PTR_TO_MAP_KEY) {
4502        /* bpf_map_xxx(..., map_ptr, ..., key) call:
4503         * check that [key, key + map->key_size) are within
4504         * stack limits and initialized
4505         */
4506        if (!meta->map_ptr) {
            /* In the function declaration, map_ptr must come before
             * map_key, so that it's verified and known before we have
             * to check map_key here. Otherwise it means that the kernel
             * subsystem misconfigured the verifier.
             */
4512            verbose(env, "invalid map_ptr to access map->key\n");
4513            return -EACCES;
4514        }
4515        err = check_helper_mem_access(env, regno, meta->map_ptr->key_size, false, NULL);
4516    } else if (base_type(arg_type) == ARG_PTR_TO_MAP_VALUE || base_type(arg_type) == ARG_PTR_TO_UNINIT_MAP_VALUE) {
4517        if (type_may_be_null(arg_type) && register_is_null(reg)) {
4518            return 0;
4519        }
4520
4521        /* bpf_map_xxx(..., map_ptr, ..., value) call:
4522         * check [value, value + map->value_size) validity
4523         */
4524        if (!meta->map_ptr) {
4525            /* kernel subsystem misconfigured verifier */
4526            verbose(env, "invalid map_ptr to access map->value\n");
4527            return -EACCES;
4528        }
4529        meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE);
4530        err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta);
4531    } else if (arg_type == ARG_PTR_TO_PERCPU_BTF_ID) {
4532        if (!reg->btf_id) {
4533            verbose(env, "Helper has invalid btf_id in R%d\n", regno);
4534            return -EACCES;
4535        }
4536        meta->ret_btf_id = reg->btf_id;
4537    } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
4538        if (meta->func_id == BPF_FUNC_spin_lock) {
4539            if (process_spin_lock(env, regno, true)) {
4540                return -EACCES;
4541            }
4542        } else if (meta->func_id == BPF_FUNC_spin_unlock) {
4543            if (process_spin_lock(env, regno, false)) {
4544                return -EACCES;
4545            }
4546        } else {
4547            verbose(env, "verifier internal error\n");
4548            return -EFAULT;
4549        }
4550    } else if (arg_type_is_mem_ptr(arg_type)) {
4551        /* The access to this pointer is only checked when we hit the
4552         * next is_mem_size argument below.
4553         */
4554        meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
4555    } else if (arg_type_is_mem_size(arg_type)) {
4556        bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
4557
        /* This is used to refine the r0 return value bounds for helpers
         * that enforce this value as an upper bound on their return value.
         * See do_refine_retval_range() for the helpers that can refine the
         * return value. The C type of the size argument is u32, so we pull
         * the register bound from umax_value; if it is negative, the
         * verifier errors out below. Only upper bounds can be learned
         * because the retval is an int type and negative retvals are
         * allowed.
         */
4566        meta->msize_max_value = reg->umax_value;
4567
4568        /* The register is SCALAR_VALUE; the access check
4569         * happens using its boundaries.
4570         */
4571        if (!tnum_is_const(reg->var_off)) {
4572            /* For unprivileged variable accesses, disable raw
4573             * mode so that the program is required to
4574             * initialize all the memory that the helper could
4575             * just partially fill up.
4576             */
4577            meta = NULL;
4578        }
4579
4580        if (reg->smin_value < 0) {
4581            verbose(env, "R%d min value is negative, either use unsigned or 'var &= const'\n", regno);
4582            return -EACCES;
4583        }
4584
4585        if (reg->umin_value == 0) {
4586            err = check_helper_mem_access(env, regno - 1, 0, zero_size_allowed, meta);
4587            if (err) {
4588                return err;
4589            }
4590        }
4591
4592        if (reg->umax_value >= BPF_MAX_VAR_SIZ) {
4593            verbose(env, "R%d unbounded memory access, use 'var &= const' or 'if (var < const)'\n", regno);
4594            return -EACCES;
4595        }
4596        err = check_helper_mem_access(env, regno - 1, reg->umax_value, zero_size_allowed, meta);
4597        if (!err) {
4598            err = mark_chain_precision(env, regno);
4599        }
4600    } else if (arg_type_is_alloc_size(arg_type)) {
4601        if (!tnum_is_const(reg->var_off)) {
4602            verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n", regno);
4603            return -EACCES;
4604        }
4605        meta->mem_size = reg->var_off.value;
4606    } else if (arg_type_is_int_ptr(arg_type)) {
4607        int size = int_ptr_type_to_size(arg_type);
4608
4609        err = check_helper_mem_access(env, regno, size, false, meta);
4610        if (err) {
4611            return err;
4612        }
4613        err = check_ptr_alignment(env, reg, 0, size, true);
4614    }
4615
4616    return err;
4617}
4618
4619static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
4620{
4621    enum bpf_attach_type eatype = env->prog->expected_attach_type;
4622    enum bpf_prog_type type = resolve_prog_type(env->prog);
4623
4624    if (func_id != BPF_FUNC_map_update_elem) {
4625        return false;
4626    }
4627
4628    /* It's not possible to get access to a locked struct sock in these
4629     * contexts, so updating is safe.
4630     */
4631    switch (type) {
4632        case BPF_PROG_TYPE_TRACING:
4633            if (eatype == BPF_TRACE_ITER) {
4634                return true;
4635            }
4636            break;
4637        case BPF_PROG_TYPE_SOCKET_FILTER:
4638        case BPF_PROG_TYPE_SCHED_CLS:
4639        case BPF_PROG_TYPE_SCHED_ACT:
4640        case BPF_PROG_TYPE_XDP:
4641        case BPF_PROG_TYPE_SK_REUSEPORT:
4642        case BPF_PROG_TYPE_FLOW_DISSECTOR:
4643        case BPF_PROG_TYPE_SK_LOOKUP:
4644            return true;
4645        default:
4646            break;
4647    }
4648
4649    verbose(env, "cannot update sockmap in this context\n");
4650    return false;
4651}
4652
4653static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
4654{
4655    return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
4656}
4657
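/* Only a limited set of helpers may be used with each special-purpose map
 * type, and vice versa. Verify both directions of that relationship.
 */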
4658static int check_map_func_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, int func_id)
4659{
4660    if (!map) {
4661        return 0;
4662    }
4663
4664    /* We need a two way check, first is from map perspective ... */
4665    switch (map->map_type) {
4666        case BPF_MAP_TYPE_PROG_ARRAY:
4667            if (func_id != BPF_FUNC_tail_call) {
4668                goto error;
4669            }
4670            break;
4671        case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
4672            if (func_id != BPF_FUNC_perf_event_read && func_id != BPF_FUNC_perf_event_output &&
4673                func_id != BPF_FUNC_skb_output && func_id != BPF_FUNC_perf_event_read_value &&
4674                func_id != BPF_FUNC_xdp_output) {
4675                goto error;
4676            }
4677            break;
4678        case BPF_MAP_TYPE_RINGBUF:
4679            if (func_id != BPF_FUNC_ringbuf_output && func_id != BPF_FUNC_ringbuf_reserve &&
4680                func_id != BPF_FUNC_ringbuf_query) {
4681                goto error;
4682            }
4683            break;
4684        case BPF_MAP_TYPE_STACK_TRACE:
4685            if (func_id != BPF_FUNC_get_stackid) {
4686                goto error;
4687            }
4688            break;
4689        case BPF_MAP_TYPE_CGROUP_ARRAY:
4690            if (func_id != BPF_FUNC_skb_under_cgroup && func_id != BPF_FUNC_current_task_under_cgroup) {
4691                goto error;
4692            }
4693            break;
4694        case BPF_MAP_TYPE_CGROUP_STORAGE:
4695        case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
4696            if (func_id != BPF_FUNC_get_local_storage) {
4697                goto error;
4698            }
4699            break;
4700        case BPF_MAP_TYPE_DEVMAP:
4701        case BPF_MAP_TYPE_DEVMAP_HASH:
4702            if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) {
4703                goto error;
4704            }
4705            break;
4706        /* Restrict bpf side of cpumap and xskmap, open when use-cases
4707         * appear.
4708         */
4709        case BPF_MAP_TYPE_CPUMAP:
4710            if (func_id != BPF_FUNC_redirect_map) {
4711                goto error;
4712            }
4713            break;
4714        case BPF_MAP_TYPE_XSKMAP:
4715            if (func_id != BPF_FUNC_redirect_map && func_id != BPF_FUNC_map_lookup_elem) {
4716                goto error;
4717            }
4718            break;
4719        case BPF_MAP_TYPE_ARRAY_OF_MAPS:
4720        case BPF_MAP_TYPE_HASH_OF_MAPS:
4721            if (func_id != BPF_FUNC_map_lookup_elem) {
4722                goto error;
4723            }
4724            break;
4725        case BPF_MAP_TYPE_SOCKMAP:
4726            if (func_id != BPF_FUNC_sk_redirect_map && func_id != BPF_FUNC_sock_map_update &&
4727                func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_map &&
4728                func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem &&
4729                !may_update_sockmap(env, func_id)) {
4730                goto error;
4731            }
4732            break;
4733        case BPF_MAP_TYPE_SOCKHASH:
4734            if (func_id != BPF_FUNC_sk_redirect_hash && func_id != BPF_FUNC_sock_hash_update &&
4735                func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_hash &&
4736                func_id != BPF_FUNC_sk_select_reuseport && func_id != BPF_FUNC_map_lookup_elem &&
4737                !may_update_sockmap(env, func_id)) {
4738                goto error;
4739            }
4740            break;
4741        case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
4742            if (func_id != BPF_FUNC_sk_select_reuseport) {
4743                goto error;
4744            }
4745            break;
4746        case BPF_MAP_TYPE_QUEUE:
4747        case BPF_MAP_TYPE_STACK:
4748            if (func_id != BPF_FUNC_map_peek_elem && func_id != BPF_FUNC_map_pop_elem &&
4749                func_id != BPF_FUNC_map_push_elem) {
4750                goto error;
4751            }
4752            break;
4753        case BPF_MAP_TYPE_SK_STORAGE:
4754            if (func_id != BPF_FUNC_sk_storage_get && func_id != BPF_FUNC_sk_storage_delete) {
4755                goto error;
4756            }
4757            break;
4758        case BPF_MAP_TYPE_INODE_STORAGE:
4759            if (func_id != BPF_FUNC_inode_storage_get && func_id != BPF_FUNC_inode_storage_delete) {
4760                goto error;
4761            }
4762            break;
4763        default:
4764            break;
4765    }
4766
4767    /* ... and second from the function itself. */
4768    switch (func_id) {
4769        case BPF_FUNC_tail_call:
4770            if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
4771                goto error;
4772            }
4773            if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
4774                verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
4775                return -EINVAL;
4776            }
4777            break;
4778        case BPF_FUNC_perf_event_read:
4779        case BPF_FUNC_perf_event_output:
4780        case BPF_FUNC_perf_event_read_value:
4781        case BPF_FUNC_skb_output:
4782        case BPF_FUNC_xdp_output:
4783            if (map->map_type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) {
4784                goto error;
4785            }
4786            break;
4787        case BPF_FUNC_ringbuf_output:
4788        case BPF_FUNC_ringbuf_reserve:
4789        case BPF_FUNC_ringbuf_query:
4790            if (map->map_type != BPF_MAP_TYPE_RINGBUF) {
4791                goto error;
4792            }
4793            break;
4794        case BPF_FUNC_get_stackid:
4795            if (map->map_type != BPF_MAP_TYPE_STACK_TRACE) {
4796                goto error;
4797            }
4798            break;
4799        case BPF_FUNC_current_task_under_cgroup:
4800        case BPF_FUNC_skb_under_cgroup:
4801            if (map->map_type != BPF_MAP_TYPE_CGROUP_ARRAY) {
4802                goto error;
4803            }
4804            break;
4805        case BPF_FUNC_redirect_map:
4806            if (map->map_type != BPF_MAP_TYPE_DEVMAP && map->map_type != BPF_MAP_TYPE_DEVMAP_HASH &&
4807                map->map_type != BPF_MAP_TYPE_CPUMAP && map->map_type != BPF_MAP_TYPE_XSKMAP) {
4808                goto error;
4809            }
4810            break;
4811        case BPF_FUNC_sk_redirect_map:
4812        case BPF_FUNC_msg_redirect_map:
4813        case BPF_FUNC_sock_map_update:
4814            if (map->map_type != BPF_MAP_TYPE_SOCKMAP) {
4815                goto error;
4816            }
4817            break;
4818        case BPF_FUNC_sk_redirect_hash:
4819        case BPF_FUNC_msg_redirect_hash:
4820        case BPF_FUNC_sock_hash_update:
4821            if (map->map_type != BPF_MAP_TYPE_SOCKHASH) {
4822                goto error;
4823            }
4824            break;
4825        case BPF_FUNC_get_local_storage:
4826            if (map->map_type != BPF_MAP_TYPE_CGROUP_STORAGE && map->map_type != BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) {
4827                goto error;
4828            }
4829            break;
4830        case BPF_FUNC_sk_select_reuseport:
4831            if (map->map_type != BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && map->map_type != BPF_MAP_TYPE_SOCKMAP &&
4832                map->map_type != BPF_MAP_TYPE_SOCKHASH) {
4833                goto error;
4834            }
4835            break;
4836        case BPF_FUNC_map_peek_elem:
4837        case BPF_FUNC_map_pop_elem:
4838        case BPF_FUNC_map_push_elem:
4839            if (map->map_type != BPF_MAP_TYPE_QUEUE && map->map_type != BPF_MAP_TYPE_STACK) {
4840                goto error;
4841            }
4842            break;
4843        case BPF_FUNC_sk_storage_get:
4844        case BPF_FUNC_sk_storage_delete:
4845            if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) {
4846                goto error;
4847            }
4848            break;
4849        case BPF_FUNC_inode_storage_get:
4850        case BPF_FUNC_inode_storage_delete:
4851            if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) {
4852                goto error;
4853            }
4854            break;
4855        default:
4856            break;
4857    }
4858
4859    return 0;
4860error:
4861    verbose(env, "cannot pass map_type %d into func %s#%d\n", map->map_type, func_id_name(func_id), func_id);
4862    return -EINVAL;
4863}
4864
4865static bool check_raw_mode_ok(const struct bpf_func_proto *fn)
4866{
4867    int count = 0;
4868
4869    if (fn->arg1_type == ARG_PTR_TO_UNINIT_MEM) {
4870        count++;
4871    }
4872    if (fn->arg2_type == ARG_PTR_TO_UNINIT_MEM) {
4873        count++;
4874    }
4875    if (fn->arg3_type == ARG_PTR_TO_UNINIT_MEM) {
4876        count++;
4877    }
4878    if (fn->arg4_type == ARG_PTR_TO_UNINIT_MEM) {
4879        count++;
4880    }
4881    if (fn->arg5_type == ARG_PTR_TO_UNINIT_MEM) {
4882        count++;
4883    }
4884
4885    /* We only support one arg being in raw mode at the moment,
4886     * which is sufficient for the helper functions we have
4887     * right now.
4888     */
4889    return count <= 1;
4890}
4891
4892static bool check_args_pair_invalid(enum bpf_arg_type arg_curr, enum bpf_arg_type arg_next)
4893{
4894    return (arg_type_is_mem_ptr(arg_curr) && !arg_type_is_mem_size(arg_next)) ||
4895           (!arg_type_is_mem_ptr(arg_curr) && arg_type_is_mem_size(arg_next));
4896}
4897
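/* Illustrative sketch only (not taken from a specific helper definition):
 * a helper that fills a caller-provided buffer would declare the pair as
 *
 *     .arg1_type = ARG_PTR_TO_UNINIT_MEM,
 *     .arg2_type = ARG_CONST_SIZE_OR_ZERO,
 *
 * i.e. the memory pointer is immediately followed by its size argument,
 * which is what check_arg_pair_ok() below enforces.
 */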
4898static bool check_arg_pair_ok(const struct bpf_func_proto *fn)
4899{
4900    /* bpf_xxx(..., buf, len) call will access 'len'
4901     * bytes from memory 'buf'. Both arg types need
4902     * to be paired, so make sure there's no buggy
4903     * helper function specification.
4904     */
4905    if (arg_type_is_mem_size(fn->arg1_type) || arg_type_is_mem_ptr(fn->arg5_type) ||
4906        check_args_pair_invalid(fn->arg1_type, fn->arg2_type) ||
4907        check_args_pair_invalid(fn->arg2_type, fn->arg3_type) ||
4908        check_args_pair_invalid(fn->arg3_type, fn->arg4_type) ||
4909        check_args_pair_invalid(fn->arg4_type, fn->arg5_type)) {
4910        return false;
4911    }
4912
4913    return true;
4914}
4915
4916static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
4917{
4918    int count = 0;
4919
4920    if (arg_type_may_be_refcounted(fn->arg1_type)) {
4921        count++;
4922    }
4923    if (arg_type_may_be_refcounted(fn->arg2_type)) {
4924        count++;
4925    }
4926    if (arg_type_may_be_refcounted(fn->arg3_type)) {
4927        count++;
4928    }
4929    if (arg_type_may_be_refcounted(fn->arg4_type)) {
4930        count++;
4931    }
4932    if (arg_type_may_be_refcounted(fn->arg5_type)) {
4933        count++;
4934    }
4935
4936    /* A reference acquiring function cannot acquire
4937     * another refcounted ptr.
4938     */
4939    if (may_be_acquire_function(func_id) && count) {
4940        return false;
4941    }
4942
4943    /* We only support one arg being unreferenced at the moment,
4944     * which is sufficient for the helper functions we have right now.
4945     */
4946    return count <= 1;
4947}
4948
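/* Every ARG_PTR_TO_BTF_ID argument must come with a BTF ID in the
 * prototype, and no other argument type may carry one.
 */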
4949static bool check_btf_id_ok(const struct bpf_func_proto *fn)
4950{
4951    int i;
4952
4953    for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
4954        if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) {
4955            return false;
4956        }
4957
4958        if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) {
4959            return false;
4960        }
4961    }
4962
4963    return true;
4964}
4965
4966static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
4967{
4968    return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && check_btf_id_ok(fn) && check_refcount_ok(fn, func_id)
4969               ? 0
4970               : -EINVAL;
4971}
4972
4973/* Packet data might have moved, any old PTR_TO_PACKET[_META,_END]
4974 * are now invalid, so turn them into unknown SCALAR_VALUE.
4975 */
4976static void __clear_all_pkt_pointers(struct bpf_verifier_env *env, struct bpf_func_state *state)
4977{
4978    struct bpf_reg_state *regs = state->regs, *reg;
4979    int i;
4980
4981    for (i = 0; i < MAX_BPF_REG; i++) {
4982        if (reg_is_pkt_pointer_any(&regs[i])) {
4983            mark_reg_unknown(env, regs, i);
4984        }
4985    }
4986
4987    bpf_for_each_spilled_reg(i, state, reg)
4988    {
4989        if (!reg) {
4990            continue;
4991        }
4992        if (reg_is_pkt_pointer_any(reg)) {
4993            __mark_reg_unknown(env, reg);
4994        }
4995    }
4996}
4997
4998static void clear_all_pkt_pointers(struct bpf_verifier_env *env)
4999{
5000    struct bpf_verifier_state *vstate = env->cur_state;
5001    int i;
5002
5003    for (i = 0; i <= vstate->curframe; i++) {
5004        __clear_all_pkt_pointers(env, vstate->frame[i]);
5005    }
5006}
5007
5008static void release_reg_references(struct bpf_verifier_env *env, struct bpf_func_state *state, int ref_obj_id)
5009{
5010    struct bpf_reg_state *regs = state->regs, *reg;
5011    int i;
5012
5013    for (i = 0; i < MAX_BPF_REG; i++) {
5014        if (regs[i].ref_obj_id == ref_obj_id) {
5015            mark_reg_unknown(env, regs, i);
5016        }
5017    }
5018
5019    bpf_for_each_spilled_reg(i, state, reg)
5020    {
5021        if (!reg) {
5022            continue;
5023        }
5024        if (reg->ref_obj_id == ref_obj_id) {
5025            __mark_reg_unknown(env, reg);
5026        }
5027    }
5028}
5029
5030/* The pointer with the specified id has released its reference to kernel
5031 * resources. Identify all copies of the same pointer and clear the reference.
5032 */
5033static int release_reference(struct bpf_verifier_env *env, int ref_obj_id)
5034{
5035    struct bpf_verifier_state *vstate = env->cur_state;
5036    int err;
5037    int i;
5038
5039    err = release_reference_state(cur_func(env), ref_obj_id);
5040    if (err) {
5041        return err;
5042    }
5043
5044    for (i = 0; i <= vstate->curframe; i++) {
5045        release_reg_references(env, vstate->frame[i], ref_obj_id);
5046    }
5047
5048    return 0;
5049}
5050
5051static void clear_caller_saved_regs(struct bpf_verifier_env *env, struct bpf_reg_state *regs)
5052{
5053    int i;
5054
5055    /* after the call registers r0 - r5 were scratched */
5056    for (i = 0; i < CALLER_SAVED_REGS; i++) {
5057        mark_reg_not_init(env, regs, caller_saved[i]);
5058        check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5059    }
5060}
5061
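/* Handle a bpf-to-bpf call: resolve the callee subprog, check the argument
 * types via BTF, and either skip the body (global functions are verified
 * separately, so r0 just becomes an unknown scalar) or set up a new frame
 * with copies of r1-r5 and continue at the callee's first instruction.
 */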
5062static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx)
5063{
5064    struct bpf_verifier_state *state = env->cur_state;
5065    struct bpf_func_info_aux *func_info_aux;
5066    struct bpf_func_state *caller, *callee;
5067    int i, err, subprog, target_insn;
5068    bool is_global = false;
5069
5070    if (state->curframe + 1 >= MAX_CALL_FRAMES) {
5071        verbose(env, "the call stack of %d frames is too deep\n", state->curframe + 2);
5072        return -E2BIG;
5073    }
5074
5075    target_insn = *insn_idx + insn->imm;
5076    subprog = find_subprog(env, target_insn + 1);
5077    if (subprog < 0) {
5078        verbose(env, "verifier bug. No program starts at insn %d\n", target_insn + 1);
5079        return -EFAULT;
5080    }
5081
5082    caller = state->frame[state->curframe];
5083    if (state->frame[state->curframe + 1]) {
5084        verbose(env, "verifier bug. Frame %d already allocated\n", state->curframe + 1);
5085        return -EFAULT;
5086    }
5087
5088    func_info_aux = env->prog->aux->func_info_aux;
5089    if (func_info_aux) {
5090        is_global = func_info_aux[subprog].linkage == BTF_FUNC_GLOBAL;
5091    }
5092    err = btf_check_func_arg_match(env, subprog, caller->regs);
5093    if (err == -EFAULT) {
5094        return err;
5095    }
5096    if (is_global) {
5097        if (err) {
5098            verbose(env, "Caller passes invalid args into func#%d\n", subprog);
5099            return err;
5100        } else {
5101            if (env->log.level & BPF_LOG_LEVEL) {
5102                verbose(env, "Func#%d is global and valid. Skipping.\n", subprog);
5103            }
5104            clear_caller_saved_regs(env, caller->regs);
5105
5106            /* All global functions return a 64-bit SCALAR_VALUE */
5107            mark_reg_unknown(env, caller->regs, BPF_REG_0);
5108            caller->regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5109
5110            /* continue with next insn after call */
5111            return 0;
5112        }
5113    }
5114
5115    callee = kzalloc(sizeof(*callee), GFP_KERNEL);
5116    if (!callee) {
5117        return -ENOMEM;
5118    }
5119    state->frame[state->curframe + 1] = callee;
5120
5121    /* callee cannot access r0, r6 - r9 for reading and has to write
5122     * into its own stack before reading from it.
5123     * callee can read/write into caller's stack
5124     */
5125    init_func_state(env, callee,
5126                    /* remember the callsite, it will be used by bpf_exit */
5127                    *insn_idx /* callsite */, state->curframe + 1 /* frameno within this callchain */,
5128                    subprog /* subprog number within this prog */);
5129
5130    /* Transfer references to the callee */
5131    err = transfer_reference_state(callee, caller);
5132    if (err) {
5133        return err;
5134    }
5135
5136    /* copy r1 - r5 args that callee can access.  The copy includes parent
5137     * pointers, which connects us up to the liveness chain
5138     */
5139    for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
5140        callee->regs[i] = caller->regs[i];
5141    }
5142
5143    clear_caller_saved_regs(env, caller->regs);
5144
5145    /* only increment it after check_reg_arg() finished */
5146    state->curframe++;
5147
5148    /* and go analyze first insn of the callee */
5149    *insn_idx = target_insn;
5150
5151    if (env->log.level & BPF_LOG_LEVEL) {
5152        verbose(env, "caller:\n");
5153        print_verifier_state(env, caller);
5154        verbose(env, "callee:\n");
5155        print_verifier_state(env, callee);
5156    }
5157    return 0;
5158}
5159
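/* Handle bpf_exit from within a callee: reject returning a stack pointer,
 * propagate r0 and acquired references back to the caller, free the callee
 * frame and resume at the instruction after the call site.
 */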
5160static int prepare_func_exit(struct bpf_verifier_env *env, int *insn_idx)
5161{
5162    struct bpf_verifier_state *state = env->cur_state;
5163    struct bpf_func_state *caller, *callee;
5164    struct bpf_reg_state *r0;
5165    int err;
5166
5167    callee = state->frame[state->curframe];
5168    r0 = &callee->regs[BPF_REG_0];
5169    if (r0->type == PTR_TO_STACK) {
        /* Technically it's ok to return the caller's stack pointer
         * (or the caller's caller's pointer) back to the caller,
         * since those pointers remain valid. Only the current frame's
         * stack pointer becomes invalid as soon as the function exits,
         * but let's be conservative.
         */
5176        verbose(env, "cannot return stack pointer to the caller\n");
5177        return -EINVAL;
5178    }
5179
5180    state->curframe--;
5181    caller = state->frame[state->curframe];
5182    /* return to the caller whatever r0 had in the callee */
5183    caller->regs[BPF_REG_0] = *r0;
5184
5185    /* Transfer references to the caller */
5186    err = transfer_reference_state(caller, callee);
5187    if (err) {
5188        return err;
5189    }
5190
5191    *insn_idx = callee->callsite + 1;
5192    if (env->log.level & BPF_LOG_LEVEL) {
5193        verbose(env, "returning from callee:\n");
5194        print_verifier_state(env, callee);
5195        verbose(env, "to caller at %d:\n", *insn_idx);
5196        print_verifier_state(env, caller);
5197    }
5198    /* clear everything in the callee */
5199    free_func_state(callee);
5200    state->frame[state->curframe + 1] = NULL;
5201    return 0;
5202}
5203
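/* Helpers like bpf_get_stack() and the probe_read_*_str() variants return
 * at most the buffer size passed to them, so tighten r0's signed bounds to
 * the [-MAX_ERRNO, msize_max_value] range recorded while checking the size
 * argument.
 */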
5204static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, int func_id,
5205                                   struct bpf_call_arg_meta *meta)
5206{
5207    struct bpf_reg_state *ret_reg = &regs[BPF_REG_0];
5208
5209    if (ret_type != RET_INTEGER ||
5210        (func_id != BPF_FUNC_get_stack && func_id != BPF_FUNC_probe_read_str &&
5211         func_id != BPF_FUNC_probe_read_kernel_str && func_id != BPF_FUNC_probe_read_user_str)) {
5212        return;
5213    }
5214
5215    ret_reg->smax_value = meta->msize_max_value;
5216    ret_reg->s32_max_value = meta->msize_max_value;
5217    ret_reg->smin_value = -MAX_ERRNO;
5218    ret_reg->s32_min_value = -MAX_ERRNO;
5219    reg_bounds_sync(ret_reg);
5220}
5221
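/* Remember which map is used by this map helper call in insn_aux_data
 * (poisoning the entry if different maps reach the same call site), so
 * that later rewrite passes can rely on a verified map pointer. Writing
 * helpers are rejected on BPF_F_RDONLY_PROG maps.
 */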
5222static int record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, int func_id, int insn_idx)
5223{
5224    struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5225    struct bpf_map *map = meta->map_ptr;
5226
5227    if (func_id != BPF_FUNC_tail_call && func_id != BPF_FUNC_map_lookup_elem && func_id != BPF_FUNC_map_update_elem &&
5228        func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_map_push_elem && func_id != BPF_FUNC_map_pop_elem &&
5229        func_id != BPF_FUNC_map_peek_elem) {
5230        return 0;
5231    }
5232
5233    if (map == NULL) {
5234        verbose(env, "kernel subsystem misconfigured verifier\n");
5235        return -EINVAL;
5236    }
5237
5238    /* In case of read-only, some additional restrictions
5239     * need to be applied in order to prevent altering the
5240     * state of the map from program side.
5241     */
5242    if ((map->map_flags & BPF_F_RDONLY_PROG) &&
5243        (func_id == BPF_FUNC_map_delete_elem || func_id == BPF_FUNC_map_update_elem ||
5244         func_id == BPF_FUNC_map_push_elem || func_id == BPF_FUNC_map_pop_elem)) {
5245        verbose(env, "write into map forbidden\n");
5246        return -EACCES;
5247    }
5248
5249    if (!BPF_MAP_PTR(aux->map_ptr_state)) {
5250        bpf_map_ptr_store(aux, meta->map_ptr, !meta->map_ptr->bypass_spec_v1);
5251    } else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr) {
5252        bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON, !meta->map_ptr->bypass_spec_v1);
5253    }
5254    return 0;
5255}
5256
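/* For bpf_tail_call(), record the prog_array index in r3 in insn_aux_data
 * when it is a known constant (poisoning it otherwise, or when different
 * constants reach the same call site), which allows the tail call to be
 * patched into a direct jump later.
 */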
5257static int record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta, int func_id, int insn_idx)
5258{
5259    struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
5260    struct bpf_reg_state *regs = cur_regs(env), *reg;
5261    struct bpf_map *map = meta->map_ptr;
    u64 val, max;
5263    int err;
5264
5265    if (func_id != BPF_FUNC_tail_call) {
5266        return 0;
5267    }
5268    if (!map || map->map_type != BPF_MAP_TYPE_PROG_ARRAY) {
5269        verbose(env, "kernel subsystem misconfigured verifier\n");
5270        return -EINVAL;
5271    }
5272
5273    reg = &regs[BPF_REG_3];
5274    val = reg->var_off.value;
5275    max = map->max_entries;
5276
5277    if (!(register_is_const(reg) && val < max)) {
5278        bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5279        return 0;
5280    }
5281
5282    err = mark_chain_precision(env, BPF_REG_3);
5283    if (err) {
5284        return err;
5285    }
5286
5287    if (bpf_map_key_unseen(aux)) {
5288        bpf_map_key_store(aux, val);
5289    } else if (!bpf_map_key_poisoned(aux) && bpf_map_key_immediate(aux) != val) {
5290        bpf_map_key_store(aux, BPF_MAP_KEY_POISON);
5291    }
5292    return 0;
5293}
5294
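/* Report and reject program states that still hold acquired references,
 * listing the instruction that acquired each leaked reference.
 */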
5295static int check_reference_leak(struct bpf_verifier_env *env)
5296{
5297    struct bpf_func_state *state = cur_func(env);
5298    int i;
5299
5300    for (i = 0; i < state->acquired_refs; i++) {
5301        verbose(env, "Unreleased reference id=%d alloc_insn=%d\n", state->refs[i].id, state->refs[i].insn_idx);
5302    }
5303    return state->acquired_refs ? -EINVAL : 0;
5304}
5305
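/* Verify a call into a bpf helper: look up its prototype, check every
 * argument, model the side effects (caller-saved registers scratched,
 * packet pointers invalidated, references acquired or released) and give
 * r0 the type dictated by the helper's return type.
 */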
5306static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn_idx)
5307{
5308    const struct bpf_func_proto *fn = NULL;
5309    enum bpf_return_type ret_type;
5310    enum bpf_type_flag ret_flag;
5311    struct bpf_reg_state *regs;
5312    struct bpf_call_arg_meta meta;
5313    bool changes_data;
5314    int i, err;
5315
5316    /* find function prototype */
5317    if (func_id < 0 || func_id >= __BPF_FUNC_MAX_ID) {
5318        verbose(env, "invalid func %s#%d\n", func_id_name(func_id), func_id);
5319        return -EINVAL;
5320    }
5321
5322    if (env->ops->get_func_proto) {
5323        fn = env->ops->get_func_proto(func_id, env->prog);
5324    }
5325    if (!fn) {
5326        verbose(env, "unknown func %s#%d\n", func_id_name(func_id), func_id);
5327        return -EINVAL;
5328    }
5329
5330    /* eBPF programs must be GPL compatible to use GPL-ed functions */
5331    if (!env->prog->gpl_compatible && fn->gpl_only) {
5332        verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
5333        return -EINVAL;
5334    }
5335
5336    if (fn->allowed && !fn->allowed(env->prog)) {
5337        verbose(env, "helper call is not allowed in probe\n");
5338        return -EINVAL;
5339    }
5340
5341    /* With LD_ABS/IND some JITs save/restore skb from r1. */
5342    changes_data = bpf_helper_changes_pkt_data(fn->func);
5343    if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
5344        verbose(env, "kernel subsystem misconfigured func %s#%d: r1 != ctx\n", func_id_name(func_id), func_id);
5345        return -EINVAL;
5346    }
5347
5348    memset(&meta, 0, sizeof(meta));
5349    meta.pkt_access = fn->pkt_access;
5350
5351    err = check_func_proto(fn, func_id);
5352    if (err) {
5353        verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(func_id), func_id);
5354        return err;
5355    }
5356
5357    meta.func_id = func_id;
5358    /* check args */
5359    for (i = 0; i < 5; i++) {
5360        err = check_func_arg(env, i, &meta, fn);
5361        if (err) {
5362            return err;
5363        }
5364    }
5365
5366    err = record_func_map(env, &meta, func_id, insn_idx);
5367    if (err) {
5368        return err;
5369    }
5370
5371    err = record_func_key(env, &meta, func_id, insn_idx);
5372    if (err) {
5373        return err;
5374    }
5375
5376    /* Mark slots with STACK_MISC in case of raw mode, stack offset
5377     * is inferred from register state.
5378     */
5379    for (i = 0; i < meta.access_size; i++) {
5380        err = check_mem_access(env, insn_idx, meta.regno, i, BPF_B, BPF_WRITE, -1, false);
5381        if (err) {
5382            return err;
5383        }
5384    }
5385
5386    if (func_id == BPF_FUNC_tail_call) {
5387        err = check_reference_leak(env);
5388        if (err) {
5389            verbose(env, "tail_call would lead to reference leak\n");
5390            return err;
5391        }
5392    } else if (is_release_function(func_id)) {
5393        err = release_reference(env, meta.ref_obj_id);
5394        if (err) {
5395            verbose(env, "func %s#%d reference has not been acquired before\n", func_id_name(func_id), func_id);
5396            return err;
5397        }
5398    }
5399
5400    regs = cur_regs(env);
5401    /* check that flags argument in get_local_storage(map, flags) is 0,
5402     * this is required because get_local_storage() can't return an error.
5403     */
5404    if (func_id == BPF_FUNC_get_local_storage && !register_is_null(&regs[BPF_REG_2])) {
5405        verbose(env, "get_local_storage() doesn't support non-zero flags\n");
5406        return -EINVAL;
5407    }
5408
5409    /* reset caller saved regs */
5410    for (i = 0; i < CALLER_SAVED_REGS; i++) {
5411        mark_reg_not_init(env, regs, caller_saved[i]);
5412        check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
5413    }
5414
5415    /* helper call returns 64-bit value. */
5416    regs[BPF_REG_0].subreg_def = DEF_NOT_SUBREG;
5417
5418    /* update return register (already marked as written above) */
5419    ret_type = fn->ret_type;
5420    ret_flag = type_flag(fn->ret_type);
5421    if (ret_type == RET_INTEGER) {
5422        /* sets type to SCALAR_VALUE */
5423        mark_reg_unknown(env, regs, BPF_REG_0);
5424    } else if (ret_type == RET_VOID) {
5425        regs[BPF_REG_0].type = NOT_INIT;
5426    } else if (base_type(ret_type) == RET_PTR_TO_MAP_VALUE) {
5427        /* There is no offset yet applied, variable or fixed */
5428        mark_reg_known_zero(env, regs, BPF_REG_0);
5429        /* remember map_ptr, so that check_map_access()
5430         * can check 'value_size' boundary of memory access
5431         * to map element returned from bpf_map_lookup_elem()
5432         */
5433        if (meta.map_ptr == NULL) {
5434            verbose(env, "kernel subsystem misconfigured verifier\n");
5435            return -EINVAL;
5436        }
5437        regs[BPF_REG_0].map_ptr = meta.map_ptr;
5438        regs[BPF_REG_0].type = PTR_TO_MAP_VALUE | ret_flag;
5439        if (!type_may_be_null(ret_type) && map_value_has_spin_lock(meta.map_ptr)) {
5440            regs[BPF_REG_0].id = ++env->id_gen;
5441        }
5442    } else if (base_type(ret_type) == RET_PTR_TO_SOCKET) {
5443        mark_reg_known_zero(env, regs, BPF_REG_0);
5444        regs[BPF_REG_0].type = PTR_TO_SOCKET | ret_flag;
5445    } else if (base_type(ret_type) == RET_PTR_TO_SOCK_COMMON) {
5446        mark_reg_known_zero(env, regs, BPF_REG_0);
5447        regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON | ret_flag;
5448    } else if (base_type(ret_type) == RET_PTR_TO_TCP_SOCK) {
5449        mark_reg_known_zero(env, regs, BPF_REG_0);
5450        regs[BPF_REG_0].type = PTR_TO_TCP_SOCK | ret_flag;
5451    } else if (base_type(ret_type) == RET_PTR_TO_ALLOC_MEM) {
5452        mark_reg_known_zero(env, regs, BPF_REG_0);
5453        regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
5454        regs[BPF_REG_0].mem_size = meta.mem_size;
5455    } else if (base_type(ret_type) == RET_PTR_TO_MEM_OR_BTF_ID) {
5456        const struct btf_type *t;
5457
5458        mark_reg_known_zero(env, regs, BPF_REG_0);
5459        t = btf_type_skip_modifiers(btf_vmlinux, meta.ret_btf_id, NULL);
5460        if (!btf_type_is_struct(t)) {
5461            u32 tsize;
5462            const struct btf_type *ret;
5463            const char *tname;
5464
5465            /* resolve the type size of ksym. */
5466            ret = btf_resolve_size(btf_vmlinux, t, &tsize);
5467            if (IS_ERR(ret)) {
5468                tname = btf_name_by_offset(btf_vmlinux, t->name_off);
5469                verbose(env, "unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret));
5470                return -EINVAL;
5471            }
5472            regs[BPF_REG_0].type = PTR_TO_MEM | ret_flag;
5473            regs[BPF_REG_0].mem_size = tsize;
5474        } else {
5475            /* MEM_RDONLY may be carried from ret_flag, but it
5476             * doesn't apply on PTR_TO_BTF_ID. Fold it, otherwise
5477             * it will confuse the check of PTR_TO_BTF_ID in
5478             * check_mem_access().
5479             */
5480            ret_flag &= ~MEM_RDONLY;
5481
5482            regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
5483            regs[BPF_REG_0].btf_id = meta.ret_btf_id;
5484        }
5485    } else if (base_type(ret_type) == RET_PTR_TO_BTF_ID) {
5486        int ret_btf_id;
5487
5488        mark_reg_known_zero(env, regs, BPF_REG_0);
5489        regs[BPF_REG_0].type = PTR_TO_BTF_ID | ret_flag;
5490        ret_btf_id = *fn->ret_btf_id;
5491        if (ret_btf_id == 0) {
5492            verbose(env, "invalid return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id);
5493            return -EINVAL;
5494        }
5495        regs[BPF_REG_0].btf_id = ret_btf_id;
5496    } else {
5497        verbose(env, "unknown return type %u of func %s#%d\n", base_type(ret_type), func_id_name(func_id), func_id);
5498        return -EINVAL;
5499    }
5500
5501    if (type_may_be_null(regs[BPF_REG_0].type)) {
5502        regs[BPF_REG_0].id = ++env->id_gen;
5503    }
5504
5505    if (is_ptr_cast_function(func_id)) {
5506        /* For release_reference() */
5507        regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
5508    } else if (is_acquire_function(func_id, meta.map_ptr)) {
5509        int id = acquire_reference_state(env, insn_idx);
5510        if (id < 0) {
5511            return id;
5512        }
5513        /* For mark_ptr_or_null_reg() */
5514        regs[BPF_REG_0].id = id;
5515        /* For release_reference() */
5516        regs[BPF_REG_0].ref_obj_id = id;
5517    }
5518
5519    do_refine_retval_range(regs, fn->ret_type, func_id, &meta);
5520
5521    err = check_map_func_compatibility(env, meta.map_ptr, func_id);
5522    if (err) {
5523        return err;
5524    }
5525
5526    if ((func_id == BPF_FUNC_get_stack || func_id == BPF_FUNC_get_task_stack) && !env->prog->has_callchain_buf) {
5527        const char *err_str;
5528
5529#ifdef CONFIG_PERF_EVENTS
5530        err = get_callchain_buffers(sysctl_perf_event_max_stack);
5531        err_str = "cannot get callchain buffer for func %s#%d\n";
5532#else
5533        err = -ENOTSUPP;
5534        err_str = "func %s#%d not supported without CONFIG_PERF_EVENTS\n";
5535#endif
5536        if (err) {
5537            verbose(env, err_str, func_id_name(func_id), func_id);
5538            return err;
5539        }
5540
5541        env->prog->has_callchain_buf = true;
5542    }
5543
5544    if (func_id == BPF_FUNC_get_stackid || func_id == BPF_FUNC_get_stack) {
5545        env->prog->call_get_stack = true;
5546    }
5547
5548    if (changes_data) {
5549        clear_all_pkt_pointers(env);
5550    }
5551    return 0;
5552}
5553
5554static bool signed_add_overflows(s64 a, s64 b)
5555{
5556    /* Do the add in u64, where overflow is well-defined */
5557    s64 res = (s64)((u64)a + (u64)b);
5558
5559    if (b < 0) {
5560        return res > a;
5561    }
5562    return res < a;
5563}
5564
5565static bool signed_add32_overflows(s32 a, s32 b)
5566{
5567    /* Do the add in u32, where overflow is well-defined */
5568    s32 res = (s32)((u32)a + (u32)b);
5569
5570    if (b < 0) {
5571        return res > a;
5572    }
5573    return res < a;
5574}
5575
5576static bool signed_sub_overflows(s64 a, s64 b)
5577{
5578    /* Do the sub in u64, where overflow is well-defined */
5579    s64 res = (s64)((u64)a - (u64)b);
5580
5581    if (b < 0) {
5582        return res < a;
5583    }
5584    return res > a;
5585}
5586
5587static bool signed_sub32_overflows(s32 a, s32 b)
5588{
5589    /* Do the sub in u32, where overflow is well-defined */
5590    s32 res = (s32)((u32)a - (u32)b);
5591
5592    if (b < 0) {
5593        return res < a;
5594    }
5595    return res > a;
5596}
5597
5598static bool check_reg_sane_offset(struct bpf_verifier_env *env, const struct bpf_reg_state *reg, enum bpf_reg_type type)
5599{
5600    bool known = tnum_is_const(reg->var_off);
5601    s64 val = reg->var_off.value;
5602    s64 smin = reg->smin_value;
5603
5604    if (known && (val >= BPF_MAX_VAR_OFF || val <= -BPF_MAX_VAR_OFF)) {
5605        verbose(env, "math between %s pointer and %lld is not allowed\n", reg_type_str(env, type), val);
5606        return false;
5607    }
5608
5609    if (reg->off >= BPF_MAX_VAR_OFF || reg->off <= -BPF_MAX_VAR_OFF) {
5610        verbose(env, "%s pointer offset %d is not allowed\n", reg_type_str(env, type), reg->off);
5611        return false;
5612    }
5613
5614    if (smin == S64_MIN) {
5615        verbose(env, "math between %s pointer and register with unbounded min value is not allowed\n",
5616                reg_type_str(env, type));
5617        return false;
5618    }
5619
5620    if (smin >= BPF_MAX_VAR_OFF || smin <= -BPF_MAX_VAR_OFF) {
5621        verbose(env, "value %lld makes %s pointer be out of bounds\n", smin, reg_type_str(env, type));
5622        return false;
5623    }
5624
5625    return true;
5626}
5627
5628static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
5629{
5630    return &env->insn_aux_data[env->insn_idx];
5631}
5632
5633enum {
5634    REASON_BOUNDS = -1,
5635    REASON_TYPE = -2,
5636    REASON_PATHS = -3,
5637    REASON_LIMIT = -4,
5638    REASON_STACK = -5,
5639};
5640
5641static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg, u32 *alu_limit, bool mask_to_left)
5642{
5643    u32 max = 0, ptr_limit = 0;
5644
5645    switch (ptr_reg->type) {
5646        case PTR_TO_STACK:
5647            /* Offset 0 is out-of-bounds, but acceptable start for the
5648             * left direction, see BPF_REG_FP. Also, unknown scalar
5649             * offset where we would need to deal with min/max bounds is
5650             * currently prohibited for unprivileged.
5651             */
5652            max = MAX_BPF_STACK + mask_to_left;
5653            ptr_limit = -(ptr_reg->var_off.value + ptr_reg->off);
5654            break;
5655        case PTR_TO_MAP_VALUE:
5656            max = ptr_reg->map_ptr->value_size;
5657            ptr_limit = (mask_to_left ? ptr_reg->smin_value : ptr_reg->umax_value) + ptr_reg->off;
5658            break;
5659        default:
5660            return REASON_TYPE;
5661    }
5662
5663    if (ptr_limit >= max) {
5664        return REASON_LIMIT;
5665    }
5666    *alu_limit = ptr_limit;
5667    return 0;
5668}
5669
5670static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env, const struct bpf_insn *insn)
5671{
5672    return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
5673}
5674
5675static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux, u32 alu_state, u32 alu_limit)
5676{
5677    /* If we arrived here from different branches with different
5678     * state or limits to sanitize, then this won't work.
5679     */
5680    if (aux->alu_state && (aux->alu_state != alu_state || aux->alu_limit != alu_limit)) {
5681        return REASON_PATHS;
5682    }
5683
5684    /* Corresponding fixup done in fixup_bpf_calls(). */
5685    aux->alu_state = alu_state;
5686    aux->alu_limit = alu_limit;
5687    return 0;
5688}
5689
5690static int sanitize_val_alu(struct bpf_verifier_env *env, struct bpf_insn *insn)
5691{
5692    struct bpf_insn_aux_data *aux = cur_aux(env);
5693
5694    if (can_skip_alu_sanitation(env, insn)) {
5695        return 0;
5696    }
5697
5698    return update_alu_sanitation_state(aux, BPF_ALU_NON_POINTER, 0);
5699}
5700
5701static bool sanitize_needed(u8 opcode)
5702{
5703    return opcode == BPF_ADD || opcode == BPF_SUB;
5704}
5705
5706struct bpf_sanitize_info {
5707    struct bpf_insn_aux_data aux;
5708    bool mask_to_left;
5709};
5710
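/* Push a verification state that models execution under speculation,
 * starting at next_idx. For a conditional jump, the registers taking part
 * in the comparison are marked as unknown in the speculative state.
 */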
5711static struct bpf_verifier_state *sanitize_speculative_path(struct bpf_verifier_env *env, const struct bpf_insn *insn,
5712                                                            u32 next_idx, u32 curr_idx)
5713{
5714    struct bpf_verifier_state *branch;
5715    struct bpf_reg_state *regs;
5716
5717    branch = push_stack(env, next_idx, curr_idx, true);
5718    if (branch && insn) {
5719        regs = branch->frame[branch->curframe]->regs;
5720        if (BPF_SRC(insn->code) == BPF_K) {
5721            mark_reg_unknown(env, regs, insn->dst_reg);
5722        } else if (BPF_SRC(insn->code) == BPF_X) {
5723            mark_reg_unknown(env, regs, insn->dst_reg);
5724            mark_reg_unknown(env, regs, insn->src_reg);
5725        }
5726    }
5727    return branch;
5728}
5729
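/* Protect pointer ADD/SUB with an unknown scalar against speculative
 * out-of-bounds access: compute an ALU masking limit for the offset,
 * record it in insn_aux_data for the later rewrite, and simulate the
 * masked operation on a speculative path so any resulting bad access is
 * detected.
 */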
5730static int sanitize_ptr_alu(struct bpf_verifier_env *env, struct bpf_insn *insn, const struct bpf_reg_state *ptr_reg,
5731                            const struct bpf_reg_state *off_reg, struct bpf_reg_state *dst_reg,
5732                            struct bpf_sanitize_info *info, const bool commit_window)
5733{
5734    struct bpf_insn_aux_data *aux = commit_window ? cur_aux(env) : &info->aux;
5735    struct bpf_verifier_state *vstate = env->cur_state;
5736    bool off_is_imm = tnum_is_const(off_reg->var_off);
5737    bool off_is_neg = off_reg->smin_value < 0;
5738    bool ptr_is_dst_reg = ptr_reg == dst_reg;
5739    u8 opcode = BPF_OP(insn->code);
5740    u32 alu_state, alu_limit;
5741    struct bpf_reg_state tmp;
5742    bool ret;
5743    int err;
5744
5745    if (can_skip_alu_sanitation(env, insn)) {
5746        return 0;
5747    }
5748
5749    /* We already marked aux for masking from non-speculative
5750     * paths, thus we got here in the first place. We only care
5751     * to explore bad access from here.
5752     */
5753    if (vstate->speculative) {
5754        goto do_sim;
5755    }
5756
5757    if (!commit_window) {
5758        if (!tnum_is_const(off_reg->var_off) && (off_reg->smin_value < 0) != (off_reg->smax_value < 0)) {
5759            return REASON_BOUNDS;
5760        }
5761
5762        info->mask_to_left = (opcode == BPF_ADD && off_is_neg) || (opcode == BPF_SUB && !off_is_neg);
5763    }
5764
5765    err = retrieve_ptr_limit(ptr_reg, &alu_limit, info->mask_to_left);
5766    if (err < 0) {
5767        return err;
5768    }
5769
5770    if (commit_window) {
5771        /* In commit phase we narrow the masking window based on
5772         * the observed pointer move after the simulated operation.
5773         */
5774        alu_state = info->aux.alu_state;
5775        alu_limit = abs(info->aux.alu_limit - alu_limit);
5776    } else {
5777        alu_state = off_is_neg ? BPF_ALU_NEG_VALUE : 0;
5778        alu_state |= off_is_imm ? BPF_ALU_IMMEDIATE : 0;
5779        alu_state |= ptr_is_dst_reg ? BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
5780
5781        /* Limit pruning on unknown scalars to enable deep search for
5782         * potential masking differences from other program paths.
5783         */
5784        if (!off_is_imm) {
5785            env->explore_alu_limits = true;
5786        }
5787    }
5788
5789    err = update_alu_sanitation_state(aux, alu_state, alu_limit);
5790    if (err < 0) {
5791        return err;
5792    }
5793do_sim:
5794    /* If we're in commit phase, we're done here given we already
5795     * pushed the truncated dst_reg into the speculative verification
5796     * stack.
5797     *
5798     * Also, when register is a known constant, we rewrite register-based
5799     * operation to immediate-based, and thus do not need masking (and as
5800     * a consequence, do not need to simulate the zero-truncation either).
5801     */
5802    if (commit_window || off_is_imm) {
5803        return 0;
5804    }
5805
5806    /* Simulate and find potential out-of-bounds access under
5807     * speculative execution from truncation as a result of
5808     * masking when off was not within expected range. If off
5809     * sits in dst, then we temporarily need to move ptr there
5810     * to simulate dst (== 0) +/-= ptr. Needed, for example,
5811     * for cases where we use K-based arithmetic in one direction
5812     * and truncated reg-based in the other in order to explore
5813     * bad access.
5814     */
5815    if (!ptr_is_dst_reg) {
5816        tmp = *dst_reg;
5817        *dst_reg = *ptr_reg;
5818    }
5819    ret = sanitize_speculative_path(env, NULL, env->insn_idx + 1, env->insn_idx);
5820    if (!ptr_is_dst_reg && ret) {
5821        *dst_reg = tmp;
5822    }
5823    return !ret ? REASON_STACK : 0;
5824}
5825
5826static void sanitize_mark_insn_seen(struct bpf_verifier_env *env)
5827{
5828    struct bpf_verifier_state *vstate = env->cur_state;
5829
5830    /* If we simulate paths under speculation, we don't update the
5831     * insn as 'seen' such that when we verify unreachable paths in
5832     * the non-speculative domain, sanitize_dead_code() can still
5833     * rewrite/sanitize them.
5834     */
5835    if (!vstate->speculative) {
5836        env->insn_aux_data[env->insn_idx].seen = env->pass_cnt;
5837    }
5838}
5839
5840static int sanitize_err(struct bpf_verifier_env *env, const struct bpf_insn *insn, int reason,
5841                        const struct bpf_reg_state *off_reg, const struct bpf_reg_state *dst_reg)
5842{
5843    static const char *err = "pointer arithmetic with it prohibited for !root";
5844    const char *op = BPF_OP(insn->code) == BPF_ADD ? "add" : "sub";
5845    u32 dst = insn->dst_reg, src = insn->src_reg;
5846
5847    switch (reason) {
5848        case REASON_BOUNDS:
5849            verbose(env, "R%d has unknown scalar with mixed signed bounds, %s\n", off_reg == dst_reg ? dst : src, err);
5850            break;
5851        case REASON_TYPE:
5852            verbose(env, "R%d has pointer with unsupported alu operation, %s\n", off_reg == dst_reg ? src : dst, err);
5853            break;
5854        case REASON_PATHS:
5855            verbose(env, "R%d tried to %s from different maps, paths or scalars, %s\n", dst, op, err);
5856            break;
5857        case REASON_LIMIT:
5858            verbose(env, "R%d tried to %s beyond pointer bounds, %s\n", dst, op, err);
5859            break;
5860        case REASON_STACK:
5861            verbose(env, "R%d could not be pushed for speculative verification, %s\n", dst, err);
5862            break;
5863        default:
5864            verbose(env, "verifier internal error: unknown reason (%d)\n", reason);
5865            break;
5866    }
5867
5868    return -EACCES;
5869}
5870
5871/* check that stack access falls within stack limits and that 'reg' doesn't
5872 * have a variable offset.
5873 *
5874 * Variable offset is prohibited for unprivileged mode for simplicity since it
5875 * requires corresponding support in Spectre masking for stack ALU.  See also
5876 * retrieve_ptr_limit().
 *
5879 * 'off' includes 'reg->off'.
5880 */
5881static int check_stack_access_for_ptr_arithmetic(struct bpf_verifier_env *env, int regno,
5882                                                 const struct bpf_reg_state *reg, int off)
5883{
5884    if (!tnum_is_const(reg->var_off)) {
5885        char tn_buf[48];
5886
5887        tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
5888        verbose(env, "R%d variable stack access prohibited for !root, var_off=%s off=%d\n", regno, tn_buf, off);
5889        return -EACCES;
5890    }
5891
5892    if (off >= 0 || off < -MAX_BPF_STACK) {
5893        verbose(env,
5894                "R%d stack pointer arithmetic goes out of range, "
5895                "prohibited for !root; off=%d\n",
5896                regno, off);
5897        return -EACCES;
5898    }
5899
5900    return 0;
5901}
5902
5903static int sanitize_check_bounds(struct bpf_verifier_env *env, const struct bpf_insn *insn,
5904                                 const struct bpf_reg_state *dst_reg)
5905{
5906    u32 dst = insn->dst_reg;
5907
    /* For unprivileged programs we require the resulting offset to be in
     * bounds, so that the access can be sanitized later on.
5910     */
5911    if (env->bypass_spec_v1) {
5912        return 0;
5913    }
5914
5915    switch (dst_reg->type) {
5916        case PTR_TO_STACK:
5917            if (check_stack_access_for_ptr_arithmetic(env, dst, dst_reg, dst_reg->off + dst_reg->var_off.value)) {
5918                return -EACCES;
5919            }
5920            break;
5921        case PTR_TO_MAP_VALUE:
5922            if (check_map_access(env, dst, dst_reg->off, 1, false)) {
5923                verbose(env,
5924                        "R%d pointer arithmetic of map value goes out of range, "
5925                        "prohibited for !root\n",
5926                        dst);
5927                return -EACCES;
5928            }
5929            break;
5930        default:
5931            break;
5932    }
5933
5934    return 0;
5935}
5936
5937/* Handles arithmetic on a pointer and a scalar: computes new min/max and var_off.
5938 * Caller should also handle BPF_MOV case separately.
5939 * If we return -EACCES, caller may want to try again treating pointer as a
5940 * scalar.  So we only emit a diagnostic if !env->allow_ptr_leaks.
5941 */
5942static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn,
5943                                   const struct bpf_reg_state *ptr_reg, const struct bpf_reg_state *off_reg)
5944{
5945    struct bpf_verifier_state *vstate = env->cur_state;
5946    struct bpf_func_state *state = vstate->frame[vstate->curframe];
5947    struct bpf_reg_state *regs = state->regs, *dst_reg;
5948    bool known = tnum_is_const(off_reg->var_off);
5949    s64 smin_val = off_reg->smin_value, smax_val = off_reg->smax_value, smin_ptr = ptr_reg->smin_value,
5950        smax_ptr = ptr_reg->smax_value;
5951    u64 umin_val = off_reg->umin_value, umax_val = off_reg->umax_value, umin_ptr = ptr_reg->umin_value,
5952        umax_ptr = ptr_reg->umax_value;
5953    struct bpf_sanitize_info info = {};
5954    u8 opcode = BPF_OP(insn->code);
5955    u32 dst = insn->dst_reg;
5956    int ret;
5957
5958    dst_reg = &regs[dst];
5959
5960    if ((known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val || umin_val > umax_val) {
5961        /* Taint dst register if offset had invalid bounds derived from
5962         * e.g. dead branches.
5963         */
5964        __mark_reg_unknown(env, dst_reg);
5965        return 0;
5966    }
5967
5968    if (BPF_CLASS(insn->code) != BPF_ALU64) {
5969        /* 32-bit ALU ops on pointers produce (meaningless) scalars */
5970        if (opcode == BPF_SUB && env->allow_ptr_leaks) {
5971            __mark_reg_unknown(env, dst_reg);
5972            return 0;
5973        }
5974
5975        verbose(env, "R%d 32-bit pointer arithmetic prohibited\n", dst);
5976        return -EACCES;
5977    }
5978
5979    if (ptr_reg->type & PTR_MAYBE_NULL) {
5980        verbose(env, "R%d pointer arithmetic on %s prohibited, null-check it first\n", dst,
5981                reg_type_str(env, ptr_reg->type));
5982        return -EACCES;
5983    }
5984
5985    switch (base_type(ptr_reg->type)) {
5986        case CONST_PTR_TO_MAP:
5987            /* smin_val represents the known value */
5988            if (known && smin_val == 0 && opcode == BPF_ADD) {
5989                break;
5990            }
5991            fallthrough;
5992        case PTR_TO_PACKET_END:
5993        case PTR_TO_SOCKET:
5994        case PTR_TO_SOCK_COMMON:
5995        case PTR_TO_TCP_SOCK:
5996        case PTR_TO_XDP_SOCK:
5997            reject:
5998            verbose(env, "R%d pointer arithmetic on %s prohibited\n", dst, reg_type_str(env, ptr_reg->type));
5999            return -EACCES;
6000        default:
6001            if (type_may_be_null(ptr_reg->type)) {
6002                goto reject;
6003            }
6004            break;
6005    }
6006
6007    /* In case of 'scalar += pointer', dst_reg inherits pointer type and id.
6008     * The id may be overwritten later if we create a new variable offset.
6009     */
6010    dst_reg->type = ptr_reg->type;
6011    dst_reg->id = ptr_reg->id;
6012
6013    if (!check_reg_sane_offset(env, off_reg, ptr_reg->type) || !check_reg_sane_offset(env, ptr_reg, ptr_reg->type)) {
6014        return -EINVAL;
6015    }
6016
6017    /* pointer types do not carry 32-bit bounds at the moment. */
6018    verifier_mark_reg32_unbounded(dst_reg);
6019
6020    if (sanitize_needed(opcode)) {
6021        ret = sanitize_ptr_alu(env, insn, ptr_reg, off_reg, dst_reg, &info, false);
6022        if (ret < 0) {
6023            return sanitize_err(env, insn, ret, off_reg, dst_reg);
6024        }
6025    }
6026
6027    switch (opcode) {
6028        case BPF_ADD:
6029            /* We can take a fixed offset as long as it doesn't overflow
6030             * the s32 'off' field
6031             */
6032            if (known && (ptr_reg->off + smin_val == (s64)(s32)(ptr_reg->off + smin_val))) {
6033                /* pointer += K.  Accumulate it into fixed offset */
6034                dst_reg->smin_value = smin_ptr;
6035                dst_reg->smax_value = smax_ptr;
6036                dst_reg->umin_value = umin_ptr;
6037                dst_reg->umax_value = umax_ptr;
6038                dst_reg->var_off = ptr_reg->var_off;
6039                dst_reg->off = ptr_reg->off + smin_val;
6040                dst_reg->raw = ptr_reg->raw;
6041                break;
6042            }
6043            /* A new variable offset is created.  Note that off_reg->off
6044             * == 0, since it's a scalar.
6045             * dst_reg gets the pointer type and since some positive
6046             * integer value was added to the pointer, give it a new 'id'
6047             * if it's a PTR_TO_PACKET.
6048             * this creates a new 'base' pointer, off_reg (variable) gets
6049             * added into the variable offset, and we copy the fixed offset
6050             * from ptr_reg.
6051             */
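            /* For illustration (assuming a freshly loaded PTR_TO_MAP_VALUE
             * with off 0, bounds [0, 0] and constant var_off): adding a
             * scalar bounded to [0, 16] leaves dst_reg a PTR_TO_MAP_VALUE
             * with off 0, smin/umin 0, smax/umax 16 and the matching var_off.
             */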
6052            if (signed_add_overflows(smin_ptr, smin_val) || signed_add_overflows(smax_ptr, smax_val)) {
6053                dst_reg->smin_value = S64_MIN;
6054                dst_reg->smax_value = S64_MAX;
6055            } else {
6056                dst_reg->smin_value = smin_ptr + smin_val;
6057                dst_reg->smax_value = smax_ptr + smax_val;
6058            }
6059            if (umin_ptr + umin_val < umin_ptr || umax_ptr + umax_val < umax_ptr) {
6060                dst_reg->umin_value = 0;
6061                dst_reg->umax_value = U64_MAX;
6062            } else {
6063                dst_reg->umin_value = umin_ptr + umin_val;
6064                dst_reg->umax_value = umax_ptr + umax_val;
6065            }
6066            dst_reg->var_off = tnum_add(ptr_reg->var_off, off_reg->var_off);
6067            dst_reg->off = ptr_reg->off;
6068            dst_reg->raw = ptr_reg->raw;
6069            if (reg_is_pkt_pointer(ptr_reg)) {
6070                dst_reg->id = ++env->id_gen;
6071                /* something was added to pkt_ptr, set range to zero */
6072                dst_reg->raw = 0;
6073            }
6074            break;
6075        case BPF_SUB:
6076            if (dst_reg == off_reg) {
6077                /* scalar -= pointer.  Creates an unknown scalar */
6078                verbose(env, "R%d tried to subtract pointer from scalar\n", dst);
6079                return -EACCES;
6080            }
            /* We don't allow subtraction from FP, because (according to the
             * test_verifier.c test "invalid fp arithmetic") JITs might not
             * be able to deal with it.
6084             */
6085            if (ptr_reg->type == PTR_TO_STACK) {
6086                verbose(env, "R%d subtraction from stack pointer prohibited\n", dst);
6087                return -EACCES;
6088            }
6089            if (known && (ptr_reg->off - smin_val == (s64)(s32)(ptr_reg->off - smin_val))) {
6090                /* pointer -= K.  Subtract it from fixed offset */
6091                dst_reg->smin_value = smin_ptr;
6092                dst_reg->smax_value = smax_ptr;
6093                dst_reg->umin_value = umin_ptr;
6094                dst_reg->umax_value = umax_ptr;
6095                dst_reg->var_off = ptr_reg->var_off;
6096                dst_reg->id = ptr_reg->id;
6097                dst_reg->off = ptr_reg->off - smin_val;
6098                dst_reg->raw = ptr_reg->raw;
6099                break;
6100            }
6101            /* A new variable offset is created.  If the subtrahend is known
6102             * nonnegative, then any reg->range we had before is still good.
6103             */
6104            if (signed_sub_overflows(smin_ptr, smax_val) || signed_sub_overflows(smax_ptr, smin_val)) {
6105                /* Overflow possible, we know nothing */
6106                dst_reg->smin_value = S64_MIN;
6107                dst_reg->smax_value = S64_MAX;
6108            } else {
6109                dst_reg->smin_value = smin_ptr - smax_val;
6110                dst_reg->smax_value = smax_ptr - smin_val;
6111            }
6112            if (umin_ptr < umax_val) {
6113                /* Overflow possible, we know nothing */
6114                dst_reg->umin_value = 0;
6115                dst_reg->umax_value = U64_MAX;
6116            } else {
6117                /* Cannot overflow (as long as bounds are consistent) */
6118                dst_reg->umin_value = umin_ptr - umax_val;
6119                dst_reg->umax_value = umax_ptr - umin_val;
6120            }
6121            dst_reg->var_off = tnum_sub(ptr_reg->var_off, off_reg->var_off);
6122            dst_reg->off = ptr_reg->off;
6123            dst_reg->raw = ptr_reg->raw;
6124            if (reg_is_pkt_pointer(ptr_reg)) {
6125                dst_reg->id = ++env->id_gen;
                /* a possibly negative value was subtracted, i.e. something
                 * may have been added to pkt_ptr, so clear the range
                 */
6127                if (smin_val < 0) {
6128                    dst_reg->raw = 0;
6129                }
6130            }
6131            break;
6132        case BPF_AND:
6133        case BPF_OR:
6134        case BPF_XOR:
6135            /* bitwise ops on pointers are troublesome, prohibit. */
6136            verbose(env, "R%d bitwise operator %s on pointer prohibited\n", dst, bpf_alu_string[opcode >> 0x4]);
6137            return -EACCES;
6138        default:
6139            /* other operators (e.g. MUL,LSH) produce non-pointer results */
6140            verbose(env, "R%d pointer arithmetic with %s operator prohibited\n", dst, bpf_alu_string[opcode >> 0x4]);
6141            return -EACCES;
6142    }
6143
6144    if (!check_reg_sane_offset(env, dst_reg, ptr_reg->type)) {
6145        return -EINVAL;
6146    }
6147
6148    reg_bounds_sync(dst_reg);
6149
6150    if (sanitize_check_bounds(env, insn, dst_reg) < 0) {
6151        return -EACCES;
6152    }
6153    if (sanitize_needed(opcode)) {
6154        ret = sanitize_ptr_alu(env, insn, dst_reg, off_reg, dst_reg, &info, true);
6155        if (ret < 0) {
6156            return sanitize_err(env, insn, ret, off_reg, dst_reg);
6157        }
6158    }
6159
6160    return 0;
6161}
6162
6163static void scalar32_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6164{
6165    s32 smin_val = src_reg->s32_min_value;
6166    s32 smax_val = src_reg->s32_max_value;
6167    u32 umin_val = src_reg->u32_min_value;
6168    u32 umax_val = src_reg->u32_max_value;
6169
6170    if (signed_add32_overflows(dst_reg->s32_min_value, smin_val) ||
6171        signed_add32_overflows(dst_reg->s32_max_value, smax_val)) {
6172        dst_reg->s32_min_value = S32_MIN;
6173        dst_reg->s32_max_value = S32_MAX;
6174    } else {
6175        dst_reg->s32_min_value += smin_val;
6176        dst_reg->s32_max_value += smax_val;
6177    }
6178    if (dst_reg->u32_min_value + umin_val < umin_val || dst_reg->u32_max_value + umax_val < umax_val) {
6179        dst_reg->u32_min_value = 0;
6180        dst_reg->u32_max_value = U32_MAX;
6181    } else {
6182        dst_reg->u32_min_value += umin_val;
6183        dst_reg->u32_max_value += umax_val;
6184    }
6185}
6186
6187static void scalar_min_max_add(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6188{
6189    s64 smin_val = src_reg->smin_value;
6190    s64 smax_val = src_reg->smax_value;
6191    u64 umin_val = src_reg->umin_value;
6192    u64 umax_val = src_reg->umax_value;
6193
6194    if (signed_add_overflows(dst_reg->smin_value, smin_val) || signed_add_overflows(dst_reg->smax_value, smax_val)) {
6195        dst_reg->smin_value = S64_MIN;
6196        dst_reg->smax_value = S64_MAX;
6197    } else {
6198        dst_reg->smin_value += smin_val;
6199        dst_reg->smax_value += smax_val;
6200    }
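    /* The checks below detect u64 wrap-around: for unsigned values, a + b
     * computed modulo 2^64 is smaller than b exactly when the addition
     * overflowed (e.g. U64_MAX + 2 wraps to 1, and 1 < 2), in which case we
     * can only say the result is somewhere in [0, U64_MAX].
     */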
6201    if (dst_reg->umin_value + umin_val < umin_val || dst_reg->umax_value + umax_val < umax_val) {
6202        dst_reg->umin_value = 0;
6203        dst_reg->umax_value = U64_MAX;
6204    } else {
6205        dst_reg->umin_value += umin_val;
6206        dst_reg->umax_value += umax_val;
6207    }
6208}
6209
6210static void scalar32_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6211{
6212    s32 smin_val = src_reg->s32_min_value;
6213    s32 smax_val = src_reg->s32_max_value;
6214    u32 umin_val = src_reg->u32_min_value;
6215    u32 umax_val = src_reg->u32_max_value;
6216
6217    if (signed_sub32_overflows(dst_reg->s32_min_value, smax_val) ||
6218        signed_sub32_overflows(dst_reg->s32_max_value, smin_val)) {
6219        /* Overflow possible, we know nothing */
6220        dst_reg->s32_min_value = S32_MIN;
6221        dst_reg->s32_max_value = S32_MAX;
6222    } else {
6223        dst_reg->s32_min_value -= smax_val;
6224        dst_reg->s32_max_value -= smin_val;
6225    }
6226    if (dst_reg->u32_min_value < umax_val) {
6227        /* Overflow possible, we know nothing */
6228        dst_reg->u32_min_value = 0;
6229        dst_reg->u32_max_value = U32_MAX;
6230    } else {
6231        /* Cannot overflow (as long as bounds are consistent) */
6232        dst_reg->u32_min_value -= umax_val;
6233        dst_reg->u32_max_value -= umin_val;
6234    }
6235}
6236
6237static void scalar_min_max_sub(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6238{
6239    s64 smin_val = src_reg->smin_value;
6240    s64 smax_val = src_reg->smax_value;
6241    u64 umin_val = src_reg->umin_value;
6242    u64 umax_val = src_reg->umax_value;
6243
6244    if (signed_sub_overflows(dst_reg->smin_value, smax_val) || signed_sub_overflows(dst_reg->smax_value, smin_val)) {
6245        /* Overflow possible, we know nothing */
6246        dst_reg->smin_value = S64_MIN;
6247        dst_reg->smax_value = S64_MAX;
6248    } else {
6249        dst_reg->smin_value -= smax_val;
6250        dst_reg->smax_value -= smin_val;
6251    }
6252    if (dst_reg->umin_value < umax_val) {
6253        /* Overflow possible, we know nothing */
6254        dst_reg->umin_value = 0;
6255        dst_reg->umax_value = U64_MAX;
6256    } else {
6257        /* Cannot overflow (as long as bounds are consistent) */
6258        dst_reg->umin_value -= umax_val;
6259        dst_reg->umax_value -= umin_val;
6260    }
6261}
6262
6263static void scalar32_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6264{
6265    s32 smin_val = src_reg->s32_min_value;
6266    u32 umin_val = src_reg->u32_min_value;
6267    u32 umax_val = src_reg->u32_max_value;
6268
6269    if (smin_val < 0 || dst_reg->s32_min_value < 0) {
6270        /* Ain't nobody got time to multiply that sign */
6271        verifier_mark_reg32_unbounded(dst_reg);
6272        return;
6273    }
6274    /* Both values are positive, so we can work with unsigned and
6275     * copy the result to signed (unless it exceeds S32_MAX).
6276     */
6277    if (umax_val > U16_MAX || dst_reg->u32_max_value > U16_MAX) {
6278        /* Potential overflow, we know nothing */
6279        verifier_mark_reg32_unbounded(dst_reg);
6280        return;
6281    }
6282    dst_reg->u32_min_value *= umin_val;
6283    dst_reg->u32_max_value *= umax_val;
6284    if (dst_reg->u32_max_value > S32_MAX) {
6285        /* Overflow possible, we know nothing */
6286        dst_reg->s32_min_value = S32_MIN;
6287        dst_reg->s32_max_value = S32_MAX;
6288    } else {
6289        dst_reg->s32_min_value = dst_reg->u32_min_value;
6290        dst_reg->s32_max_value = dst_reg->u32_max_value;
6291    }
6292}
6293
6294static void scalar_min_max_mul(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6295{
6296    s64 smin_val = src_reg->smin_value;
6297    u64 umin_val = src_reg->umin_value;
6298    u64 umax_val = src_reg->umax_value;
6299
6300    if (smin_val < 0 || dst_reg->smin_value < 0) {
6301        /* Ain't nobody got time to multiply that sign */
6302        verifier_mark_reg64_unbounded(dst_reg);
6303        return;
6304    }
6305    /* Both values are positive, so we can work with unsigned and
6306     * copy the result to signed (unless it exceeds S64_MAX).
6307     */
6308    if (umax_val > U32_MAX || dst_reg->umax_value > U32_MAX) {
6309        /* Potential overflow, we know nothing */
6310        verifier_mark_reg64_unbounded(dst_reg);
6311        return;
6312    }
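    /* Both factors are now at most U32_MAX, so the u64 products below cannot
     * wrap: (2^32 - 1) * (2^32 - 1) < 2^64.
     */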
6313    dst_reg->umin_value *= umin_val;
6314    dst_reg->umax_value *= umax_val;
6315    if (dst_reg->umax_value > S64_MAX) {
6316        /* Overflow possible, we know nothing */
6317        dst_reg->smin_value = S64_MIN;
6318        dst_reg->smax_value = S64_MAX;
6319    } else {
6320        dst_reg->smin_value = dst_reg->umin_value;
6321        dst_reg->smax_value = dst_reg->umax_value;
6322    }
6323}
6324
6325static void scalar32_min_max_and(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6326{
6327    bool src_known = tnum_subreg_is_const(src_reg->var_off);
6328    bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6329    struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6330    s32 smin_val = src_reg->s32_min_value;
6331    u32 umax_val = src_reg->u32_max_value;
6332
6333    if (src_known && dst_known) {
6334        verifier_mark_reg32_known(dst_reg, var32_off.value);
6335        return;
6336    }
6337
6338    /* We get our minimum from the var_off, since that's inherently
6339     * bitwise.  Our maximum is the minimum of the operands' maxima.
6340     */
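    /* For illustration: if the result tnum has value 0x8 (bit 3 known set),
     * the result can never be below 0x8; and since x & y <= y in unsigned
     * arithmetic, it can never exceed the smaller of the two maxima.
     */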
6341    dst_reg->u32_min_value = var32_off.value;
6342    dst_reg->u32_max_value = min(dst_reg->u32_max_value, umax_val);
6343    if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6344        /* Lose signed bounds when ANDing negative numbers,
6345         * ain't nobody got time for that.
6346         */
6347        dst_reg->s32_min_value = S32_MIN;
6348        dst_reg->s32_max_value = S32_MAX;
6349    } else {
6350        /* ANDing two positives gives a positive, so safe to
         * cast result into s32.
6352         */
6353        dst_reg->s32_min_value = dst_reg->u32_min_value;
6354        dst_reg->s32_max_value = dst_reg->u32_max_value;
6355    }
6356}
6357
6358static void scalar_min_max_and(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6359{
6360    bool src_known = tnum_is_const(src_reg->var_off);
6361    bool dst_known = tnum_is_const(dst_reg->var_off);
6362    s64 smin_val = src_reg->smin_value;
6363    u64 umax_val = src_reg->umax_value;
6364
6365    if (src_known && dst_known) {
6366        verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6367        return;
6368    }
6369
6370    /* We get our minimum from the var_off, since that's inherently
6371     * bitwise.  Our maximum is the minimum of the operands' maxima.
6372     */
6373    dst_reg->umin_value = dst_reg->var_off.value;
6374    dst_reg->umax_value = min(dst_reg->umax_value, umax_val);
6375    if (dst_reg->smin_value < 0 || smin_val < 0) {
6376        /* Lose signed bounds when ANDing negative numbers,
6377         * ain't nobody got time for that.
6378         */
6379        dst_reg->smin_value = S64_MIN;
6380        dst_reg->smax_value = S64_MAX;
6381    } else {
6382        /* ANDing two positives gives a positive, so safe to
6383         * cast result into s64.
6384         */
6385        dst_reg->smin_value = dst_reg->umin_value;
6386        dst_reg->smax_value = dst_reg->umax_value;
6387    }
6388    /* We may learn something more from the var_off */
6389    verifier_update_reg_bounds(dst_reg);
6390}
6391
6392static void scalar32_min_max_or(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6393{
6394    bool src_known = tnum_subreg_is_const(src_reg->var_off);
6395    bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6396    struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6397    s32 smin_val = src_reg->s32_min_value;
6398    u32 umin_val = src_reg->u32_min_value;
6399
6400    if (src_known && dst_known) {
6401        verifier_mark_reg32_known(dst_reg, var32_off.value);
6402        return;
6403    }
6404
6405    /* We get our maximum from the var_off, and our minimum is the
6406     * maximum of the operands' minima
6407     */
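    /* For illustration: since x | y >= x and x | y >= y in unsigned
     * arithmetic, a src with u32_min 0x10 guarantees the result is at least
     * 0x10; and no result bit outside value | mask can ever be set, which
     * bounds the maximum.
     */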
6408    dst_reg->u32_min_value = max(dst_reg->u32_min_value, umin_val);
6409    dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6410    if (dst_reg->s32_min_value < 0 || smin_val < 0) {
6411        /* Lose signed bounds when ORing negative numbers,
6412         * ain't nobody got time for that.
6413         */
6414        dst_reg->s32_min_value = S32_MIN;
6415        dst_reg->s32_max_value = S32_MAX;
6416    } else {
6417        /* ORing two positives gives a positive, so safe to
         * cast result into s32.
6419         */
6420        dst_reg->s32_min_value = dst_reg->u32_min_value;
6421        dst_reg->s32_max_value = dst_reg->u32_max_value;
6422    }
6423}
6424
6425static void scalar_min_max_or(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6426{
6427    bool src_known = tnum_is_const(src_reg->var_off);
6428    bool dst_known = tnum_is_const(dst_reg->var_off);
6429    s64 smin_val = src_reg->smin_value;
6430    u64 umin_val = src_reg->umin_value;
6431
6432    if (src_known && dst_known) {
6433        verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6434        return;
6435    }
6436
6437    /* We get our maximum from the var_off, and our minimum is the
6438     * maximum of the operands' minima
6439     */
6440    dst_reg->umin_value = max(dst_reg->umin_value, umin_val);
6441    dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6442    if (dst_reg->smin_value < 0 || smin_val < 0) {
6443        /* Lose signed bounds when ORing negative numbers,
6444         * ain't nobody got time for that.
6445         */
6446        dst_reg->smin_value = S64_MIN;
6447        dst_reg->smax_value = S64_MAX;
6448    } else {
6449        /* ORing two positives gives a positive, so safe to
6450         * cast result into s64.
6451         */
6452        dst_reg->smin_value = dst_reg->umin_value;
6453        dst_reg->smax_value = dst_reg->umax_value;
6454    }
6455    /* We may learn something more from the var_off */
6456    verifier_update_reg_bounds(dst_reg);
6457}
6458
6459static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6460{
6461    bool src_known = tnum_subreg_is_const(src_reg->var_off);
6462    bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
6463    struct tnum var32_off = tnum_subreg(dst_reg->var_off);
6464    s32 smin_val = src_reg->s32_min_value;
6465
6466    if (src_known && dst_known) {
6467        verifier_mark_reg32_known(dst_reg, var32_off.value);
6468        return;
6469    }
6470
6471    /* We get both minimum and maximum from the var32_off. */
6472    dst_reg->u32_min_value = var32_off.value;
6473    dst_reg->u32_max_value = var32_off.value | var32_off.mask;
6474
6475    if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
6476        /* XORing two positive sign numbers gives a positive,
6477         * so safe to cast u32 result into s32.
6478         */
6479        dst_reg->s32_min_value = dst_reg->u32_min_value;
6480        dst_reg->s32_max_value = dst_reg->u32_max_value;
6481    } else {
6482        dst_reg->s32_min_value = S32_MIN;
6483        dst_reg->s32_max_value = S32_MAX;
6484    }
6485}
6486
6487static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6488{
6489    bool src_known = tnum_is_const(src_reg->var_off);
6490    bool dst_known = tnum_is_const(dst_reg->var_off);
6491    s64 smin_val = src_reg->smin_value;
6492
6493    if (src_known && dst_known) {
6494        /* dst_reg->var_off.value has been updated earlier */
6495        verifier_mark_reg_known(dst_reg, dst_reg->var_off.value);
6496        return;
6497    }
6498
6499    /* We get both minimum and maximum from the var_off. */
6500    dst_reg->umin_value = dst_reg->var_off.value;
6501    dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
6502
6503    if (dst_reg->smin_value >= 0 && smin_val >= 0) {
6504        /* XORing two positive sign numbers gives a positive,
6505         * so safe to cast u64 result into s64.
6506         */
6507        dst_reg->smin_value = dst_reg->umin_value;
6508        dst_reg->smax_value = dst_reg->umax_value;
6509    } else {
6510        dst_reg->smin_value = S64_MIN;
6511        dst_reg->smax_value = S64_MAX;
6512    }
6513
6514    verifier_update_reg_bounds(dst_reg);
6515}
6516
6517static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val)
6518{
6519    /* We lose all sign bit information (except what we can pick
6520     * up from var_off)
6521     */
6522    dst_reg->s32_min_value = S32_MIN;
6523    dst_reg->s32_max_value = S32_MAX;
6524    /* If we might shift our top bit out, then we know nothing */
6525    if (umax_val > VERIFIER_THIRTYONE || dst_reg->u32_max_value > 1ULL << (VERIFIER_THIRTYONE - umax_val)) {
6526        dst_reg->u32_min_value = 0;
6527        dst_reg->u32_max_value = U32_MAX;
6528    } else {
6529        dst_reg->u32_min_value <<= umin_val;
6530        dst_reg->u32_max_value <<= umax_val;
6531    }
6532}
6533
6534static void scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6535{
6536    u32 umax_val = src_reg->u32_max_value;
6537    u32 umin_val = src_reg->u32_min_value;
6538    /* u32 alu operation will zext upper bits */
6539    struct tnum subreg = tnum_subreg(dst_reg->var_off);
6540
6541    __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6542    dst_reg->var_off = tnum_subreg(tnum_lshift(subreg, umin_val));
    /* Not required, but to be careful mark the reg64 bounds as unknown so
     * that we are forced to pick them up from the tnum and zext later; if
     * some path skips this step we are still safe.
6546     */
6547    verifier_mark_reg64_unbounded(dst_reg);
6548    verifier_update_reg32_bounds(dst_reg);
6549}
6550
6551static void __scalar64_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val)
6552{
6553    /* Special case <<32 because it is a common compiler pattern to sign
6554     * extend subreg by doing <<32 s>>32. In this case if 32bit bounds are
6555     * positive we know this shift will also be positive so we can track
6556     * bounds correctly. Otherwise we lose all sign bit information except
6557     * what we can pick up from var_off. Perhaps we can generalize this
6558     * later to shifts of any length.
6559     */
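    /* For illustration (a hypothetical fragment): with 32-bit bounds
     * [0, 100], 'r0 <<= 32' yields s64 bounds [0, 100ULL << 32] here, and a
     * following 'r0 s>>= 32' restores [0, 100].
     */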
6560    if (umin_val == 0x20 && umax_val == 0x20 && dst_reg->s32_max_value >= 0) {
6561        dst_reg->smax_value = (s64)dst_reg->s32_max_value << 0x20;
6562    } else {
6563        dst_reg->smax_value = S64_MAX;
6564    }
6565
6566    if (umin_val == 0x20 && umax_val == 0x20 && dst_reg->s32_min_value >= 0) {
6567        dst_reg->smin_value = (s64)dst_reg->s32_min_value << 0x20;
6568    } else {
6569        dst_reg->smin_value = S64_MIN;
6570    }
6571
6572    /* If we might shift our top bit out, then we know nothing */
6573    if (dst_reg->umax_value > 1ULL << (0x3f - umax_val)) {
6574        dst_reg->umin_value = 0;
6575        dst_reg->umax_value = U64_MAX;
6576    } else {
6577        dst_reg->umin_value <<= umin_val;
6578        dst_reg->umax_value <<= umax_val;
6579    }
6580}
6581
6582static void scalar_min_max_lsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6583{
6584    u64 umax_val = src_reg->umax_value;
6585    u64 umin_val = src_reg->umin_value;
6586
6587    /* scalar64 calc uses 32bit unshifted bounds so must be called first */
6588    __scalar64_min_max_lsh(dst_reg, umin_val, umax_val);
6589    __scalar32_min_max_lsh(dst_reg, umin_val, umax_val);
6590
6591    dst_reg->var_off = tnum_lshift(dst_reg->var_off, umin_val);
6592    /* We may learn something more from the var_off */
6593    verifier_update_reg_bounds(dst_reg);
6594}
6595
6596static void scalar32_min_max_rsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6597{
6598    struct tnum subreg = tnum_subreg(dst_reg->var_off);
6599    u32 umax_val = src_reg->u32_max_value;
6600    u32 umin_val = src_reg->u32_min_value;
6601
6602    /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6603     * be negative, then either:
6604     * 1) src_reg might be zero, so the sign bit of the result is
6605     *    unknown, so we lose our signed bounds
6606     * 2) it's known negative, thus the unsigned bounds capture the
6607     *    signed bounds
6608     * 3) the signed bounds cross zero, so they tell us nothing
6609     *    about the result
6610     * If the value in dst_reg is known nonnegative, then again the
     * unsigned bounds capture the signed bounds.
6612     * Thus, in all cases it suffices to blow away our signed bounds
6613     * and rely on inferring new ones from the unsigned bounds and
6614     * var_off of the result.
6615     */
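    /* For illustration: a dst that may hold -1 or 1 is, as unsigned, within
     * [1, U32_MAX]; after '>> 1' the unsigned bounds become [0, U32_MAX >> 1]
     * and still cover both possible results, whereas nothing useful could be
     * computed from the old signed bounds [-1, 1] directly.
     */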
6616    dst_reg->s32_min_value = S32_MIN;
6617    dst_reg->s32_max_value = S32_MAX;
6618
6619    dst_reg->var_off = tnum_rshift(subreg, umin_val);
6620    dst_reg->u32_min_value >>= umax_val;
6621    dst_reg->u32_max_value >>= umin_val;
6622
6623    verifier_mark_reg64_unbounded(dst_reg);
6624    verifier_update_reg32_bounds(dst_reg);
6625}
6626
6627static void scalar_min_max_rsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6628{
6629    u64 umax_val = src_reg->umax_value;
6630    u64 umin_val = src_reg->umin_value;
6631
6632    /* BPF_RSH is an unsigned shift.  If the value in dst_reg might
6633     * be negative, then either:
6634     * 1) src_reg might be zero, so the sign bit of the result is
6635     *    unknown, so we lose our signed bounds
6636     * 2) it's known negative, thus the unsigned bounds capture the
6637     *    signed bounds
6638     * 3) the signed bounds cross zero, so they tell us nothing
6639     *    about the result
6640     * If the value in dst_reg is known nonnegative, then again the
     * unsigned bounds capture the signed bounds.
6642     * Thus, in all cases it suffices to blow away our signed bounds
6643     * and rely on inferring new ones from the unsigned bounds and
6644     * var_off of the result.
6645     */
6646    dst_reg->smin_value = S64_MIN;
6647    dst_reg->smax_value = S64_MAX;
6648    dst_reg->var_off = tnum_rshift(dst_reg->var_off, umin_val);
6649    dst_reg->umin_value >>= umax_val;
6650    dst_reg->umax_value >>= umin_val;
6651
    /* It's not easy to operate on alu32 bounds here because it depends
6653     * on bits being shifted in. Take easy way out and mark unbounded
6654     * so we can recalculate later from tnum.
6655     */
6656    verifier_mark_reg32_unbounded(dst_reg);
6657    verifier_update_reg_bounds(dst_reg);
6658}
6659
6660static void scalar32_min_max_arsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6661{
6662    u64 umin_val = src_reg->u32_min_value;
6663
6664    /* Upon reaching here, src_known is true and
6665     * umax_val is equal to umin_val.
6666     */
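    /* An arithmetic right shift by a constant is monotonic, so shifting both
     * existing s32 bounds by that constant keeps them valid; e.g. bounds
     * [-8, 4] become [-4, 2] after 's>> 1'.
     */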
6667    dst_reg->s32_min_value = (u32)(((s32)dst_reg->s32_min_value) >> umin_val);
6668    dst_reg->s32_max_value = (u32)(((s32)dst_reg->s32_max_value) >> umin_val);
6669
6670    dst_reg->var_off = tnum_arshift(tnum_subreg(dst_reg->var_off), umin_val, 0x20);
6671
6672    /* blow away the dst_reg umin_value/umax_value and rely on
6673     * dst_reg var_off to refine the result.
6674     */
6675    dst_reg->u32_min_value = 0;
6676    dst_reg->u32_max_value = U32_MAX;
6677
6678    verifier_mark_reg64_unbounded(dst_reg);
6679    verifier_update_reg32_bounds(dst_reg);
6680}
6681
6682static void scalar_min_max_arsh(struct bpf_reg_state *dst_reg, struct bpf_reg_state *src_reg)
6683{
6684    u64 umin_val = src_reg->umin_value;
6685
6686    /* Upon reaching here, src_known is true and umax_val is equal
6687     * to umin_val.
6688     */
6689    dst_reg->smin_value >>= umin_val;
6690    dst_reg->smax_value >>= umin_val;
6691
6692    dst_reg->var_off = tnum_arshift(dst_reg->var_off, umin_val, 0x40);
6693
6694    /* blow away the dst_reg umin_value/umax_value and rely on
6695     * dst_reg var_off to refine the result.
6696     */
6697    dst_reg->umin_value = 0;
6698    dst_reg->umax_value = U64_MAX;
6699
    /* It's not easy to operate on alu32 bounds here because it depends
6701     * on bits being shifted in from upper 32-bits. Take easy way out
6702     * and mark unbounded so we can recalculate later from tnum.
6703     */
6704    verifier_mark_reg32_unbounded(dst_reg);
6705    verifier_update_reg_bounds(dst_reg);
6706}
6707
6708/* WARNING: This function does calculations on 64-bit values, but the actual
6709 * execution may occur on 32-bit values. Therefore, things like bitshifts
6710 * need extra checks in the 32-bit case.
6711 */
6712static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn,
6713                                      struct bpf_reg_state *dst_reg, struct bpf_reg_state src_reg)
6714{
6715    struct bpf_reg_state *regs = cur_regs(env);
6716    u8 opcode = BPF_OP(insn->code);
6717    bool src_known;
6718    s64 smin_val, smax_val;
6719    u64 umin_val, umax_val;
6720    s32 s32_min_val, s32_max_val;
6721    u32 u32_min_val, u32_max_val;
6722    u64 insn_bitness = (BPF_CLASS(insn->code) == BPF_ALU64) ? VERIFIER_SIXTYFOUR : VERIFIER_THIRTYTWO;
6723    bool alu32 = (BPF_CLASS(insn->code) != BPF_ALU64);
6724    int ret;
6725
6726    smin_val = src_reg.smin_value;
6727    smax_val = src_reg.smax_value;
6728    umin_val = src_reg.umin_value;
6729    umax_val = src_reg.umax_value;
6730
6731    s32_min_val = src_reg.s32_min_value;
6732    s32_max_val = src_reg.s32_max_value;
6733    u32_min_val = src_reg.u32_min_value;
6734    u32_max_val = src_reg.u32_max_value;
6735
6736    if (alu32) {
6737        src_known = tnum_subreg_is_const(src_reg.var_off);
6738        if ((src_known && (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) || s32_min_val > s32_max_val ||
6739            u32_min_val > u32_max_val) {
6740            /* Taint dst register if offset had invalid bounds
6741             * derived from e.g. dead branches.
6742             */
6743            __mark_reg_unknown(env, dst_reg);
6744            return 0;
6745        }
6746    } else {
6747        src_known = tnum_is_const(src_reg.var_off);
6748        if ((src_known && (smin_val != smax_val || umin_val != umax_val)) || smin_val > smax_val ||
6749            umin_val > umax_val) {
6750            /* Taint dst register if offset had invalid bounds
6751             * derived from e.g. dead branches.
6752             */
6753            __mark_reg_unknown(env, dst_reg);
6754            return 0;
6755        }
6756    }
6757
6758    if (!src_known && opcode != BPF_ADD && opcode != BPF_SUB && opcode != BPF_AND) {
6759        __mark_reg_unknown(env, dst_reg);
6760        return 0;
6761    }
6762
6763    if (sanitize_needed(opcode)) {
6764        ret = sanitize_val_alu(env, insn);
6765        if (ret < 0) {
6766            return sanitize_err(env, insn, ret, NULL, NULL);
6767        }
6768    }
6769
6770    /* Calculate sign/unsigned bounds and tnum for alu32 and alu64 bit ops.
     * There are two classes of instructions: for the first class we track both
     * alu32 and alu64 sign/unsigned bounds independently; this provides the
     * greatest amount of precision when alu operations are mixed with jmp32
     * operations. These operations are BPF_ADD, BPF_SUB, BPF_MUL, BPF_AND,
     * and BPF_OR. This is possible because these ops have fairly easy to
6776     * understand and calculate behavior in both 32-bit and 64-bit alu ops.
6777     * See alu32 verifier tests for examples. The second class of
6778     * operations, BPF_LSH, BPF_RSH, and BPF_ARSH, however are not so easy
6779     * with regards to tracking sign/unsigned bounds because the bits may
6780     * cross subreg boundaries in the alu64 case. When this happens we mark
6781     * the reg unbounded in the subreg bound space and use the resulting
6782     * tnum to calculate an approximation of the sign/unsigned bounds.
6783     */
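    /* A hypothetical fragment for illustration:
     *
     *   w0 = w1;
     *   w0 += 4;                 // class 1: s32/u32 bounds stay precise
     *   if w0 < 10 goto <target> // jmp32 can use those 32-bit bounds
     *   r0 >>= 5;                // class 2: 32-bit bounds are dropped and
     *                            // recomputed from the resulting tnum
     */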
6784    switch (opcode) {
6785        case BPF_ADD:
6786            scalar32_min_max_add(dst_reg, &src_reg);
6787            scalar_min_max_add(dst_reg, &src_reg);
6788            dst_reg->var_off = tnum_add(dst_reg->var_off, src_reg.var_off);
6789            break;
6790        case BPF_SUB:
6791            scalar32_min_max_sub(dst_reg, &src_reg);
6792            scalar_min_max_sub(dst_reg, &src_reg);
6793            dst_reg->var_off = tnum_sub(dst_reg->var_off, src_reg.var_off);
6794            break;
6795        case BPF_MUL:
6796            dst_reg->var_off = tnum_mul(dst_reg->var_off, src_reg.var_off);
6797            scalar32_min_max_mul(dst_reg, &src_reg);
6798            scalar_min_max_mul(dst_reg, &src_reg);
6799            break;
6800        case BPF_AND:
6801            dst_reg->var_off = tnum_and(dst_reg->var_off, src_reg.var_off);
6802            scalar32_min_max_and(dst_reg, &src_reg);
6803            scalar_min_max_and(dst_reg, &src_reg);
6804            break;
6805        case BPF_OR:
6806            dst_reg->var_off = tnum_or(dst_reg->var_off, src_reg.var_off);
6807            scalar32_min_max_or(dst_reg, &src_reg);
6808            scalar_min_max_or(dst_reg, &src_reg);
6809            break;
6810        case BPF_XOR:
6811            dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
6812            scalar32_min_max_xor(dst_reg, &src_reg);
6813            scalar_min_max_xor(dst_reg, &src_reg);
6814            break;
6815        case BPF_LSH:
6816            if (umax_val >= insn_bitness) {
6817                /* Shifts greater than 31 or 63 are undefined.
6818                 * This includes shifts by a negative number.
6819                 */
6820                mark_reg_unknown(env, regs, insn->dst_reg);
6821                break;
6822            }
6823            if (alu32) {
6824                scalar32_min_max_lsh(dst_reg, &src_reg);
6825            } else {
6826                scalar_min_max_lsh(dst_reg, &src_reg);
6827            }
6828            break;
6829        case BPF_RSH:
6830            if (umax_val >= insn_bitness) {
6831                /* Shifts greater than 31 or 63 are undefined.
6832                 * This includes shifts by a negative number.
6833                 */
6834                mark_reg_unknown(env, regs, insn->dst_reg);
6835                break;
6836            }
6837            if (alu32) {
6838                scalar32_min_max_rsh(dst_reg, &src_reg);
6839            } else {
6840                scalar_min_max_rsh(dst_reg, &src_reg);
6841            }
6842            break;
6843        case BPF_ARSH:
6844            if (umax_val >= insn_bitness) {
6845                /* Shifts greater than 31 or 63 are undefined.
6846                 * This includes shifts by a negative number.
6847                 */
6848                mark_reg_unknown(env, regs, insn->dst_reg);
6849                break;
6850            }
6851            if (alu32) {
6852                scalar32_min_max_arsh(dst_reg, &src_reg);
6853            } else {
6854                scalar_min_max_arsh(dst_reg, &src_reg);
6855            }
6856            break;
6857        default:
6858            mark_reg_unknown(env, regs, insn->dst_reg);
6859            break;
6860    }
6861
6862    /* ALU32 ops are zero extended into 64bit register */
6863    if (alu32) {
6864        zext_32_to_64(dst_reg);
6865    }
6866
6867    reg_bounds_sync(dst_reg);
6868    return 0;
6869}
6870
6871/* Handles ALU ops other than BPF_END, BPF_NEG and BPF_MOV: computes new min/max
6872 * and var_off.
6873 */
6874static int adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn)
6875{
6876    struct bpf_verifier_state *vstate = env->cur_state;
6877    struct bpf_func_state *state = vstate->frame[vstate->curframe];
6878    struct bpf_reg_state *regs = state->regs, *dst_reg, *src_reg;
6879    struct bpf_reg_state *ptr_reg = NULL, off_reg = {0};
6880    u8 opcode = BPF_OP(insn->code);
6881    int err;
6882
6883    dst_reg = &regs[insn->dst_reg];
6884    src_reg = NULL;
6885    if (dst_reg->type != SCALAR_VALUE) {
6886        ptr_reg = dst_reg;
6887    } else {
6888        /* Make sure ID is cleared otherwise dst_reg min/max could be
6889         * incorrectly propagated into other registers by find_equal_scalars()
6890         */
6891        dst_reg->id = 0;
6892    }
6893    if (BPF_SRC(insn->code) == BPF_X) {
6894        src_reg = &regs[insn->src_reg];
6895        if (src_reg->type != SCALAR_VALUE) {
6896            if (dst_reg->type != SCALAR_VALUE) {
6897                /* Combining two pointers by any ALU op yields
6898                 * an arbitrary scalar. Disallow all math except
6899                 * pointer subtraction
6900                 */
6901                if (opcode == BPF_SUB && env->allow_ptr_leaks) {
6902                    mark_reg_unknown(env, regs, insn->dst_reg);
6903                    return 0;
6904                }
6905                verbose(env, "R%d pointer %s pointer prohibited\n", insn->dst_reg,
6906                        bpf_alu_string[opcode >> VERIFIER_FOUR]);
6907                return -EACCES;
6908            } else {
6909                /* scalar += pointer
6910                 * This is legal, but we have to reverse our
6911                 * src/dest handling in computing the range
6912                 */
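                /* For example, for 'r1 += r2' where r1 is a scalar and r2 is
                 * a map value pointer, r2 is passed as ptr_reg and r1 (the
                 * current dst) as off_reg, and r1 ends up holding the
                 * resulting pointer.
                 */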
6913                err = mark_chain_precision(env, insn->dst_reg);
6914                if (err) {
6915                    return err;
6916                }
6917                return adjust_ptr_min_max_vals(env, insn, src_reg, dst_reg);
6918            }
6919        } else if (ptr_reg) {
6920            /* pointer += scalar */
6921            err = mark_chain_precision(env, insn->src_reg);
6922            if (err) {
6923                return err;
6924            }
6925            return adjust_ptr_min_max_vals(env, insn, dst_reg, src_reg);
6926        }
6927    } else {
6928        /* Pretend the src is a reg with a known value, since we only
6929         * need to be able to read from this state.
6930         */
6931        off_reg.type = SCALAR_VALUE;
6932        verifier_mark_reg_known(&off_reg, insn->imm);
6933        src_reg = &off_reg;
6934        if (ptr_reg) { /* pointer += K */
6935            return adjust_ptr_min_max_vals(env, insn, ptr_reg, src_reg);
6936        }
6937    }
6938
6939    /* Got here implies adding two SCALAR_VALUEs */
6940    if (WARN_ON_ONCE(ptr_reg)) {
6941        print_verifier_state(env, state);
6942        verbose(env, "verifier internal error: unexpected ptr_reg\n");
6943        return -EINVAL;
6944    }
6945    if (WARN_ON(!src_reg)) {
6946        print_verifier_state(env, state);
6947        verbose(env, "verifier internal error: no src_reg\n");
6948        return -EINVAL;
6949    }
6950    return adjust_scalar_min_max_vals(env, insn, dst_reg, *src_reg);
6951}
6952
6953/* check validity of 32-bit and 64-bit arithmetic operations */
6954static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn)
6955{
6956    struct bpf_reg_state *regs = cur_regs(env);
6957    u8 opcode = BPF_OP(insn->code);
6958    int err;
6959
6960    if (opcode == BPF_END || opcode == BPF_NEG) {
6961        if (opcode == BPF_NEG) {
6962            if (BPF_SRC(insn->code) != 0 || insn->src_reg != BPF_REG_0 || insn->off != 0 || insn->imm != 0) {
6963                verbose(env, "BPF_NEG uses reserved fields\n");
6964                return -EINVAL;
6965            }
6966        } else {
6967            if (insn->src_reg != BPF_REG_0 || insn->off != 0 ||
6968                (insn->imm != 0x10 && insn->imm != VERIFIER_THIRTYTWO && insn->imm != VERIFIER_SIXTYFOUR) ||
6969                BPF_CLASS(insn->code) == BPF_ALU64) {
6970                verbose(env, "BPF_END uses reserved fields\n");
6971                return -EINVAL;
6972            }
6973        }
6974
6975        /* check src operand */
6976        err = check_reg_arg(env, insn->dst_reg, SRC_OP);
6977        if (err) {
6978            return err;
6979        }
6980
6981        if (is_pointer_value(env, insn->dst_reg)) {
6982            verbose(env, "R%d pointer arithmetic prohibited\n", insn->dst_reg);
6983            return -EACCES;
6984        }
6985
6986        /* check dest operand */
6987        err = check_reg_arg(env, insn->dst_reg, DST_OP);
6988        if (err) {
6989            return err;
6990        }
6991    } else if (opcode == BPF_MOV) {
6992        if (BPF_SRC(insn->code) == BPF_X) {
6993            if (insn->imm != 0 || insn->off != 0) {
6994                verbose(env, "BPF_MOV uses reserved fields\n");
6995                return -EINVAL;
6996            }
6997            /* check src operand */
6998            err = check_reg_arg(env, insn->src_reg, SRC_OP);
6999            if (err) {
7000                return err;
7001            }
7002        } else {
7003            if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7004                verbose(env, "BPF_MOV uses reserved fields\n");
7005                return -EINVAL;
7006            }
7007        }
7008
7009        /* check dest operand, mark as required later */
7010        err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7011        if (err) {
7012            return err;
7013        }
7014
7015        if (BPF_SRC(insn->code) == BPF_X) {
7016            struct bpf_reg_state *src_reg = regs + insn->src_reg;
7017            struct bpf_reg_state *dst_reg = regs + insn->dst_reg;
7018
7019            if (BPF_CLASS(insn->code) == BPF_ALU64) {
7020                /* case: R1 = R2
7021                 * copy register state to dest reg
7022                 */
7023                if (src_reg->type == SCALAR_VALUE && !src_reg->id) {
7024                    /* Assign src and dst registers the same ID
7025                     * that will be used by find_equal_scalars()
7026                     * to propagate min/max range.
7027                     */
7028                    src_reg->id = ++env->id_gen;
7029                }
7030                *dst_reg = *src_reg;
7031                dst_reg->live |= REG_LIVE_WRITTEN;
7032                dst_reg->subreg_def = DEF_NOT_SUBREG;
7033            } else {
7034                /* R1 = (u32) R2 */
7035                if (is_pointer_value(env, insn->src_reg)) {
7036                    verbose(env, "R%d partial copy of pointer\n", insn->src_reg);
7037                    return -EACCES;
7038                } else if (src_reg->type == SCALAR_VALUE) {
7039                    *dst_reg = *src_reg;
7040                    /* Make sure ID is cleared otherwise
7041                     * dst_reg min/max could be incorrectly
7042                     * propagated into src_reg by find_equal_scalars()
7043                     */
7044                    dst_reg->id = 0;
7045                    dst_reg->live |= REG_LIVE_WRITTEN;
7046                    dst_reg->subreg_def = env->insn_idx + 1;
7047                } else {
7048                    mark_reg_unknown(env, regs, insn->dst_reg);
7049                }
7050                zext_32_to_64(dst_reg);
7051
7052                reg_bounds_sync(dst_reg);
7053            }
7054        } else {
7055            /* case: R = imm
7056             * remember the value we stored into this reg
7057             */
7058            /* clear any state __mark_reg_known doesn't set */
7059            mark_reg_unknown(env, regs, insn->dst_reg);
7060            regs[insn->dst_reg].type = SCALAR_VALUE;
7061            if (BPF_CLASS(insn->code) == BPF_ALU64) {
7062                verifier_mark_reg_known(regs + insn->dst_reg, insn->imm);
7063            } else {
7064                verifier_mark_reg_known(regs + insn->dst_reg, (u32)insn->imm);
7065            }
7066        }
7067    } else if (opcode > BPF_END) {
7068        verbose(env, "invalid BPF_ALU opcode %x\n", opcode);
7069        return -EINVAL;
7070    } else { /* all other ALU ops: and, sub, xor, add, ... */
7071        if (BPF_SRC(insn->code) == BPF_X) {
7072            if (insn->imm != 0 || insn->off != 0) {
7073                verbose(env, "BPF_ALU uses reserved fields\n");
7074                return -EINVAL;
7075            }
7076            /* check src1 operand */
7077            err = check_reg_arg(env, insn->src_reg, SRC_OP);
7078            if (err) {
7079                return err;
7080            }
7081        } else {
7082            if (insn->src_reg != BPF_REG_0 || insn->off != 0) {
7083                verbose(env, "BPF_ALU uses reserved fields\n");
7084                return -EINVAL;
7085            }
7086        }
7087
7088        /* check src2 operand */
7089        err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7090        if (err) {
7091            return err;
7092        }
7093
7094        if ((opcode == BPF_MOD || opcode == BPF_DIV) && BPF_SRC(insn->code) == BPF_K && insn->imm == 0) {
7095            verbose(env, "div by zero\n");
7096            return -EINVAL;
7097        }
7098
7099        if ((opcode == BPF_LSH || opcode == BPF_RSH || opcode == BPF_ARSH) && BPF_SRC(insn->code) == BPF_K) {
            int size = BPF_CLASS(insn->code) == BPF_ALU64 ? VERIFIER_SIXTYFOUR : VERIFIER_THIRTYTWO;
7101            if (insn->imm < 0 || insn->imm >= size) {
7102                verbose(env, "invalid shift %d\n", insn->imm);
7103                return -EINVAL;
7104            }
7105        }
7106        /* check dest operand */
7107        err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
7108        if (err) {
7109            return err;
7110        }
7111        return adjust_reg_min_max_vals(env, insn);
7112    }
7113
7114    return 0;
7115}
7116
7117static void __find_good_pkt_pointers(struct bpf_func_state *state, struct bpf_reg_state *dst_reg,
7118                                     enum bpf_reg_type type, u16 new_range)
7119{
7120    struct bpf_reg_state *reg;
7121    int i;
7122
7123    for (i = 0; i < MAX_BPF_REG; i++) {
7124        reg = &state->regs[i];
7125        if (reg->type == type && reg->id == dst_reg->id) {
7126            /* keep the maximum range already checked */
7127            reg->range = max(reg->range, new_range);
7128        }
7129    }
7130
    bpf_for_each_spilled_reg(i, state, reg) {
7133        if (!reg) {
7134            continue;
7135        }
7136        if (reg->type == type && reg->id == dst_reg->id) {
7137            reg->range = max(reg->range, new_range);
7138        }
7139    }
7140}
7141
7142static void find_good_pkt_pointers(struct bpf_verifier_state *vstate, struct bpf_reg_state *dst_reg,
7143                                   enum bpf_reg_type type, bool range_right_open)
7144{
7145    u16 new_range;
7146    int i;
7147
7148    if (dst_reg->off < 0 || (dst_reg->off == 0 && range_right_open)) {
7149        /* This doesn't give us any range */
7150        return;
7151    }
7152
7153    if (dst_reg->umax_value > MAX_PACKET_OFF || dst_reg->umax_value + dst_reg->off > MAX_PACKET_OFF) {
7154        /* Risk of overflow.  For instance, ptr + (1<<63) may be less
7155         * than pkt_end, but that's because it's also less than pkt.
7156         */
7157        return;
7158    }
7159
7160    new_range = dst_reg->off;
7161    if (range_right_open) {
7162        new_range--;
7163    }
7164
7165    /* Examples for register markings:
7166     *
7167     * pkt_data in dst register:
7168     *
7169     *   r2 = r3;
7170     *   r2 += 8;
7171     *   if (r2 > pkt_end) goto <handle exception>
7172     *   <access okay>
7173     *
7174     *   r2 = r3;
7175     *   r2 += 8;
7176     *   if (r2 < pkt_end) goto <access okay>
7177     *   <handle exception>
7178     *
7179     *   Where:
7180     *     r2 == dst_reg, pkt_end == src_reg
7181     *     r2=pkt(id=n,off=8,r=0)
7182     *     r3=pkt(id=n,off=0,r=0)
7183     *
7184     * pkt_data in src register:
7185     *
7186     *   r2 = r3;
7187     *   r2 += 8;
7188     *   if (pkt_end >= r2) goto <access okay>
7189     *   <handle exception>
7190     *
7191     *   r2 = r3;
7192     *   r2 += 8;
7193     *   if (pkt_end <= r2) goto <handle exception>
7194     *   <access okay>
7195     *
7196     *   Where:
7197     *     pkt_end == dst_reg, r2 == src_reg
7198     *     r2=pkt(id=n,off=8,r=0)
7199     *     r3=pkt(id=n,off=0,r=0)
7200     *
7201     * Find register r3 and mark its range as r3=pkt(id=n,off=0,r=8)
7202     * or r3=pkt(id=n,off=0,r=8-1), so that range of bytes [r3, r3 + 8)
7203     * and [r3, r3 + 8-1) respectively is safe to access depending on
7204     * the check.
7205     */
7206
7207    /* If our ids match, then we must have the same max_value.  And we
7208     * don't care about the other reg's fixed offset, since if it's too big
7209     * the range won't allow anything.
7210     * dst_reg->off is known < MAX_PACKET_OFF, therefore it fits in a u16.
7211     */
7212    for (i = 0; i <= vstate->curframe; i++) {
7213        __find_good_pkt_pointers(vstate->frame[i], dst_reg, type, new_range);
7214    }
7215}
7216
7217static int is_branch32_taken(struct bpf_reg_state *reg, u32 val, u8 opcode)
7218{
7219    struct tnum subreg = tnum_subreg(reg->var_off);
7220    s32 sval = (s32)val;
7221
7222    switch (opcode) {
7223        case BPF_JEQ:
7224            if (tnum_is_const(subreg)) {
7225                return !!tnum_equals_const(subreg, val);
7226            }
7227            break;
7228        case BPF_JNE:
7229            if (tnum_is_const(subreg)) {
7230                return !tnum_equals_const(subreg, val);
7231            }
7232            break;
7233        case BPF_JSET:
7234            if ((~subreg.mask & subreg.value) & val) {
7235                return 1;
7236            }
7237            if (!((subreg.mask | subreg.value) & val)) {
7238                return 0;
7239            }
7240            break;
7241        case BPF_JGT:
7242            if (reg->u32_min_value > val) {
7243                return 1;
7244            } else if (reg->u32_max_value <= val) {
7245                return 0;
7246            }
7247            break;
7248        case BPF_JSGT:
7249            if (reg->s32_min_value > sval) {
7250                return 1;
7251            } else if (reg->s32_max_value <= sval) {
7252                return 0;
7253            }
7254            break;
7255        case BPF_JLT:
7256            if (reg->u32_max_value < val) {
7257                return 1;
7258            } else if (reg->u32_min_value >= val) {
7259                return 0;
7260            }
7261            break;
7262        case BPF_JSLT:
7263            if (reg->s32_max_value < sval) {
7264                return 1;
7265            } else if (reg->s32_min_value >= sval) {
7266                return 0;
7267            }
7268            break;
7269        case BPF_JGE:
7270            if (reg->u32_min_value >= val) {
7271                return 1;
7272            } else if (reg->u32_max_value < val) {
7273                return 0;
7274            }
7275            break;
7276        case BPF_JSGE:
7277            if (reg->s32_min_value >= sval) {
7278                return 1;
7279            } else if (reg->s32_max_value < sval) {
7280                return 0;
7281            }
7282            break;
7283        case BPF_JLE:
7284            if (reg->u32_max_value <= val) {
7285                return 1;
7286            } else if (reg->u32_min_value > val) {
7287                return 0;
7288            }
7289            break;
7290        case BPF_JSLE:
7291            if (reg->s32_max_value <= sval) {
7292                return 1;
7293            } else if (reg->s32_min_value > sval) {
7294                return 0;
7295            }
7296            break;
7297    }
7298
7299    return -1;
7300}
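
/* Illustrative example (values chosen for illustration only): if the
 * 32-bit bounds of w1 are known to be [7, 9], then "if w1 > 5"
 * (BPF_JGT, val == 5) returns 1 because u32_min_value > val,
 * "if w1 > 9" returns 0 because u32_max_value <= val, and
 * "if w1 > 8" returns -1 since only part of the range satisfies the
 * condition.
 */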
7301
7302static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
7303{
7304    s64 sval = (s64)val;
7305
7306    switch (opcode) {
7307        case BPF_JEQ:
7308            if (tnum_is_const(reg->var_off)) {
7309                return !!tnum_equals_const(reg->var_off, val);
7310            }
7311            break;
7312        case BPF_JNE:
7313            if (tnum_is_const(reg->var_off)) {
7314                return !tnum_equals_const(reg->var_off, val);
7315            }
7316            break;
7317        case BPF_JSET:
7318            if ((~reg->var_off.mask & reg->var_off.value) & val) {
7319                return 1;
7320            }
7321            if (!((reg->var_off.mask | reg->var_off.value) & val)) {
7322                return 0;
7323            }
7324            break;
7325        case BPF_JGT:
7326            if (reg->umin_value > val) {
7327                return 1;
7328            } else if (reg->umax_value <= val) {
7329                return 0;
7330            }
7331            break;
7332        case BPF_JSGT:
7333            if (reg->smin_value > sval) {
7334                return 1;
7335            } else if (reg->smax_value <= sval) {
7336                return 0;
7337            }
7338            break;
7339        case BPF_JLT:
7340            if (reg->umax_value < val) {
7341                return 1;
7342            } else if (reg->umin_value >= val) {
7343                return 0;
7344            }
7345            break;
7346        case BPF_JSLT:
7347            if (reg->smax_value < sval) {
7348                return 1;
7349            } else if (reg->smin_value >= sval) {
7350                return 0;
7351            }
7352            break;
7353        case BPF_JGE:
7354            if (reg->umin_value >= val) {
7355                return 1;
7356            } else if (reg->umax_value < val) {
7357                return 0;
7358            }
7359            break;
7360        case BPF_JSGE:
7361            if (reg->smin_value >= sval) {
7362                return 1;
7363            } else if (reg->smax_value < sval) {
7364                return 0;
7365            }
7366            break;
7367        case BPF_JLE:
7368            if (reg->umax_value <= val) {
7369                return 1;
7370            } else if (reg->umin_value > val) {
7371                return 0;
7372            }
7373            break;
7374        case BPF_JSLE:
7375            if (reg->smax_value <= sval) {
7376                return 1;
7377            } else if (reg->smin_value > sval) {
7378                return 0;
7379            }
7380            break;
7381    }
7382
7383    return -1;
7384}
7385
7386/* compute branch direction of the expression "if (reg opcode val) goto target;"
7387 * and return:
7388 *  1 - branch will be taken and "goto target" will be executed
7389 *  0 - branch will not be taken and fall-through to next insn
7390 * -1 - unknown. Example: "if (reg < 5)" is unknown when the register's
7391 *      value range is [0,10]
7392 */
7393static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode, bool is_jmp32)
7394{
7395    if (__is_pointer_value(false, reg)) {
7396        if (!reg_type_not_null(reg->type)) {
7397            return -1;
7398        }
7399
7400        /* If the pointer is known to be non-NULL, a test against zero
7401         * has a known outcome, so we can decide the branch direction.
7402         */
7403        if (val != 0) {
7404            return -1;
7405        }
7406
7407        switch (opcode) {
7408            case BPF_JEQ:
7409                return 0;
7410            case BPF_JNE:
7411                return 1;
7412            default:
7413                return -1;
7414        }
7415    }
7416
7417    if (is_jmp32) {
7418        return is_branch32_taken(reg, val, opcode);
7419    }
7420    return is_branch64_taken(reg, val, opcode);
7421}
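
/* Example of the pointer special case above: a register whose type is
 * known to be non-NULL (e.g. PTR_TO_SOCKET) compared against immediate
 * zero always has a known direction: "if r1 == 0" is never taken
 * (returns 0) and "if r1 != 0" is always taken (returns 1), while a
 * comparison against any non-zero immediate stays undecided (-1).
 */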
7422
7423/* Adjusts the register min/max values in the case that the dst_reg is the
7424 * variable register that we are working on, and src_reg is a constant or we're
7425 * simply doing a BPF_K check.
7426 * In JEQ/JNE cases we also adjust the var_off values.
7427 */
7428static void reg_set_min_max(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u32 val32,
7429                            u8 opcode, bool is_jmp32)
7430{
7431    struct tnum false_32off = tnum_subreg(false_reg->var_off);
7432    struct tnum false_64off = false_reg->var_off;
7433    struct tnum true_32off = tnum_subreg(true_reg->var_off);
7434    struct tnum true_64off = true_reg->var_off;
7435    s64 sval = (s64)val;
7436    s32 sval32 = (s32)val32;
7437
7438    /* If the dst_reg is a pointer, we can't learn anything about its
7439     * variable offset from the compare (unless src_reg were a pointer into
7440     * the same object, but we don't bother with that).
7441     * Since false_reg and true_reg have the same type by construction, we
7442     * only need to check one of them for pointerness.
7443     */
7444    if (__is_pointer_value(false, false_reg)) {
7445        return;
7446    }
7447
7448    switch (opcode) {
7449    /* JEQ/JNE comparison doesn't change the register equivalence.
7450     *
7451     * r1 = r2;
7452     * if (r1 == 42) goto label;
7453     * ...
7454     * label: // here both r1 and r2 are known to be 42.
7455     *
7456     * Hence when marking a register as known, preserve its ID.
7457     */
7458        case BPF_JEQ:
7459            if (is_jmp32) {
7460                __mark_reg32_known(true_reg, val32);
7461                true_32off = tnum_subreg(true_reg->var_off);
7462            } else {
7463                ___mark_reg_known(true_reg, val);
7464                true_64off = true_reg->var_off;
7465            }
7466            break;
7467        case BPF_JNE:
7468            if (is_jmp32) {
7469                __mark_reg32_known(false_reg, val32);
7470                false_32off = tnum_subreg(false_reg->var_off);
7471            } else {
7472                ___mark_reg_known(false_reg, val);
7473                false_64off = false_reg->var_off;
7474            }
7475            break;
7476        case BPF_JSET:
7477            if (is_jmp32) {
7478                false_32off = tnum_and(false_32off, tnum_const(~val32));
7479                if (is_power_of_2(val32)) {
7480                    true_32off = tnum_or(true_32off, tnum_const(val32));
7481                }
7482            } else {
7483                false_64off = tnum_and(false_64off, tnum_const(~val));
7484                if (is_power_of_2(val)) {
7485                    true_64off = tnum_or(true_64off, tnum_const(val));
7486                }
7487            }
7488            break;
7489        case BPF_JGE:
7490        case BPF_JGT: {
7491            if (is_jmp32) {
7492                u32 false_umax = opcode == BPF_JGT ? val32 : val32 - 1;
7493                u32 true_umin = opcode == BPF_JGT ? val32 + 1 : val32;
7494
7495                false_reg->u32_max_value = min(false_reg->u32_max_value, false_umax);
7496                true_reg->u32_min_value = max(true_reg->u32_min_value, true_umin);
7497            } else {
7498                u64 false_umax = opcode == BPF_JGT ? val : val - 1;
7499                u64 true_umin = opcode == BPF_JGT ? val + 1 : val;
7500
7501                false_reg->umax_value = min(false_reg->umax_value, false_umax);
7502                true_reg->umin_value = max(true_reg->umin_value, true_umin);
7503            }
7504            break;
7505        }
7506        case BPF_JSGE:
7507        case BPF_JSGT: {
7508            if (is_jmp32) {
7509                s32 false_smax = opcode == BPF_JSGT ? sval32 : sval32 - 1;
7510                s32 true_smin = opcode == BPF_JSGT ? sval32 + 1 : sval32;
7511
7512                false_reg->s32_max_value = min(false_reg->s32_max_value, false_smax);
7513                true_reg->s32_min_value = max(true_reg->s32_min_value, true_smin);
7514            } else {
7515                s64 false_smax = opcode == BPF_JSGT ? sval : sval - 1;
7516                s64 true_smin = opcode == BPF_JSGT ? sval + 1 : sval;
7517
7518                false_reg->smax_value = min(false_reg->smax_value, false_smax);
7519                true_reg->smin_value = max(true_reg->smin_value, true_smin);
7520            }
7521            break;
7522        }
7523        case BPF_JLE:
7524        case BPF_JLT: {
7525            if (is_jmp32) {
7526                u32 false_umin = opcode == BPF_JLT ? val32 : val32 + 1;
7527                u32 true_umax = opcode == BPF_JLT ? val32 - 1 : val32;
7528
7529                false_reg->u32_min_value = max(false_reg->u32_min_value, false_umin);
7530                true_reg->u32_max_value = min(true_reg->u32_max_value, true_umax);
7531            } else {
7532                u64 false_umin = opcode == BPF_JLT ? val : val + 1;
7533                u64 true_umax = opcode == BPF_JLT ? val - 1 : val;
7534
7535                false_reg->umin_value = max(false_reg->umin_value, false_umin);
7536                true_reg->umax_value = min(true_reg->umax_value, true_umax);
7537            }
7538            break;
7539        }
7540        case BPF_JSLE:
7541        case BPF_JSLT: {
7542            if (is_jmp32) {
7543                s32 false_smin = opcode == BPF_JSLT ? sval32 : sval32 + 1;
7544                s32 true_smax = opcode == BPF_JSLT ? sval32 - 1 : sval32;
7545
7546                false_reg->s32_min_value = max(false_reg->s32_min_value, false_smin);
7547                true_reg->s32_max_value = min(true_reg->s32_max_value, true_smax);
7548            } else {
7549                s64 false_smin = opcode == BPF_JSLT ? sval : sval + 1;
7550                s64 true_smax = opcode == BPF_JSLT ? sval - 1 : sval;
7551
7552                false_reg->smin_value = max(false_reg->smin_value, false_smin);
7553                true_reg->smax_value = min(true_reg->smax_value, true_smax);
7554            }
7555            break;
7556        }
7557        default:
7558            return;
7559    }
7560
7561    if (is_jmp32) {
7562        false_reg->var_off = tnum_or(tnum_clear_subreg(false_64off), tnum_subreg(false_32off));
7563        true_reg->var_off = tnum_or(tnum_clear_subreg(true_64off), tnum_subreg(true_32off));
7564        verifier_reg_combine_32_into_64(false_reg);
7565        verifier_reg_combine_32_into_64(true_reg);
7566    } else {
7567        false_reg->var_off = false_64off;
7568        true_reg->var_off = true_64off;
7569        __reg_combine_64_into_32(false_reg);
7570        __reg_combine_64_into_32(true_reg);
7571    }
7572}
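
/* Worked example (illustrative): for "if r1 > 10 goto l" with r1 an
 * otherwise unbounded scalar and is_jmp32 == false, the branch-taken
 * state gets umin_value raised to 11 while the fall-through state gets
 * umax_value lowered to 10; the signed and 32-bit bounds are then
 * re-derived from the updated 64-bit bounds by __reg_combine_64_into_32().
 */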
7573
7574/* Same as above, but for the case that dst_reg holds a constant and src_reg is
7575 * the variable reg.
7576 */
7577static void reg_set_min_max_inv(struct bpf_reg_state *true_reg, struct bpf_reg_state *false_reg, u64 val, u32 val32,
7578                                u8 opcode, bool is_jmp32)
7579{
7580    /* How can we transform "a <op> b" into "b <op> a"? */
7581    static const u8 opcode_flip[VERIFIER_SIXTEEN] = {
7582        [BPF_JEQ >> VERIFIER_FOUR] = BPF_JEQ,
7583        [BPF_JNE >> VERIFIER_FOUR] = BPF_JNE,
7584        [BPF_JSET >> VERIFIER_FOUR] = BPF_JSET,
7585        /* these swap "lesser" and "greater" (L and G in the opcodes) */
7586        [BPF_JGE >> VERIFIER_FOUR] = BPF_JLE,
7587        [BPF_JGT >> VERIFIER_FOUR] = BPF_JLT,
7588        [BPF_JLE >> VERIFIER_FOUR] = BPF_JGE,
7589        [BPF_JLT >> VERIFIER_FOUR] = BPF_JGT,
7590        [BPF_JSGE >> VERIFIER_FOUR] = BPF_JSLE,
7591        [BPF_JSGT >> VERIFIER_FOUR] = BPF_JSLT,
7592        [BPF_JSLE >> VERIFIER_FOUR] = BPF_JSGE,
7593        [BPF_JSLT >> VERIFIER_FOUR] = BPF_JSGT};
7594    opcode = opcode_flip[opcode >> VERIFIER_FOUR];
7595    /* This uses zero as "not present in table"; luckily the zero opcode,
7596     * BPF_JA, can't get here.
7597     */
7598    if (opcode) {
7599        reg_set_min_max(true_reg, false_reg, val, val32, opcode, is_jmp32);
7600    }
7601}
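
/* Example: "if r1 < r2 goto l" where r1 is known to equal 10 and r2 is
 * the variable register reaches here; BPF_JLT is flipped to BPF_JGT so
 * that r2's bounds are refined exactly as if the test had been written
 * "if r2 > 10 goto l".
 */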
7602
7603/* Regs are known to be equal, so intersect their min/max/var_off */
7604static void __reg_combine_min_max(struct bpf_reg_state *src_reg, struct bpf_reg_state *dst_reg)
7605{
7606    src_reg->umin_value = dst_reg->umin_value = max(src_reg->umin_value, dst_reg->umin_value);
7607    src_reg->umax_value = dst_reg->umax_value = min(src_reg->umax_value, dst_reg->umax_value);
7608    src_reg->smin_value = dst_reg->smin_value = max(src_reg->smin_value, dst_reg->smin_value);
7609    src_reg->smax_value = dst_reg->smax_value = min(src_reg->smax_value, dst_reg->smax_value);
7610    src_reg->var_off = dst_reg->var_off = tnum_intersect(src_reg->var_off, dst_reg->var_off);
7611    /* We might have learned new bounds from the var_off. */
7612    reg_bounds_sync(src_reg);
7613    reg_bounds_sync(dst_reg);
7614}
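
/* Example: if one register is known to lie in [0, 100] and the other in
 * [50, 200], both end up with the intersected range [50, 100] and a
 * var_off equal to the tnum intersection of the two.
 */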
7615
7616static void reg_combine_min_max(struct bpf_reg_state *true_src, struct bpf_reg_state *true_dst,
7617                                struct bpf_reg_state *false_src, struct bpf_reg_state *false_dst, u8 opcode)
7618{
7619    switch (opcode) {
7620        case BPF_JEQ:
7621            __reg_combine_min_max(true_src, true_dst);
7622            break;
7623        case BPF_JNE:
7624            __reg_combine_min_max(false_src, false_dst);
7625            break;
7626    }
7627}
7628
7629static void mark_ptr_or_null_reg(struct bpf_func_state *state, struct bpf_reg_state *reg, u32 id, bool is_null)
7630{
7631    if (type_may_be_null(reg->type) && reg->id == id &&
7632        !WARN_ON_ONCE(!reg->id)) {
7633        if (WARN_ON_ONCE(reg->smin_value || reg->smax_value ||
7634            !tnum_equals_const(reg->var_off, 0) || reg->off)) {
7635            /* Old offset (both fixed and variable parts) should
7636             * have been known-zero, because we don't allow pointer
7637             * arithmetic on pointers that might be NULL. If we
7638             * see this happening, don't convert the register.
7639             */
7640            return;
7641        }
7642        if (is_null) {
7643            reg->type = SCALAR_VALUE;
7644        } else if (base_type(reg->type) == PTR_TO_MAP_VALUE) {
7645            const struct bpf_map *map = reg->map_ptr;
7646
7647            if (map->inner_map_meta) {
7648                reg->type = CONST_PTR_TO_MAP;
7649                reg->map_ptr = map->inner_map_meta;
7650            } else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
7651                reg->type = PTR_TO_XDP_SOCK;
7652            } else if (map->map_type == BPF_MAP_TYPE_SOCKMAP || map->map_type == BPF_MAP_TYPE_SOCKHASH) {
7653                reg->type = PTR_TO_SOCKET;
7654            } else {
7655                reg->type = PTR_TO_MAP_VALUE;
7656            }
7657        } else {
7658            reg->type &= ~PTR_MAYBE_NULL;
7659        }
7660
7661        if (is_null) {
7662            /* We don't need id and ref_obj_id from this point
7663             * onwards anymore, so reset them to give state
7664             * pruning a chance to take effect.
7665             */
7666            reg->id = 0;
7667            reg->ref_obj_id = 0;
7668        } else if (!reg_may_point_to_spin_lock(reg)) {
7669            /* For not-NULL ptr, reg->ref_obj_id will be reset
7670             * in release_reg_references().
7671             *
7672             * reg->id is still used by spin_lock ptr. Other
7673             * than spin_lock ptr type, reg->id can be reset.
7674             */
7675            reg->id = 0;
7676        }
7677    }
7678}
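
/* Typical pattern this supports (sketch):
 *
 *   r0 = bpf_map_lookup_elem(...)   (may-be-NULL map value pointer)
 *   if (r0 == 0) goto out           (NULL branch: r0 becomes SCALAR_VALUE)
 *   *(u32 *)(r0 + 0) = 1            (non-NULL branch: r0 is PTR_TO_MAP_VALUE)
 *
 * For inner maps, sockmap/sockhash and xskmap the non-NULL branch is
 * given the more specific pointer type chosen above instead.
 */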
7679
7680static void __mark_ptr_or_null_regs(struct bpf_func_state *state, u32 id, bool is_null)
7681{
7682    struct bpf_reg_state *reg;
7683    int i;
7684
7685    for (i = 0; i < MAX_BPF_REG; i++) {
7686        mark_ptr_or_null_reg(state, &state->regs[i], id, is_null);
7687    }
7688
7689    bpf_for_each_spilled_reg(i, state, reg)
7690    {
7691        if (!reg) {
7692            continue;
7693        }
7694        mark_ptr_or_null_reg(state, reg, id, is_null);
7695    }
7696}
7697
7698/* The logic is similar to find_good_pkt_pointers(), both could eventually
7699 * be folded together at some point.
7700 */
7701static void mark_ptr_or_null_regs(struct bpf_verifier_state *vstate, u32 regno, bool is_null)
7702{
7703    struct bpf_func_state *state = vstate->frame[vstate->curframe];
7704    struct bpf_reg_state *regs = state->regs;
7705    u32 ref_obj_id = regs[regno].ref_obj_id;
7706    u32 id = regs[regno].id;
7707    int i;
7708
7709    if (ref_obj_id && ref_obj_id == id && is_null) {
7710        /* regs[regno] is in the " == NULL" branch.
7711         * No one could have freed the reference state before
7712         * doing the NULL check.
7713         */
7714        WARN_ON_ONCE(release_reference_state(state, id));
7715    }
7716
7717    for (i = 0; i <= vstate->curframe; i++) {
7718        __mark_ptr_or_null_regs(vstate->frame[i], id, is_null);
7719    }
7720}
7721
7722static bool try_match_pkt_pointers(const struct bpf_insn *insn, struct bpf_reg_state *dst_reg,
7723                                   struct bpf_reg_state *src_reg, struct bpf_verifier_state *this_branch,
7724                                   struct bpf_verifier_state *other_branch)
7725{
7726    if (BPF_SRC(insn->code) != BPF_X) {
7727        return false;
7728    }
7729
7730    /* Pointers are always 64-bit. */
7731    if (BPF_CLASS(insn->code) == BPF_JMP32) {
7732        return false;
7733    }
7734
7735    switch (BPF_OP(insn->code)) {
7736        case BPF_JGT:
7737            if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7738                (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7739                /* pkt_data' > pkt_end, pkt_meta' > pkt_data */
7740                find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, false);
7741            } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7742                       (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7743                /* pkt_end > pkt_data', pkt_data > pkt_meta' */
7744                find_good_pkt_pointers(other_branch, src_reg, src_reg->type, true);
7745            } else {
7746                return false;
7747            }
7748            break;
7749        case BPF_JLT:
7750            if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7751                (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7752                /* pkt_data' < pkt_end, pkt_meta' < pkt_data */
7753                find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, true);
7754            } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7755                       (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7756                /* pkt_end < pkt_data', pkt_data < pkt_meta' */
7757                find_good_pkt_pointers(this_branch, src_reg, src_reg->type, false);
7758            } else {
7759                return false;
7760            }
7761            break;
7762        case BPF_JGE:
7763            if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7764                (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7765                /* pkt_data' >= pkt_end, pkt_meta' >= pkt_data */
7766                find_good_pkt_pointers(this_branch, dst_reg, dst_reg->type, true);
7767            } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7768                       (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7769                /* pkt_end >= pkt_data', pkt_data >= pkt_meta' */
7770                find_good_pkt_pointers(other_branch, src_reg, src_reg->type, false);
7771            } else {
7772                return false;
7773            }
7774            break;
7775        case BPF_JLE:
7776            if ((dst_reg->type == PTR_TO_PACKET && src_reg->type == PTR_TO_PACKET_END) ||
7777                (dst_reg->type == PTR_TO_PACKET_META && reg_is_init_pkt_pointer(src_reg, PTR_TO_PACKET))) {
7778                /* pkt_data' <= pkt_end, pkt_meta' <= pkt_data */
7779                find_good_pkt_pointers(other_branch, dst_reg, dst_reg->type, false);
7780            } else if ((dst_reg->type == PTR_TO_PACKET_END && src_reg->type == PTR_TO_PACKET) ||
7781                       (reg_is_init_pkt_pointer(dst_reg, PTR_TO_PACKET) && src_reg->type == PTR_TO_PACKET_META)) {
7782                /* pkt_end <= pkt_data', pkt_data <= pkt_meta' */
7783                find_good_pkt_pointers(this_branch, src_reg, src_reg->type, true);
7784            } else {
7785                return false;
7786            }
7787            break;
7788        default:
7789            return false;
7790    }
7791
7792    return true;
7793}
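
/* Example: for "if r2 > r1 goto err" with r2 == pkt_data + 8 and
 * r1 == pkt_end (BPF_JGT, dst == PTR_TO_PACKET, src == PTR_TO_PACKET_END),
 * the fall-through path has proven pkt_data + 8 <= pkt_end, so
 * find_good_pkt_pointers() is called on this_branch to mark those eight
 * bytes as accessible.
 */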
7794
7795static void find_equal_scalars(struct bpf_verifier_state *vstate, struct bpf_reg_state *known_reg)
7796{
7797    struct bpf_func_state *state;
7798    struct bpf_reg_state *reg;
7799    int i, j;
7800
7801    for (i = 0; i <= vstate->curframe; i++) {
7802        state = vstate->frame[i];
7803        for (j = 0; j < MAX_BPF_REG; j++) {
7804            reg = &state->regs[j];
7805            if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) {
7806                *reg = *known_reg;
7807            }
7808        }
7809
7810        bpf_for_each_spilled_reg(j, state, reg)
7811        {
7812            if (!reg) {
7813                continue;
7814            }
7815            if (reg->type == SCALAR_VALUE && reg->id == known_reg->id) {
7816                *reg = *known_reg;
7817            }
7818        }
7819    }
7820}
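
/* Example: after "r2 = r1" both scalars share the same id; when a later
 * "if r1 > 7 goto l" refines r1's bounds in one branch, the refined
 * register state is copied here to r2 (and to any spilled copy of it)
 * in every active frame.
 */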
7821
7822static int check_cond_jmp_op(struct bpf_verifier_env *env, struct bpf_insn *insn, int *insn_idx)
7823{
7824    struct bpf_verifier_state *this_branch = env->cur_state;
7825    struct bpf_verifier_state *other_branch;
7826    struct bpf_reg_state *regs = this_branch->frame[this_branch->curframe]->regs;
7827    struct bpf_reg_state *dst_reg, *other_branch_regs, *src_reg = NULL;
7828    u8 opcode = BPF_OP(insn->code);
7829    bool is_jmp32;
7830    int pred = -1;
7831    int err;
7832
7833    /* Only conditional jumps are expected to reach here. */
7834    if (opcode == BPF_JA || opcode > BPF_JSLE) {
7835        verbose(env, "invalid BPF_JMP/JMP32 opcode %x\n", opcode);
7836        return -EINVAL;
7837    }
7838
7839    if (BPF_SRC(insn->code) == BPF_X) {
7840        if (insn->imm != 0) {
7841            verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7842            return -EINVAL;
7843        }
7844
7845        /* check src1 operand */
7846        err = check_reg_arg(env, insn->src_reg, SRC_OP);
7847        if (err) {
7848            return err;
7849        }
7850
7851        if (is_pointer_value(env, insn->src_reg)) {
7852            verbose(env, "R%d pointer comparison prohibited\n", insn->src_reg);
7853            return -EACCES;
7854        }
7855        src_reg = &regs[insn->src_reg];
7856    } else {
7857        if (insn->src_reg != BPF_REG_0) {
7858            verbose(env, "BPF_JMP/JMP32 uses reserved fields\n");
7859            return -EINVAL;
7860        }
7861    }
7862
7863    /* check src2 operand */
7864    err = check_reg_arg(env, insn->dst_reg, SRC_OP);
7865    if (err) {
7866        return err;
7867    }
7868
7869    dst_reg = &regs[insn->dst_reg];
7870    is_jmp32 = BPF_CLASS(insn->code) == BPF_JMP32;
7871
7872    if (BPF_SRC(insn->code) == BPF_K) {
7873        pred = is_branch_taken(dst_reg, insn->imm, opcode, is_jmp32);
7874    } else if (src_reg->type == SCALAR_VALUE && is_jmp32 && tnum_is_const(tnum_subreg(src_reg->var_off))) {
7875        pred = is_branch_taken(dst_reg, tnum_subreg(src_reg->var_off).value, opcode, is_jmp32);
7876    } else if (src_reg->type == SCALAR_VALUE && !is_jmp32 && tnum_is_const(src_reg->var_off)) {
7877        pred = is_branch_taken(dst_reg, src_reg->var_off.value, opcode, is_jmp32);
7878    }
7879
7880    if (pred >= 0) {
7881        /* If we get here with a dst_reg pointer type it is because
7882         * above is_branch_taken() special cased the 0 comparison.
7883         */
7884        if (!__is_pointer_value(false, dst_reg)) {
7885            err = mark_chain_precision(env, insn->dst_reg);
7886        }
7887        if (BPF_SRC(insn->code) == BPF_X && !err) {
7888            err = mark_chain_precision(env, insn->src_reg);
7889        }
7890        if (err) {
7891            return err;
7892        }
7893    }
7894
7895    if (pred == 1) {
7896        /* Only follow the goto, ignore fall-through. If needed, push
7897         * the fall-through branch for simulation under speculative
7898         * execution.
7899         */
7900        if (!env->bypass_spec_v1 && !sanitize_speculative_path(env, insn, *insn_idx + 1, *insn_idx)) {
7901            return -EFAULT;
7902        }
7903        *insn_idx += insn->off;
7904        return 0;
7905    } else if (pred == 0) {
7906        /* Only follow the fall-through branch, since that's where the
7907         * program will go. If needed, push the goto branch for
7908         * simulation under speculative execution.
7909         */
7910        if (!env->bypass_spec_v1 && !sanitize_speculative_path(env, insn, *insn_idx + insn->off + 1, *insn_idx)) {
7911            return -EFAULT;
7912        }
7913        return 0;
7914    }
7915
7916    other_branch = push_stack(env, *insn_idx + insn->off + 1, *insn_idx, false);
7917    if (!other_branch) {
7918        return -EFAULT;
7919    }
7920    other_branch_regs = other_branch->frame[other_branch->curframe]->regs;
7921
7922    /* detect if we are comparing against a constant value so we can adjust
7923     * our min/max values for our dst register.
7924     * this is only legit if both are scalars (or pointers to the same
7925     * object, I suppose, but we don't support that right now), because
7926     * otherwise the different base pointers mean the offsets aren't
7927     * comparable.
7928     */
7929    if (BPF_SRC(insn->code) == BPF_X) {
7930        struct bpf_reg_state *src_reg_in = &regs[insn->src_reg];
7931
7932        if (dst_reg->type == SCALAR_VALUE && src_reg_in->type == SCALAR_VALUE) {
7933            if (tnum_is_const(src_reg_in->var_off) || (is_jmp32 && tnum_is_const(tnum_subreg(src_reg_in->var_off)))) {
7934                reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, src_reg_in->var_off.value,
7935                                tnum_subreg(src_reg_in->var_off).value, opcode, is_jmp32);
7936            } else if (tnum_is_const(dst_reg->var_off) || (is_jmp32 && tnum_is_const(tnum_subreg(dst_reg->var_off)))) {
7937                reg_set_min_max_inv(&other_branch_regs[insn->src_reg], src_reg_in, dst_reg->var_off.value,
7938                                    tnum_subreg(dst_reg->var_off).value, opcode, is_jmp32);
7939            } else if (!is_jmp32 && (opcode == BPF_JEQ || opcode == BPF_JNE)) {
7940                /* Comparing for equality, we can combine knowledge */
7941                reg_combine_min_max(&other_branch_regs[insn->src_reg], &other_branch_regs[insn->dst_reg], src_reg_in,
7942                                    dst_reg, opcode);
7943            }
7944            if (src_reg_in->id && !WARN_ON_ONCE(src_reg_in->id != other_branch_regs[insn->src_reg].id)) {
7945                find_equal_scalars(this_branch, src_reg_in);
7946                find_equal_scalars(other_branch, &other_branch_regs[insn->src_reg]);
7947            }
7948        }
7949    } else if (dst_reg->type == SCALAR_VALUE) {
7950        reg_set_min_max(&other_branch_regs[insn->dst_reg], dst_reg, insn->imm, (u32)insn->imm, opcode, is_jmp32);
7951    }
7952
7953    if (dst_reg->type == SCALAR_VALUE && dst_reg->id &&
7954        !WARN_ON_ONCE(dst_reg->id != other_branch_regs[insn->dst_reg].id)) {
7955        find_equal_scalars(this_branch, dst_reg);
7956        find_equal_scalars(other_branch, &other_branch_regs[insn->dst_reg]);
7957    }
7958
7959    /* detect if R == 0 where R is returned from bpf_map_lookup_elem().
7960     * NOTE: the optimizations below are related to pointer comparisons,
7961     *       which will never be JMP32.
7962     */
7963    if (!is_jmp32 && BPF_SRC(insn->code) == BPF_K && insn->imm == 0 && (opcode == BPF_JEQ || opcode == BPF_JNE) &&
7964        type_may_be_null(dst_reg->type)) {
7965        /* Mark all registers sharing this id in each branch as either
7966         * safe or unknown, depending on whether R == 0 or R != 0 holds.
7967         */
7968        mark_ptr_or_null_regs(this_branch, insn->dst_reg, opcode == BPF_JNE);
7969        mark_ptr_or_null_regs(other_branch, insn->dst_reg, opcode == BPF_JEQ);
7970    } else if (!try_match_pkt_pointers(insn, dst_reg, &regs[insn->src_reg], this_branch, other_branch) &&
7971               is_pointer_value(env, insn->dst_reg)) {
7972        verbose(env, "R%d pointer comparison prohibited\n", insn->dst_reg);
7973        return -EACCES;
7974    }
7975    if (env->log.level & BPF_LOG_LEVEL) {
7976        print_verifier_state(env, this_branch->frame[this_branch->curframe]);
7977    }
7978    return 0;
7979}
7980
7981/* verify BPF_LD_IMM64 instruction */
7982static int check_ld_imm(struct bpf_verifier_env *env, struct bpf_insn *insn)
7983{
7984    struct bpf_insn_aux_data *aux = cur_aux(env);
7985    struct bpf_reg_state *regs = cur_regs(env);
7986    struct bpf_reg_state *dst_reg;
7987    struct bpf_map *map;
7988    int err;
7989
7990    if (BPF_SIZE(insn->code) != BPF_DW) {
7991        verbose(env, "invalid BPF_LD_IMM insn\n");
7992        return -EINVAL;
7993    }
7994    if (insn->off != 0) {
7995        verbose(env, "BPF_LD_IMM64 uses reserved fields\n");
7996        return -EINVAL;
7997    }
7998
7999    err = check_reg_arg(env, insn->dst_reg, DST_OP);
8000    if (err) {
8001        return err;
8002    }
8003
8004    dst_reg = &regs[insn->dst_reg];
8005    if (insn->src_reg == 0) {
8006        u64 imm = ((u64)(insn + 1)->imm << VERIFIER_THIRTYTWO) | (u32)insn->imm;
8007
8008        dst_reg->type = SCALAR_VALUE;
8009        verifier_mark_reg_known(&regs[insn->dst_reg], imm);
8010        return 0;
8011    }
8012
8013    /* All special src_reg cases are listed below. From this point onwards
8014     * we either succeed and assign a corresponding dst_reg->type after
8015     * zeroing the offset, or fail and reject the program.
8016     */
8017    mark_reg_known_zero(env, regs, insn->dst_reg);
8018
8019    if (insn->src_reg == BPF_PSEUDO_BTF_ID) {
8020        dst_reg->type = aux->btf_var.reg_type;
8021        switch (base_type(dst_reg->type)) {
8022            case PTR_TO_MEM:
8023                dst_reg->mem_size = aux->btf_var.mem_size;
8024                break;
8025            case PTR_TO_BTF_ID:
8026            case PTR_TO_PERCPU_BTF_ID:
8027                dst_reg->btf_id = aux->btf_var.btf_id;
8028                break;
8029            default:
8030                verbose(env, "bpf verifier is misconfigured\n");
8031                return -EFAULT;
8032        }
8033        return 0;
8034    }
8035
8036    map = env->used_maps[aux->map_index];
8037    dst_reg->map_ptr = map;
8038
8039    if (insn->src_reg == BPF_PSEUDO_MAP_VALUE) {
8040        dst_reg->type = PTR_TO_MAP_VALUE;
8041        dst_reg->off = aux->map_off;
8042        if (map_value_has_spin_lock(map)) {
8043            dst_reg->id = ++env->id_gen;
8044        }
8045    } else if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
8046        dst_reg->type = CONST_PTR_TO_MAP;
8047    } else {
8048        verbose(env, "bpf verifier is misconfigured\n");
8049        return -EINVAL;
8050    }
8051
8052    return 0;
8053}
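
/* BPF_LD_IMM64 spans two instructions; in the plain src_reg == 0 form
 * the 64-bit constant is split across the two 32-bit imm fields. For
 * example, loading 0x1234567800000001 encodes 0x00000001 in insn->imm
 * and 0x12345678 in (insn + 1)->imm, matching the reconstruction above.
 */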
8054
8055static bool may_access_skb(enum bpf_prog_type type)
8056{
8057    switch (type) {
8058        case BPF_PROG_TYPE_SOCKET_FILTER:
8059        case BPF_PROG_TYPE_SCHED_CLS:
8060        case BPF_PROG_TYPE_SCHED_ACT:
8061            return true;
8062        default:
8063            return false;
8064    }
8065}
8066
8067/* verify safety of LD_ABS|LD_IND instructions:
8068 * - they can only appear in programs where ctx == skb
8069 * - since they are wrappers of function calls, they scratch R1-R5 registers,
8070 *   preserve R6-R9, and store return value into R0
8071 *
8072 * Implicit input:
8073 *   ctx == skb == R6 == CTX
8074 *
8075 * Explicit input:
8076 *   SRC == any register
8077 *   IMM == 32-bit immediate
8078 *
8079 * Output:
8080 *   R0 - 8/16/32-bit skb data converted to cpu endianness
8081 */
8082static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
8083{
8084    struct bpf_reg_state *regs = cur_regs(env);
8085    static const int ctx_reg = BPF_REG_6;
8086    u8 mode = BPF_MODE(insn->code);
8087    int i, err;
8088
8089    if (!may_access_skb(resolve_prog_type(env->prog))) {
8090        verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
8091        return -EINVAL;
8092    }
8093
8094    if (!env->ops->gen_ld_abs) {
8095        verbose(env, "bpf verifier is misconfigured\n");
8096        return -EINVAL;
8097    }
8098
8099    if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW ||
8100        (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
8101        verbose(env, "BPF_LD_[ABS|IND] uses reserved fields\n");
8102        return -EINVAL;
8103    }
8104
8105    /* check whether implicit source operand (register R6) is readable */
8106    err = check_reg_arg(env, ctx_reg, SRC_OP);
8107    if (err) {
8108        return err;
8109    }
8110
8111    /* Disallow usage of BPF_LD_[ABS|IND] with reference tracking, as
8112     * gen_ld_abs() may terminate the program at runtime, leading to
8113     * reference leak.
8114     */
8115    err = check_reference_leak(env);
8116    if (err) {
8117        verbose(env, "BPF_LD_[ABS|IND] cannot be mixed with socket references\n");
8118        return err;
8119    }
8120
8121    if (env->cur_state->active_spin_lock) {
8122        verbose(env, "BPF_LD_[ABS|IND] cannot be used inside bpf_spin_lock-ed region\n");
8123        return -EINVAL;
8124    }
8125
8126    if (regs[ctx_reg].type != PTR_TO_CTX) {
8127        verbose(env, "at the time of BPF_LD_ABS|IND R6 != pointer to skb\n");
8128        return -EINVAL;
8129    }
8130
8131    if (mode == BPF_IND) {
8132        /* check explicit source operand */
8133        err = check_reg_arg(env, insn->src_reg, SRC_OP);
8134        if (err) {
8135            return err;
8136        }
8137    }
8138
8139    err = check_ptr_off_reg(env, &regs[ctx_reg], ctx_reg);
8140    if (err < 0) {
8141        return err;
8142    }
8143
8144    /* reset caller saved regs to unreadable */
8145    for (i = 0; i < CALLER_SAVED_REGS; i++) {
8146        mark_reg_not_init(env, regs, caller_saved[i]);
8147        check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK);
8148    }
8149
8150    /* mark destination R0 register as readable, since it contains
8151     * the value fetched from the packet.
8152     * Already marked as written above.
8153     */
8154    mark_reg_unknown(env, regs, BPF_REG_0);
8155    /* ld_abs loads up to 32 bits of skb data. */
8156    regs[BPF_REG_0].subreg_def = env->insn_idx + 1;
8157    return 0;
8158}
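
/* Illustrative use of the pattern verified above (classic socket filter
 * style, shown as pseudo-asm):
 *
 *   r6 = r1                     ctx (skb) must live in R6
 *   r0 = *(u16 *)skb[12]        BPF_LD_ABS halfword at offset 12 (EtherType)
 *   if r0 != 0x0800 goto drop
 *
 * R1-R5 are scratched by the load, R6-R9 are preserved and R0 holds the
 * fetched value converted to CPU endianness.
 */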
8159
8160static int check_return_code(struct bpf_verifier_env *env)
8161{
8162    struct tnum enforce_attach_type_range = tnum_unknown;
8163    const struct bpf_prog *prog = env->prog;
8164    struct bpf_reg_state *reg;
8165    struct tnum range = tnum_range(0, 1);
8166    enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
8167    int err;
8168    const bool is_subprog = env->cur_state->frame[0]->subprogno;
8169
8170    /* LSM and struct_ops func-ptr's return type could be "void" */
8171    if (!is_subprog && (prog_type == BPF_PROG_TYPE_STRUCT_OPS || prog_type == BPF_PROG_TYPE_LSM) &&
8172        !prog->aux->attach_func_proto->type) {
8173        return 0;
8174    }
8175
8176    /* The eBPF calling convention is such that R0 is used
8177     * to return the value from an eBPF program.
8178     * Make sure that it's readable at this time
8179     * of bpf_exit, which means that the program wrote
8180     * something into it earlier.
8181     */
8182    err = check_reg_arg(env, BPF_REG_0, SRC_OP);
8183    if (err) {
8184        return err;
8185    }
8186
8187    if (is_pointer_value(env, BPF_REG_0)) {
8188        verbose(env, "R0 leaks addr as return value\n");
8189        return -EACCES;
8190    }
8191
8192    reg = cur_regs(env) + BPF_REG_0;
8193    if (is_subprog) {
8194        if (reg->type != SCALAR_VALUE) {
8195            verbose(env, "At subprogram exit the register R0 is not a scalar value (%s)\n",
8196                    reg_type_str(env, reg->type));
8197            return -EINVAL;
8198        }
8199        return 0;
8200    }
8201
8202    switch (prog_type) {
8203        case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
8204            if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
8205                env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
8206                env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
8207                env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
8208                env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
8209                env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME) {
8210                range = tnum_range(1, 1);
8211            }
8212            break;
8213        case BPF_PROG_TYPE_CGROUP_SKB:
8214            if (env->prog->expected_attach_type == BPF_CGROUP_INET_EGRESS) {
8215                range = tnum_range(0, 3);
8216                enforce_attach_type_range = tnum_range(2, 3);
8217            }
8218            break;
8219        case BPF_PROG_TYPE_CGROUP_SOCK:
8220        case BPF_PROG_TYPE_SOCK_OPS:
8221        case BPF_PROG_TYPE_CGROUP_DEVICE:
8222        case BPF_PROG_TYPE_CGROUP_SYSCTL:
8223        case BPF_PROG_TYPE_CGROUP_SOCKOPT:
8224            break;
8225        case BPF_PROG_TYPE_RAW_TRACEPOINT:
8226            if (!env->prog->aux->attach_btf_id) {
8227                return 0;
8228            }
8229            range = tnum_const(0);
8230            break;
8231        case BPF_PROG_TYPE_TRACING:
8232            switch (env->prog->expected_attach_type) {
8233                case BPF_TRACE_FENTRY:
8234                case BPF_TRACE_FEXIT:
8235                    range = tnum_const(0);
8236                    break;
8237                case BPF_TRACE_RAW_TP:
8238                case BPF_MODIFY_RETURN:
8239                    return 0;
8240                case BPF_TRACE_ITER:
8241                    break;
8242                default:
8243                    return -ENOTSUPP;
8244            }
8245            break;
8246        case BPF_PROG_TYPE_SK_LOOKUP:
8247            range = tnum_range(SK_DROP, SK_PASS);
8248            break;
8249        case BPF_PROG_TYPE_EXT:
8250            /* freplace program can return anything as its return value
8251             * depends on the to-be-replaced kernel func or bpf program.
8252             */
8253        default:
8254            return 0;
8255    }
8256
8257    if (reg->type != SCALAR_VALUE) {
8258        verbose(env, "At program exit the register R0 is not a known value (%s)\n", reg_type_str(env, reg->type));
8259        return -EINVAL;
8260    }
8261
8262    if (!tnum_in(range, reg->var_off)) {
8263        char tn_buf[48];
8264
8265        verbose(env, "At program exit the register R0 ");
8266        if (!tnum_is_unknown(reg->var_off)) {
8267            tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
8268            verbose(env, "has value %s", tn_buf);
8269        } else {
8270            verbose(env, "has unknown scalar value");
8271        }
8272        tnum_strn(tn_buf, sizeof(tn_buf), range);
8273        verbose(env, " should have been in %s\n", tn_buf);
8274        return -EINVAL;
8275    }
8276
8277    if (!tnum_is_unknown(enforce_attach_type_range) && tnum_in(enforce_attach_type_range, reg->var_off)) {
8278        env->prog->enforce_expected_attach_type = 1;
8279    }
8280    return 0;
8281}
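
/* Example: a BPF_PROG_TYPE_CGROUP_SKB program attached to
 * BPF_CGROUP_INET_EGRESS may return any value in [0, 3], so a program
 * that can exit with R0 == 4 is rejected above; a return value provably
 * in [2, 3] additionally sets enforce_expected_attach_type.
 */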
8282
8283/* non-recursive DFS pseudo code
8284 * 1  procedure DFS-iterative(G,v):
8285 * 2      label v as discovered
8286 * 3      let S be a stack
8287 * 4      S.push(v)
8288 * 5      while S is not empty
8289 * 6            t <- S.pop()
8290 * 7            if t is what we're looking for:
8291 * 8                return t
8292 * 9            for all edges e in G.adjacentEdges(t) do
8293 * 10               if edge e is already labelled
8294 * 11                   continue with the next edge
8295 * 12               w <- G.adjacentVertex(t,e)
8296 * 13               if vertex w is not discovered and not explored
8297 * 14                   label e as tree-edge
8298 * 15                   label w as discovered
8299 * 16                   S.push(w)
8300 * 17                   continue at 5
8301 * 18               else if vertex w is discovered
8302 * 19                   label e as back-edge
8303 * 20               else
8304 * 21                   // vertex w is explored
8305 * 22                   label e as forward- or cross-edge
8306 * 23           label t as explored
8307 * 24           S.pop()
8308 *
8309 * convention:
8310 * 0x10 - discovered
8311 * 0x11 - discovered and fall-through edge labelled
8312 * 0x12 - discovered and fall-through and branch edges labelled
8313 * 0x20 - explored
8314 */
8315
8316enum {
8317    DISCOVERED = 0x10,
8318    EXPLORED = 0x20,
8319    FALLTHROUGH = 1,
8320    BRANCH = 2,
8321};
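
/* Example of the encoding above: an insn whose fall-through edge has
 * been labelled but whose branch edge has not is in state
 * DISCOVERED | FALLTHROUGH (0x11); after the branch edge is labelled it
 * becomes 0x12, and it is finally marked EXPLORED (0x20) when popped off
 * the stack.
 */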
8322
8323static u32 state_htab_size(struct bpf_verifier_env *env)
8324{
8325    return env->prog->len;
8326}
8327
8328static struct bpf_verifier_state_list **explored_state(struct bpf_verifier_env *env, int idx)
8329{
8330    struct bpf_verifier_state *cur = env->cur_state;
8331    struct bpf_func_state *state = cur->frame[cur->curframe];
8332
8333    return &env->explored_states[(idx ^ state->callsite) % state_htab_size(env)];
8334}
8335
8336static void init_explored_state(struct bpf_verifier_env *env, int idx)
8337{
8338    env->insn_aux_data[idx].prune_point = true;
8339}
8340
8341/* t, w, e - match pseudo-code above:
8342 * t - index of current instruction
8343 * w - next instruction
8344 * e - edge
8345 */
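/* Returns 1 if a new insn was pushed onto the DFS stack, 0 if the edge
 * was already labelled (or the back-edge is tolerated for bpf_capable
 * programs), and a negative errno on out-of-range jumps, stack overflow
 * or a forbidden back-edge.
 */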
8346static int push_insn(int t, int w, int e, struct bpf_verifier_env *env, bool loop_ok)
8347{
8348    int *insn_stack = env->cfg.insn_stack;
8349    int *insn_state = env->cfg.insn_state;
8350
8351    if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH)) {
8352        return 0;
8353    }
8354
8355    if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH)) {
8356        return 0;
8357    }
8358
8359    if (w < 0 || w >= env->prog->len) {
8360        verbose_linfo(env, t, "%d: ", t);
8361        verbose(env, "jump out of range from insn %d to %d\n", t, w);
8362        return -EINVAL;
8363    }
8364
8365    if (e == BRANCH) {
8366        /* mark branch target for state pruning */
8367        init_explored_state(env, w);
8368    }
8369
8370    if (insn_state[w] == 0) {
8371        /* tree-edge */
8372        insn_state[t] = DISCOVERED | e;
8373        insn_state[w] = DISCOVERED;
8374        if (env->cfg.cur_stack >= env->prog->len) {
8375            return -E2BIG;
8376        }
8377        insn_stack[env->cfg.cur_stack++] = w;
8378        return 1;
8379    } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
8380        if (loop_ok && env->bpf_capable) {
8381            return 0;
8382        }
8383        verbose_linfo(env, t, "%d: ", t);
8384        verbose_linfo(env, w, "%d: ", w);
8385        verbose(env, "back-edge from insn %d to %d\n", t, w);
8386        return -EINVAL;
8387    } else if (insn_state[w] == EXPLORED) {
8388        /* forward- or cross-edge */
8389        insn_state[t] = DISCOVERED | e;
8390    } else {
8391        verbose(env, "insn state internal bug\n");
8392        return -EFAULT;
8393    }
8394    return 0;
8395}
8396
8397/* non-recursive depth-first-search to detect loops in BPF program
8398 * loop == back-edge in directed graph
8399 */
8400static int check_cfg(struct bpf_verifier_env *env)
8401{
8402    struct bpf_insn *insns = env->prog->insnsi;
8403    int insn_cnt = env->prog->len;
8404    int *insn_stack, *insn_state;
8405    int ret = 0;
8406    int i, t;
8407
8408    insn_state = env->cfg.insn_state = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8409    if (!insn_state) {
8410        return -ENOMEM;
8411    }
8412
8413    insn_stack = env->cfg.insn_stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
8414    if (!insn_stack) {
8415        kvfree(insn_state);
8416        return -ENOMEM;
8417    }
8418
8419    insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
8420    insn_stack[0] = 0;          /* 0 is the first instruction */
8421    env->cfg.cur_stack = 1;
8422
8423    while (1) {
8424        if (env->cfg.cur_stack == 0) {
8425            goto check_state;
8426        }
8427        t = insn_stack[env->cfg.cur_stack - 1];
8428
8429        if (BPF_CLASS(insns[t].code) == BPF_JMP || BPF_CLASS(insns[t].code) == BPF_JMP32) {
8430            u8 opcode = BPF_OP(insns[t].code);
8431            if (opcode == BPF_EXIT) {
8432                goto mark_explored;
8433            } else if (opcode == BPF_CALL) {
8434                ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8435                if (ret == 1) {
8436                    continue;
8437                } else if (ret < 0) {
8438                    goto err_free;
8439                }
8440                if (t + 1 < insn_cnt) {
8441                    init_explored_state(env, t + 1);
8442                }
8443                if (insns[t].src_reg == BPF_PSEUDO_CALL) {
8444                    init_explored_state(env, t);
8445                    ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env, false);
8446                    if (ret == 1) {
8447                        continue;
8448                    } else if (ret < 0) {
8449                        goto err_free;
8450                    }
8451                }
8452            } else if (opcode == BPF_JA) {
8453                if (BPF_SRC(insns[t].code) != BPF_K) {
8454                    ret = -EINVAL;
8455                    goto err_free;
8456                }
8457                /* unconditional jump with single edge */
8458                ret = push_insn(t, t + insns[t].off + 1, FALLTHROUGH, env, true);
8459                if (ret == 1) {
8460                    continue;
8461                } else if (ret < 0) {
8462                    goto err_free;
8463                }
8464                /* unconditional jmp is not a good pruning point,
8465                 * but it's marked, since backtracking needs
8466                 * to record jmp history in is_state_visited().
8467                 */
8468                init_explored_state(env, t + insns[t].off + 1);
8469                /* tell verifier to check for equivalent states
8470                 * after every call and jump
8471                 */
8472                if (t + 1 < insn_cnt) {
8473                    init_explored_state(env, t + 1);
8474                }
8475            } else {
8476                /* conditional jump with two edges */
8477                init_explored_state(env, t);
8478                ret = push_insn(t, t + 1, FALLTHROUGH, env, true);
8479                if (ret == 1) {
8480                    continue;
8481                } else if (ret < 0) {
8482                    goto err_free;
8483                }
8484
8485                ret = push_insn(t, t + insns[t].off + 1, BRANCH, env, true);
8486                if (ret == 1) {
8487                    continue;
8488                } else if (ret < 0) {
8489                    goto err_free;
8490                }
8491            }
8492        } else {
8493            /* all other non-branch instructions with single
8494             * fall-through edge
8495             */
8496            ret = push_insn(t, t + 1, FALLTHROUGH, env, false);
8497            if (ret == 1) {
8498                continue;
8499            } else if (ret < 0) {
8500                goto err_free;
8501            }
8502        }
8503
8504    mark_explored:
8505        insn_state[t] = EXPLORED;
8506        if (env->cfg.cur_stack-- <= 0) {
8507            verbose(env, "pop stack internal bug\n");
8508            ret = -EFAULT;
8509            goto err_free;
8510        }
8511        continue;
8512    }
8513
8514check_state:
8515    for (i = 0; i < insn_cnt; i++) {
8516        if (insn_state[i] != EXPLORED) {
8517            verbose(env, "unreachable insn %d\n", i);
8518            ret = -EINVAL;
8519            goto err_free;
8520        }
8521    }
8522    ret = 0; /* cfg looks good */
8523
8524err_free:
8525    kvfree(insn_state);
8526    kvfree(insn_stack);
8527    env->cfg.insn_state = env->cfg.insn_stack = NULL;
8528    return ret;
8529}
8530
8531static int check_abnormal_return(struct bpf_verifier_env *env)
8532{
8533    int i;
8534
8535    for (i = 1; i < env->subprog_cnt; i++) {
8536        if (env->subprog_info[i].has_ld_abs) {
8537            verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
8538            return -EINVAL;
8539        }
8540        if (env->subprog_info[i].has_tail_call) {
8541            verbose(env, "tail_call is not allowed in subprogs without BTF\n");
8542            return -EINVAL;
8543        }
8544    }
8545    return 0;
8546}
8547
8548/* The minimum supported BTF func info size */
8549#define MIN_BPF_FUNCINFO_SIZE 8
8550#define MAX_FUNCINFO_REC_SIZE 252
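
/* Example: a loader built against a newer UAPI may pass
 * func_info_rec_size larger than sizeof(struct bpf_func_info); this is
 * accepted as long as the extra trailing bytes of every record are zero
 * (checked via bpf_check_uarg_tail_zero() below), mirroring the usual
 * bpf(2) forward-compatibility rule.
 */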
8551
8552static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8553{
8554    const struct btf_type *type, *func_proto, *ret_type;
8555    u32 i, nfuncs, urec_size, min_size;
8556    u32 krec_size = sizeof(struct bpf_func_info);
8557    struct bpf_func_info *krecord;
8558    struct bpf_func_info_aux *info_aux = NULL;
8559    struct bpf_prog *prog;
8560    const struct btf *btf;
8561    void __user *urecord;
8562    u32 prev_offset = 0;
8563    bool scalar_return;
8564    int ret = -ENOMEM;
8565
8566    nfuncs = attr->func_info_cnt;
8567    if (!nfuncs) {
8568        if (check_abnormal_return(env)) {
8569            return -EINVAL;
8570        }
8571        return 0;
8572    }
8573
8574    if (nfuncs != env->subprog_cnt) {
8575        verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
8576        return -EINVAL;
8577    }
8578
8579    urec_size = attr->func_info_rec_size;
8580    if (urec_size < MIN_BPF_FUNCINFO_SIZE || urec_size > MAX_FUNCINFO_REC_SIZE || urec_size % sizeof(u32)) {
8581        verbose(env, "invalid func info rec size %u\n", urec_size);
8582        return -EINVAL;
8583    }
8584
8585    prog = env->prog;
8586    btf = prog->aux->btf;
8587
8588    urecord = u64_to_user_ptr(attr->func_info);
8589    min_size = min_t(u32, krec_size, urec_size);
8590
8591    krecord = kvcalloc(nfuncs, krec_size, GFP_KERNEL | __GFP_NOWARN);
8592    if (!krecord) {
8593        return -ENOMEM;
8594    }
8595    info_aux = kcalloc(nfuncs, sizeof(*info_aux), GFP_KERNEL | __GFP_NOWARN);
8596    if (!info_aux) {
8597        goto err_free;
8598    }
8599
8600    for (i = 0; i < nfuncs; i++) {
8601        ret = bpf_check_uarg_tail_zero(urecord, krec_size, urec_size);
8602        if (ret) {
8603            if (ret == -E2BIG) {
8604                verbose(env, "nonzero trailing record in func info");
8605                /* set the size kernel expects so loader can zero
8606                 * out the rest of the record.
8607                 */
8608                if (put_user(min_size, &uattr->func_info_rec_size)) {
8609                    ret = -EFAULT;
8610                }
8611            }
8612            goto err_free;
8613        }
8614
8615        if (copy_from_user(&krecord[i], urecord, min_size)) {
8616            ret = -EFAULT;
8617            goto err_free;
8618        }
8619
8620        /* check insn_off */
8621        ret = -EINVAL;
8622        if (i == 0) {
8623            if (krecord[i].insn_off) {
8624                verbose(env, "nonzero insn_off %u for the first func info record", krecord[i].insn_off);
8625                goto err_free;
8626            }
8627        } else if (krecord[i].insn_off <= prev_offset) {
8628            verbose(env, "same or smaller insn offset (%u) than previous func info record (%u)", krecord[i].insn_off,
8629                    prev_offset);
8630            goto err_free;
8631        }
8632
8633        if (env->subprog_info[i].start != krecord[i].insn_off) {
8634            verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
8635            goto err_free;
8636        }
8637
8638        /* check type_id */
8639        type = btf_type_by_id(btf, krecord[i].type_id);
8640        if (!type || !btf_type_is_func(type)) {
8641            verbose(env, "invalid type id %d in func info", krecord[i].type_id);
8642            goto err_free;
8643        }
8644        info_aux[i].linkage = BTF_INFO_VLEN(type->info);
8645
8646        func_proto = btf_type_by_id(btf, type->type);
8647        if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) {
8648            /* btf_func_check() already verified it during BTF load */
8649            goto err_free;
8650        }
8651        ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
8652        scalar_return = btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
8653        if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
8654            verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
8655            goto err_free;
8656        }
8657        if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
8658            verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
8659            goto err_free;
8660        }
8661
8662        prev_offset = krecord[i].insn_off;
8663        urecord += urec_size;
8664    }
8665
8666    prog->aux->func_info = krecord;
8667    prog->aux->func_info_cnt = nfuncs;
8668    prog->aux->func_info_aux = info_aux;
8669    return 0;
8670
8671err_free:
8672    kvfree(krecord);
8673    kfree(info_aux);
8674    return ret;
8675}
8676
8677static void adjust_btf_func(struct bpf_verifier_env *env)
8678{
8679    struct bpf_prog_aux *aux = env->prog->aux;
8680    int i;
8681
8682    if (!aux->func_info) {
8683        return;
8684    }
8685
8686    for (i = 0; i < env->subprog_cnt; i++) {
8687        aux->func_info[i].insn_off = env->subprog_info[i].start;
8688    }
8689}
8690
8691#define MIN_BPF_LINEINFO_SIZE                                                                                          \
8692    (offsetof(struct bpf_line_info, line_col) + sizeof(((struct bpf_line_info *)(0))->line_col))
8693#define MAX_LINEINFO_REC_SIZE MAX_FUNCINFO_REC_SIZE
8694
8695static int check_btf_line(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8696{
8697    u32 i, s, nr_linfo, ncopy, expected_size, rec_size, prev_offset = 0;
8698    struct bpf_subprog_info *sub;
8699    struct bpf_line_info *linfo;
8700    struct bpf_prog *prog;
8701    const struct btf *btf;
8702    void __user *ulinfo;
8703    int err;
8704
8705    nr_linfo = attr->line_info_cnt;
8706    if (!nr_linfo) {
8707        return 0;
8708    }
8709    if (nr_linfo > INT_MAX / sizeof(struct bpf_line_info)) {
8710        return -EINVAL;
8711    }
8712
8713    rec_size = attr->line_info_rec_size;
8714    if (rec_size < MIN_BPF_LINEINFO_SIZE || rec_size > MAX_LINEINFO_REC_SIZE || rec_size & (sizeof(u32) - 1)) {
8715        return -EINVAL;
8716    }
8717
8718    /* Need to zero it in case userspace passes in a smaller
8719     * bpf_line_info object.
8720     */
8721    linfo = kvcalloc(nr_linfo, sizeof(struct bpf_line_info), GFP_KERNEL | __GFP_NOWARN);
8722    if (!linfo) {
8723        return -ENOMEM;
8724    }
8725
8726    prog = env->prog;
8727    btf = prog->aux->btf;
8728
8729    s = 0;
8730    sub = env->subprog_info;
8731    ulinfo = u64_to_user_ptr(attr->line_info);
8732    expected_size = sizeof(struct bpf_line_info);
8733    ncopy = min_t(u32, expected_size, rec_size);
8734    for (i = 0; i < nr_linfo; i++) {
8735        err = bpf_check_uarg_tail_zero(ulinfo, expected_size, rec_size);
8736        if (err) {
8737            if (err == -E2BIG) {
8738                verbose(env, "nonzero trailing record in line_info");
8739                if (put_user(expected_size, &uattr->line_info_rec_size)) {
8740                    err = -EFAULT;
8741                }
8742            }
8743            goto err_free;
8744        }
8745
8746        if (copy_from_user(&linfo[i], ulinfo, ncopy)) {
8747            err = -EFAULT;
8748            goto err_free;
8749        }
8750
8751        /*
8752         * Check insn_off to ensure
8753         * 1) strictly increasing AND
8754         * 2) bounded by prog->len
8755         *
8756         * The linfo[0].insn_off == 0 check logically falls into
8757         * the later "missing bpf_line_info for func..." case
8758         * because the first linfo[0].insn_off must also belong to
8759         * the first subprog, and the first subprog must have
8760         * subprog_info[0].start == 0.
8761         */
8762        if ((i && linfo[i].insn_off <= prev_offset) || linfo[i].insn_off >= prog->len) {
8763            verbose(env, "Invalid line_info[%u].insn_off:%u (prev_offset:%u prog->len:%u)\n", i, linfo[i].insn_off,
8764                    prev_offset, prog->len);
8765            err = -EINVAL;
8766            goto err_free;
8767        }
8768
8769        if (!prog->insnsi[linfo[i].insn_off].code) {
8770            verbose(env, "Invalid insn code at line_info[%u].insn_off\n", i);
8771            err = -EINVAL;
8772            goto err_free;
8773        }
8774
8775        if (!btf_name_by_offset(btf, linfo[i].line_off) || !btf_name_by_offset(btf, linfo[i].file_name_off)) {
8776            verbose(env, "Invalid line_info[%u].line_off or .file_name_off\n", i);
8777            err = -EINVAL;
8778            goto err_free;
8779        }
8780
8781        if (s != env->subprog_cnt) {
8782            if (linfo[i].insn_off == sub[s].start) {
8783                sub[s].linfo_idx = i;
8784                s++;
8785            } else if (sub[s].start < linfo[i].insn_off) {
8786                verbose(env, "missing bpf_line_info for func#%u\n", s);
8787                err = -EINVAL;
8788                goto err_free;
8789            }
8790        }
8791
8792        prev_offset = linfo[i].insn_off;
8793        ulinfo += rec_size;
8794    }
8795
8796    if (s != env->subprog_cnt) {
8797        verbose(env, "missing bpf_line_info for %u funcs starting from func#%u\n", env->subprog_cnt - s, s);
8798        err = -EINVAL;
8799        goto err_free;
8800    }
8801
8802    prog->aux->linfo = linfo;
8803    prog->aux->nr_linfo = nr_linfo;
8804
8805    return 0;
8806
8807err_free:
8808    kvfree(linfo);
8809    return err;
8810}
8811
8812static int check_btf_info(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr)
8813{
8814    struct btf *btf;
8815    int err;
8816
8817    if (!attr->func_info_cnt && !attr->line_info_cnt) {
8818        if (check_abnormal_return(env)) {
8819            return -EINVAL;
8820        }
8821        return 0;
8822    }
8823
8824    btf = btf_get_by_fd(attr->prog_btf_fd);
8825    if (IS_ERR(btf)) {
8826        return PTR_ERR(btf);
8827    }
8828    env->prog->aux->btf = btf;
8829
8830    err = check_btf_func(env, attr, uattr);
8831    if (err) {
8832        return err;
8833    }
8834
8835    err = check_btf_line(env, attr, uattr);
8836    if (err) {
8837        return err;
8838    }
8839
8840    return 0;
8841}
8842
8843/* check %cur's range satisfies %old's */
8844static bool range_within(struct bpf_reg_state *old, struct bpf_reg_state *cur)
8845{
8846    return old->umin_value <= cur->umin_value && old->umax_value >= cur->umax_value &&
8847           old->smin_value <= cur->smin_value && old->smax_value >= cur->smax_value &&
8848           old->u32_min_value <= cur->u32_min_value && old->u32_max_value >= cur->u32_max_value &&
8849           old->s32_min_value <= cur->s32_min_value && old->s32_max_value >= cur->s32_max_value;
8850}
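
/* Illustrative example (arbitrary values): if the already-verified
 * (old) register was known to be within umin=0, umax=100 (and the
 * signed and 32-bit bounds likewise), then a current register known to
 * be within [10, 20] satisfies range_within(): every value it can take
 * was already covered by the state that proved safety. A current range
 * of [50, 200] would not, since it allows values the old state never
 * explored.
 */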
8851
8852/* If in the old state two registers had the same id, then they need to have
8853 * the same id in the new state as well.  But that id could be different from
8854 * the old state, so we need to track the mapping from old to new ids.
8855 * Once we have seen that, say, a reg with old id 5 had new id 9, any subsequent
8856 * regs with old id 5 must also have new id 9 for the new state to be safe.  But
8857 * regs with a different old id could still have new id 9, we don't care about
8858 * that.
8859 * So we look through our idmap to see if this old id has been seen before.  If
8860 * so, we require the new id to match; otherwise, we add the id pair to the map.
8861 */
8862static bool check_ids(u32 old_id, u32 cur_id, struct bpf_id_pair *idmap)
8863{
8864    unsigned int i;
8865
8866    for (i = 0; i < BPF_ID_MAP_SIZE; i++) {
8867        if (!idmap[i].old) {
8868            /* Reached an empty slot; haven't seen this id before */
8869            idmap[i].old = old_id;
8870            idmap[i].cur = cur_id;
8871            return true;
8872        }
8873        if (idmap[i].old == old_id) {
8874            return idmap[i].cur == cur_id;
8875        }
8876    }
8877    /* We ran out of idmap slots, which should be impossible */
8878    WARN_ON_ONCE(1);
8879    return false;
8880}
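
/* Illustrative example (arbitrary ids): suppose in the old state
 * r1.id == r2.id == 5, i.e. both came from the same map lookup.
 *   cur r1.id == 9  -> pair (5, 9) is recorded in the idmap
 *   cur r2.id == 9  -> safe, matches the recorded pair
 *   cur r2.id == 7  -> not safe, old id 5 already maps to 9
 * A current id of 9 on a register whose old id was not 5 is fine,
 * since no recorded pair constrains it.
 */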
8881
8882static void clean_func_state(struct bpf_verifier_env *env, struct bpf_func_state *st)
8883{
8884    enum bpf_reg_liveness live;
8885    int i, j;
8886
8887    for (i = 0; i < BPF_REG_FP; i++) {
8888        live = st->regs[i].live;
8889        /* liveness must not touch this register anymore */
8890        st->regs[i].live |= REG_LIVE_DONE;
8891        if (!(live & REG_LIVE_READ)) {
8892            /* since the register is unused, clear its state
8893             * to make further comparison simpler
8894             */
8895            verifier_mark_reg_not_init(env, &st->regs[i]);
8896        }
8897    }
8898
8899    for (i = 0; i < st->allocated_stack / BPF_REG_SIZE; i++) {
8900        live = st->stack[i].spilled_ptr.live;
8901        /* liveness must not touch this stack slot anymore */
8902        st->stack[i].spilled_ptr.live |= REG_LIVE_DONE;
8903        if (!(live & REG_LIVE_READ)) {
8904            verifier_mark_reg_not_init(env, &st->stack[i].spilled_ptr);
8905            for (j = 0; j < BPF_REG_SIZE; j++) {
8906                st->stack[i].slot_type[j] = STACK_INVALID;
8907            }
8908        }
8909    }
8910}
8911
8912static void clean_verifier_state(struct bpf_verifier_env *env, struct bpf_verifier_state *st)
8913{
8914    int i;
8915
8916    if (st->frame[0]->regs[0].live & REG_LIVE_DONE) {
8917        /* all regs in this state in all frames were already marked */
8918        return;
8919    }
8920
8921    for (i = 0; i <= st->curframe; i++) {
8922        clean_func_state(env, st->frame[i]);
8923    }
8924}
8925
8926/* the parentage chains form a tree.
8927 * the verifier states are added to state lists at given insn and
8928 * pushed into state stack for future exploration.
8929 * when the verifier reaches bpf_exit insn some of the verifier states
8930 * stored in the state lists have their final liveness state already,
8931 * but a lot of states will get revised from liveness point of view when
8932 * the verifier explores other branches.
8933 * 1: r0 = 1
8934 * 2: if r1 == 100 goto pc+1
8935 * 3: r0 = 2
8936 * 4: exit
8937 * when the verifier reaches exit insn the register r0 in the state list of
8938 * insn 2 will be seen as !REG_LIVE_READ. Then the verifier pops the other_branch
8939 * of insn 2 and goes exploring further. At the insn 4 it will walk the
8940 * parentage chain from insn 4 into insn 2 and will mark r0 as REG_LIVE_READ.
8941 *
8942 * Since the verifier pushes the branch states as it sees them while exploring
8943 * the program, walking the branch instruction for the second
8944 * time means that all states below this branch were already explored and
8945 * their final liveness marks are already propagated.
8946 * Hence when the verifier completes the search of state list in is_state_visited()
8947 * we can call this clean_live_states() function to mark all liveness states
8948 * as REG_LIVE_DONE to indicate that 'parent' pointers of 'struct bpf_reg_state'
8949 * will not be used.
8950 * This function also clears the registers and stack for states that !READ
8951 * to simplify state merging.
8952 *
8953 * An important note here is that walking the same branch instruction in the
8954 * callee doesn't mean that the states are DONE. The verifier has to compare
8955 * the callsites.
8956 */
8957static void clean_live_states(struct bpf_verifier_env *env, int insn, struct bpf_verifier_state *cur)
8958{
8959    struct bpf_verifier_state_list *sl;
8960    int i;
8961
8962    sl = *explored_state(env, insn);
8963    while (sl) {
8964        if (sl->state.branches) {
8965            goto next;
8966        }
8967        if (sl->state.insn_idx != insn || sl->state.curframe != cur->curframe) {
8968            goto next;
8969        }
8970        for (i = 0; i <= cur->curframe; i++) {
8971            if (sl->state.frame[i]->callsite != cur->frame[i]->callsite) {
8972                goto next;
8973            }
8974        }
8975        clean_verifier_state(env, &sl->state);
8976    next:
8977        sl = sl->next;
8978    }
8979}
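
/* Illustrative example, continuing the r0/r1 snippet in the comment
 * above: once both outcomes of the branch at insn 2 have reached
 * 'exit' and the state stored at insn 2 has branches == 0,
 * clean_live_states() marks its registers and stack slots
 * REG_LIVE_DONE and resets everything that was never read below
 * insn 2, so later states_equal() checks at insn 2 have less state to
 * compare.
 */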
8980
8981/* Returns true if (rold safe implies rcur safe) */
8982static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold, struct bpf_reg_state *rcur,
8983                    struct bpf_id_pair *idmap)
8984{
8985    bool equal;
8986
8987    if (!(rold->live & REG_LIVE_READ)) {
8988        /* explored state didn't use this */
8989        return true;
8990    }
8991
8992    equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0;
8993
8994    if (rold->type == PTR_TO_STACK) {
8995        /* two stack pointers are equal only if they're pointing to
8996         * the same stack frame, since fp-8 in foo != fp-8 in bar
8997         */
8998        return equal && rold->frameno == rcur->frameno;
8999    }
9000
9001    if (equal) {
9002        return true;
9003    }
9004
9005    if (rold->type == NOT_INIT) {
9006        /* explored state can't have used this */
9007        return true;
9008    }
9009    if (rcur->type == NOT_INIT) {
9010        return false;
9011    }
9012    switch (base_type(rold->type)) {
9013        case SCALAR_VALUE:
9014            if (env->explore_alu_limits) {
9015                return false;
9016            }
9017            if (rcur->type == SCALAR_VALUE) {
9018                if (!rold->precise && !rcur->precise) {
9019                    return true;
9020                }
9021                /* new val must satisfy old val knowledge */
9022                return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
9023            } else {
9024                /* We're trying to use a pointer in place of a scalar.
9025                 * Even if the scalar was unbounded, this could lead to
9026                 * pointer leaks because scalars are allowed to leak
9027                 * while pointers are not. We could make this safe in
9028                 * special cases if root is calling us, but it's
9029                 * probably not worth the hassle.
9030                 */
9031                return false;
9032            }
9033        case PTR_TO_MAP_VALUE:
9034            /* a PTR_TO_MAP_VALUE could be safe to use as a
9035             * PTR_TO_MAP_VALUE_OR_NULL into the same map.
9036             * However, if the old PTR_TO_MAP_VALUE_OR_NULL then got NULL-
9037             * checked, doing so could have affected others with the same
9038             * id, and we can't check for that because we lost the id when
9039             * we converted to a PTR_TO_MAP_VALUE.
9040             */
9041            if (type_may_be_null(rold->type)) {
9042                if (!type_may_be_null(rcur->type)) {
9043                    return false;
9044                }
9045                if (memcmp(rold, rcur, offsetof(struct bpf_reg_state, id))) {
9046                    return false;
9047                }
9048                /* Check our ids match any regs they're supposed to */
9049                return check_ids(rold->id, rcur->id, idmap);
9050            }
9051
9052            /* If the new min/max/var_off satisfy the old ones and
9053             * everything else matches, we are OK.
9054             * 'id' is not compared, since it's only used for maps with
9055             * bpf_spin_lock inside map element and in such cases if
9056             * the rest of the prog is valid for one map element then
9057             * it's valid for all map elements regardless of the key
9058             * used in bpf_map_lookup()
9059             */
9060            return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 && range_within(rold, rcur) &&
9061                   tnum_in(rold->var_off, rcur->var_off);
9062        case PTR_TO_PACKET_META:
9063        case PTR_TO_PACKET:
9064            if (rcur->type != rold->type) {
9065                return false;
9066            }
9067            /* We must have at least as much range as the old ptr
9068             * did, so that any accesses which were safe before are
9069             * still safe.  This is true even if old range < old off,
9070             * since someone could have accessed through (ptr - k), or
9071             * even done ptr -= k in a register, to get a safe access.
9072             */
9073            if (rold->range > rcur->range) {
9074                return false;
9075            }
9076            /* If the offsets don't match, we can't trust our alignment;
9077             * nor can we be sure that we won't fall out of range.
9078             */
9079            if (rold->off != rcur->off) {
9080                return false;
9081            }
9082            /* id relations must be preserved */
9083            if (rold->id && !check_ids(rold->id, rcur->id, idmap)) {
9084                return false;
9085            }
9086            /* new val must satisfy old val knowledge */
9087            return range_within(rold, rcur) && tnum_in(rold->var_off, rcur->var_off);
9088        case PTR_TO_CTX:
9089        case CONST_PTR_TO_MAP:
9090        case PTR_TO_PACKET_END:
9091        case PTR_TO_FLOW_KEYS:
9092        case PTR_TO_SOCKET:
9093        case PTR_TO_SOCK_COMMON:
9094        case PTR_TO_TCP_SOCK:
9095        case PTR_TO_XDP_SOCK:
9096            /* Only valid matches are exact, which memcmp() above
9097             * would have accepted
9098             */
9099        default:
9100            /* Don't know what's going on, just say it's not safe */
9101            return false;
9102    }
9103
9104    /* Shouldn't get here; if we do, say it's not safe */
9105    WARN_ON_ONCE(1);
9106    return false;
9107}
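
/* Illustrative example for the SCALAR_VALUE case above (bounds only;
 * tnum_in() must also hold for the var_off):
 *   old: precise scalar in [0, 10]   cur: scalar in [2, 5]  -> safe
 *   old: precise scalar in [0, 10]   cur: scalar in [0, 20] -> not safe
 * i.e. the current register may only hold values that the
 * already-verified state was proven safe for.
 */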
9108
9109static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur,
9110                      struct bpf_id_pair *idmap)
9111{
9112    int i, spi;
9113
9114    /* walk slots of the explored stack and ignore any additional
9115     * slots in the current stack, since explored(safe) state
9116     * didn't use them
9117     */
9118    for (i = 0; i < old->allocated_stack; i++) {
9119        spi = i / BPF_REG_SIZE;
9120
9121        if (!(old->stack[spi].spilled_ptr.live & REG_LIVE_READ)) {
9122            i += BPF_REG_SIZE - 1;
9123            /* explored state didn't use this */
9124            continue;
9125        }
9126
9127        if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_INVALID) {
9128            continue;
9129        }
9130
9131        /* explored stack has more populated slots than current stack
9132         * and these slots were used
9133         */
9134        if (i >= cur->allocated_stack) {
9135            return false;
9136        }
9137
9138        /* if old state was safe with misc data in the stack
9139         * it will be safe with zero-initialized stack.
9140         * The opposite is not true
9141         */
9142        if (old->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_MISC &&
9143            cur->stack[spi].slot_type[i % BPF_REG_SIZE] == STACK_ZERO) {
9144            continue;
9145        }
9146        if (old->stack[spi].slot_type[i % BPF_REG_SIZE] != cur->stack[spi].slot_type[i % BPF_REG_SIZE]) {
9147            /* Ex: old explored (safe) state has STACK_SPILL in
9148             * this stack slot, but current has STACK_MISC ->
9149             * these verifier states are not equivalent,
9150             * return false to continue verification of this path
9151             */
9152            return false;
9153        }
9154        if (i % BPF_REG_SIZE) {
9155            continue;
9156        }
9157        if (old->stack[spi].slot_type[0] != STACK_SPILL) {
9158            continue;
9159        }
9160        if (!regsafe(env, &old->stack[spi].spilled_ptr, &cur->stack[spi].spilled_ptr, idmap)) {
9161            /* when explored and current stack slot are both storing
9162             * spilled registers, check that the stored pointers' types
9163             * are the same as well.
9164             * Ex: explored safe path could have stored
9165             * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -8}
9166             * but current path has stored:
9167             * (bpf_reg_state) {.type = PTR_TO_STACK, .off = -16}
9168             * such verifier states are not equivalent.
9169             * return false to continue verification of this path
9170             */
9171            return false;
9172        }
9173    }
9174    return true;
9175}
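
/* Illustrative example for the slot_type checks above:
 *   old slot: STACK_MISC   cur slot: STACK_ZERO  -> still safe
 *   old slot: STACK_ZERO   cur slot: STACK_MISC  -> not safe
 * Zeroes are just one particular kind of misc data, so a state proven
 * safe with arbitrary bytes in a slot is also safe with zeroes there,
 * but not the other way around.
 */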
9176
9177static bool refsafe(struct bpf_func_state *old, struct bpf_func_state *cur)
9178{
9179    if (old->acquired_refs != cur->acquired_refs) {
9180        return false;
9181    }
9182    return !memcmp(old->refs, cur->refs, sizeof(*old->refs) * old->acquired_refs);
9183}
9184
9185/* compare two verifier states
9186 *
9187 * all states stored in state_list are known to be valid, since
9188 * verifier reached 'bpf_exit' instruction through them
9189 *
9190 * this function is called when the verifier explores different branches of
9191 * execution popped from the state stack. If it sees an old state that has
9192 * more strict register state and more strict stack state then this execution
9193 * branch doesn't need to be explored further, since verifier already
9194 * concluded that more strict state leads to valid finish.
9195 *
9196 * Therefore two states are equivalent if register state is more conservative
9197 * and explored stack state is more conservative than the current one.
9198 * Example:
9199 *       explored                   current
9200 * (slot1=INV slot2=MISC) == (slot1=MISC slot2=MISC)
9201 * (slot1=MISC slot2=MISC) != (slot1=INV slot2=MISC)
9202 *
9203 * In other words if current stack state (one being explored) has more
9204 * valid slots than old one that already passed validation, it means
9205 * the verifier can stop exploring and conclude that current state is valid too
9206 *
9207 * Similarly with registers. If explored state has register type as invalid
9208 * whereas register type in current state is meaningful, it means that
9209 * the current state will reach 'bpf_exit' instruction safely
9210 */
9211static bool func_states_equal(struct bpf_verifier_env *env, struct bpf_func_state *old, struct bpf_func_state *cur)
9212{
9213    int i;
9214
9215    memset(env->idmap_scratch, 0, sizeof(env->idmap_scratch));
9216    for (i = 0; i < MAX_BPF_REG; i++) {
9217        if (!regsafe(env, &old->regs[i], &cur->regs[i], env->idmap_scratch)) {
9218            return false;
9219        }
9220    }
9221
9222    if (!stacksafe(env, old, cur, env->idmap_scratch)) {
9223        return false;
9224    }
9225
9226    if (!refsafe(old, cur)) {
9227        return false;
9228    }
9229
9230    return true;
9231}
9232
9233static bool states_equal(struct bpf_verifier_env *env, struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
9234{
9235    int i;
9236
9237    if (old->curframe != cur->curframe) {
9238        return false;
9239    }
9240
9241    /* Verification state from speculative execution simulation
9242     * must never prune a non-speculative execution one.
9243     */
9244    if (old->speculative && !cur->speculative) {
9245        return false;
9246    }
9247
9248    if (old->active_spin_lock != cur->active_spin_lock) {
9249        return false;
9250    }
9251
9252    /* for states to be equal callsites have to be the same
9253     * and all frame states need to be equivalent
9254     */
9255    for (i = 0; i <= old->curframe; i++) {
9256        if (old->frame[i]->callsite != cur->frame[i]->callsite) {
9257            return false;
9258        }
9259        if (!func_states_equal(env, old->frame[i], cur->frame[i])) {
9260            return false;
9261        }
9262    }
9263    return true;
9264}
9265
9266/* Return 0 if no propagation happened. Return negative error code if error
9267 * happened. Otherwise, return the propagated bit.
9268 */
9269static int propagate_liveness_reg(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
9270                                  struct bpf_reg_state *parent_reg)
9271{
9272    u8 parent_flag = parent_reg->live & REG_LIVE_READ;
9273    u8 flag = reg->live & REG_LIVE_READ;
9274    int err;
9275
9276    /* When we get here, the read flags of PARENT_REG or REG could be any of
9277     * REG_LIVE_READ64, REG_LIVE_READ32, REG_LIVE_NONE. There is no need
9278     * of propagation if PARENT_REG has strongest REG_LIVE_READ64.
9279     */
9280    if (parent_flag == REG_LIVE_READ64 ||
9281        /* Or if there is no read flag from REG. */
9282        !flag ||
9283        /* Or if the read flag from REG is the same as PARENT_REG. */
9284        parent_flag == flag) {
9285        return 0;
9286    }
9287
9288    err = mark_reg_read(env, reg, parent_reg, flag);
9289    if (err) {
9290        return err;
9291    }
9292
9293    return flag;
9294}
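
/* Illustrative example:
 *   parent: REG_LIVE_READ32   reg: REG_LIVE_READ64
 *     -> the stronger 64-bit read is propagated; the caller then
 *        invokes mark_insn_zext() on the parent register
 *   parent: REG_LIVE_READ64   reg: anything
 *     -> nothing to propagate, 0 is returned
 */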
9295
9296/* A write screens off any subsequent reads; but write marks come from the
9297 * straight-line code between a state and its parent.  When we arrive at an
9298 * equivalent state (jump target or such) we didn't arrive by the straight-line
9299 * code, so read marks in the state must propagate to the parent regardless
9300 * of the state's write marks. That's what 'parent == state->parent' comparison
9301 * in mark_reg_read() is for.
9302 */
9303static int propagate_liveness(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate,
9304                              struct bpf_verifier_state *vparent)
9305{
9306    struct bpf_reg_state *state_reg, *parent_reg;
9307    struct bpf_func_state *state, *parent;
9308    int i, frame, err = 0;
9309
9310    if (vparent->curframe != vstate->curframe) {
9311        WARN(1, "propagate_live: parent frame %d current frame %d\n", vparent->curframe, vstate->curframe);
9312        return -EFAULT;
9313    }
9314    /* Propagate read liveness of registers... */
9315    BUILD_BUG_ON(BPF_REG_FP + 1 != MAX_BPF_REG);
9316    for (frame = 0; frame <= vstate->curframe; frame++) {
9317        parent = vparent->frame[frame];
9318        state = vstate->frame[frame];
9319        parent_reg = parent->regs;
9320        state_reg = state->regs;
9321        /* We don't need to worry about FP liveness, it's read-only */
9322        for (i = frame < vstate->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9323            err = propagate_liveness_reg(env, &state_reg[i], &parent_reg[i]);
9324            if (err < 0) {
9325                return err;
9326            }
9327            if (err == REG_LIVE_READ64) {
9328                mark_insn_zext(env, &parent_reg[i]);
9329            }
9330        }
9331
9332        /* Propagate stack slots. */
9333        for (i = 0; i < state->allocated_stack / BPF_REG_SIZE && i < parent->allocated_stack / BPF_REG_SIZE; i++) {
9334            parent_reg = &parent->stack[i].spilled_ptr;
9335            state_reg = &state->stack[i].spilled_ptr;
9336            err = propagate_liveness_reg(env, state_reg, parent_reg);
9337            if (err < 0) {
9338                return err;
9339            }
9340        }
9341    }
9342    return 0;
9343}
9344
9345/* find precise scalars in the previous equivalent state and
9346 * propagate them into the current state
9347 */
9348static int propagate_precision(struct bpf_verifier_env *env, const struct bpf_verifier_state *old)
9349{
9350    struct bpf_reg_state *state_reg;
9351    struct bpf_func_state *state;
9352    int i, err = 0;
9353
9354    state = old->frame[old->curframe];
9355    state_reg = state->regs;
9356    for (i = 0; i < BPF_REG_FP; i++, state_reg++) {
9357        if (state_reg->type != SCALAR_VALUE || !state_reg->precise) {
9358            continue;
9359        }
9360        if (env->log.level & BPF_LOG_LEVEL2) {
9361            verbose(env, "propagating r%d\n", i);
9362        }
9363        err = mark_chain_precision(env, i);
9364        if (err < 0) {
9365            return err;
9366        }
9367    }
9368
9369    for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) {
9370        if (state->stack[i].slot_type[0] != STACK_SPILL) {
9371            continue;
9372        }
9373        state_reg = &state->stack[i].spilled_ptr;
9374        if (state_reg->type != SCALAR_VALUE || !state_reg->precise) {
9375            continue;
9376        }
9377        if (env->log.level & BPF_LOG_LEVEL2) {
9378            verbose(env, "propagating fp%d\n", (-i - 1) * BPF_REG_SIZE);
9379        }
9380        err = mark_chain_precision_stack(env, i);
9381        if (err < 0) {
9382            return err;
9383        }
9384    }
9385    return 0;
9386}
9387
9388static bool states_maybe_looping(struct bpf_verifier_state *old, struct bpf_verifier_state *cur)
9389{
9390    struct bpf_func_state *fold, *fcur;
9391    int i, fr = cur->curframe;
9392
9393    if (old->curframe != fr) {
9394        return false;
9395    }
9396
9397    fold = old->frame[fr];
9398    fcur = cur->frame[fr];
9399    for (i = 0; i < MAX_BPF_REG; i++) {
9400        if (memcmp(&fold->regs[i], &fcur->regs[i], offsetof(struct bpf_reg_state, parent))) {
9401            return false;
9402        }
9403    }
9404    return true;
9405}
9406
9407static int is_state_visited(struct bpf_verifier_env *env, int insn_idx)
9408{
9409    struct bpf_verifier_state_list *new_sl;
9410    struct bpf_verifier_state_list *sl, **pprev;
9411    struct bpf_verifier_state *cur = env->cur_state, *new;
9412    int i, j, err, states_cnt = 0;
9413    bool add_new_state = env->test_state_freq;
9414
9415    cur->last_insn_idx = env->prev_insn_idx;
9416    if (!env->insn_aux_data[insn_idx].prune_point) {
9417        /* this 'insn_idx' instruction wasn't marked, so we will not
9418         * be doing state search here
9419         */
9420        return 0;
9421    }
9422
9423    /* bpf progs typically have a pruning point every 4 instructions
9424     * http://vger.kernel.org/bpfconf2019.html#session-1
9425     * Do not add new state for future pruning if the verifier hasn't seen
9426     * at least 2 jumps and at least 8 instructions.
9427     * This heuristic helps decrease the 'total_states' and 'peak_states' metrics.
9428     * In tests that amounts to up to a 50% reduction in total verifier
9429     * memory consumption and a 20% verifier time speedup.
9430     */
9431    if (env->jmps_processed - env->prev_jmps_processed >= 2 && env->insn_processed - env->prev_insn_processed >= 8) {
9432        add_new_state = true;
9433    }
9434
9435    pprev = explored_state(env, insn_idx);
9436    sl = *pprev;
9437
9438    clean_live_states(env, insn_idx, cur);
9439
9440    while (sl) {
9441        states_cnt++;
9442        if (sl->state.insn_idx != insn_idx) {
9443            goto next;
9444        }
9445        if (sl->state.branches) {
9446            if (states_maybe_looping(&sl->state, cur) && states_equal(env, &sl->state, cur)) {
9447                verbose_linfo(env, insn_idx, "; ");
9448                verbose(env, "infinite loop detected at insn %d\n", insn_idx);
9449                return -EINVAL;
9450            }
9451            /* if the verifier is processing a loop, avoid adding new state
9452             * too often, since different loop iterations have distinct
9453             * states and may not help future pruning.
9454             * This threshold shouldn't be too low to make sure that
9455             * a loop with large bound will be rejected quickly.
9456             * The most abusive loop will be:
9457             * r1 += 1
9458             * if r1 < 1000000 goto pc-2
9459             * 1M insn_processed limit / 100 == 10k peak states.
9460             * This threshold shouldn't be too high either, since states
9461             * at the end of the loop are likely to be useful in pruning.
9462             */
9463            if (env->jmps_processed - env->prev_jmps_processed < 20 &&
9464                env->insn_processed - env->prev_insn_processed < 100) {
9465                add_new_state = false;
9466            }
9467            goto miss;
9468        }
9469        if (states_equal(env, &sl->state, cur)) {
9470            sl->hit_cnt++;
9471            /* reached equivalent register/stack state,
9472             * prune the search.
9473             * Registers read by the continuation are read by us.
9474             * If we have any write marks in env->cur_state, they
9475             * will prevent corresponding reads in the continuation
9476             * from reaching our parent (an explored_state).  Our
9477             * own state will get the read marks recorded, but
9478             * they'll be immediately forgotten as we're pruning
9479             * this state and will pop a new one.
9480             */
9481            err = propagate_liveness(env, &sl->state, cur);
9482
9483            /* if previous state reached the exit with precision and
9484             * current state is equivalent to it (except precision marks)
9485             * the precision needs to be propagated back in
9486             * the current state.
9487             */
9488            err = err ?: push_jmp_history(env, cur);
9489            err = err ?: propagate_precision(env, &sl->state);
9490            if (err) {
9491                return err;
9492            }
9493            return 1;
9494        }
9495    miss:
9496        /* when a new state is not going to be added, do not increase the miss count.
9497         * Otherwise several loop iterations will remove the state
9498         * recorded earlier. The goal of these heuristics is to have
9499         * states from some iterations of the loop (some in the beginning
9500         * and some at the end) to help pruning.
9501         */
9502        if (add_new_state) {
9503            sl->miss_cnt++;
9504        }
9505        /* heuristic to determine whether it is beneficial to keep
9506         * checking this state from the state equivalence point of view.
9507         * Higher numbers increase max_states_per_insn and verification time,
9508         * but do not meaningfully decrease insn_processed.
9509         */
9510        if (sl->miss_cnt > sl->hit_cnt * 3 + 3) {
9511            /* the state is unlikely to be useful. Remove it to
9512             * speed up verification
9513             */
9514            *pprev = sl->next;
9515            if (sl->state.frame[0]->regs[0].live & REG_LIVE_DONE) {
9516                u32 br = sl->state.branches;
9517
9518                WARN_ONCE(br, "BUG live_done but branches_to_explore %d\n", br);
9519                free_verifier_state(&sl->state, false);
9520                kfree(sl);
9521                env->peak_states--;
9522            } else {
9523                /* cannot free this state, since parentage chain may
9524                 * walk it later. Add it for free_list instead to
9525                 * be freed at the end of verification
9526                 */
9527                sl->next = env->free_list;
9528                env->free_list = sl;
9529            }
9530            sl = *pprev;
9531            continue;
9532        }
9533    next:
9534        pprev = &sl->next;
9535        sl = *pprev;
9536    }
9537
9538    if (env->max_states_per_insn < states_cnt) {
9539        env->max_states_per_insn = states_cnt;
9540    }
9541
9542    if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES) {
9543        return push_jmp_history(env, cur);
9544    }
9545
9546    if (!add_new_state) {
9547        return push_jmp_history(env, cur);
9548    }
9549
9550    /* There were no equivalent states, remember the current one.
9551     * Technically the current state is not proven to be safe yet,
9552     * but it will either reach the outermost bpf_exit (which means it's safe)
9553     * or it will be rejected. When there are no loops the verifier won't be
9554     * seeing this tuple (frame[0].callsite, frame[1].callsite, .. insn_idx)
9555     * again on the way to bpf_exit.
9556     * When looping the sl->state.branches will be > 0 and this state
9557     * will not be considered for equivalence until branches == 0.
9558     */
9559    new_sl = kzalloc(sizeof(struct bpf_verifier_state_list), GFP_KERNEL);
9560    if (!new_sl) {
9561        return -ENOMEM;
9562    }
9563    env->total_states++;
9564    env->peak_states++;
9565    env->prev_jmps_processed = env->jmps_processed;
9566    env->prev_insn_processed = env->insn_processed;
9567
9568    /* add new state to the head of linked list */
9569    new = &new_sl->state;
9570    err = copy_verifier_state(new, cur);
9571    if (err) {
9572        free_verifier_state(new, false);
9573        kfree(new_sl);
9574        return err;
9575    }
9576    new->insn_idx = insn_idx;
9577    WARN_ONCE(new->branches != 1, "BUG is_state_visited:branches_to_explore=%d insn %d\n", new->branches, insn_idx);
9578
9579    cur->parent = new;
9580    cur->first_insn_idx = insn_idx;
9581    clear_jmp_history(cur);
9582    new_sl->next = *explored_state(env, insn_idx);
9583    *explored_state(env, insn_idx) = new_sl;
9584    /* connect new state to parentage chain. Current frame needs all
9585     * registers connected. Only r6 - r9 of the callers are alive (pushed
9586     * to the stack implicitly by JITs) so in callers' frames connect just
9587     * r6 - r9 as an optimization. Callers will have r1 - r5 connected to
9588     * the state of the call instruction (with WRITTEN set), and r0 comes
9589     * from callee with its full parentage chain, anyway.
9590     */
9591    /* clear write marks in current state: the writes we did are not writes
9592     * our child did, so they don't screen off its reads from us.
9593     * (There are no read marks in current state, because reads always mark
9594     * their parent and current state never has children yet.  Only
9595     * explored_states can get read marks.)
9596     */
9597    for (j = 0; j <= cur->curframe; j++) {
9598        for (i = j < cur->curframe ? BPF_REG_6 : 0; i < BPF_REG_FP; i++) {
9599            cur->frame[j]->regs[i].parent = &new->frame[j]->regs[i];
9600        }
9601        for (i = 0; i < BPF_REG_FP; i++) {
9602            cur->frame[j]->regs[i].live = REG_LIVE_NONE;
9603        }
9604    }
9605
9606    /* all stack frames are accessible from callee, clear them all */
9607    for (j = 0; j <= cur->curframe; j++) {
9608        struct bpf_func_state *frame = cur->frame[j];
9609        struct bpf_func_state *newframe = new->frame[j];
9610
9611        for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) {
9612            frame->stack[i].spilled_ptr.live = REG_LIVE_NONE;
9613            frame->stack[i].spilled_ptr.parent = &newframe->stack[i].spilled_ptr;
9614        }
9615    }
9616    return 0;
9617}
9618
9619/* Return true if it's OK to have the same insn return a different type. */
9620static bool reg_type_mismatch_ok(enum bpf_reg_type type)
9621{
9622    switch (base_type(type)) {
9623        case PTR_TO_CTX:
9624        case PTR_TO_SOCKET:
9625        case PTR_TO_SOCK_COMMON:
9626        case PTR_TO_TCP_SOCK:
9627        case PTR_TO_XDP_SOCK:
9628        case PTR_TO_BTF_ID:
9629            return false;
9630        default:
9631            return true;
9632    }
9633}
9634
9635/* If an instruction was previously used with particular pointer types, then we
9636 * need to be careful to avoid cases such as the below, where it may be ok
9637 * for one branch accessing the pointer, but not ok for the other branch:
9638 *
9639 * R1 = sock_ptr
9640 * goto X;
9641 * ...
9642 * R1 = some_other_valid_ptr;
9643 * goto X;
9644 * ...
9645 * R2 = *(u32 *)(R1 + 0);
9646 */
9647static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
9648{
9649    return src != prev && (!reg_type_mismatch_ok(src) || !reg_type_mismatch_ok(prev));
9650}
9651
9652static int do_check(struct bpf_verifier_env *env)
9653{
9654    bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
9655    struct bpf_verifier_state *state = env->cur_state;
9656    struct bpf_insn *insns = env->prog->insnsi;
9657    struct bpf_reg_state *regs;
9658    int insn_cnt = env->prog->len;
9659    bool do_print_state = false;
9660    int prev_insn_idx = -1;
9661
9662    for (;;) {
9663        struct bpf_insn *insn;
9664        u8 class;
9665        int err;
9666
9667        env->prev_insn_idx = prev_insn_idx;
9668        if (env->insn_idx >= insn_cnt) {
9669            verbose(env, "invalid insn idx %d insn_cnt %d\n", env->insn_idx, insn_cnt);
9670            return -EFAULT;
9671        }
9672
9673        insn = &insns[env->insn_idx];
9674        class = BPF_CLASS(insn->code);
9675
9676        if (++env->insn_processed > BPF_COMPLEXITY_LIMIT_INSNS) {
9677            verbose(env, "BPF program is too large. Processed %d insn\n", env->insn_processed);
9678            return -E2BIG;
9679        }
9680
9681        err = is_state_visited(env, env->insn_idx);
9682        if (err < 0) {
9683            return err;
9684        }
9685        if (err == 1) {
9686            /* found equivalent state, can prune the search */
9687            if (env->log.level & BPF_LOG_LEVEL) {
9688                if (do_print_state) {
9689                    verbose(env, "\nfrom %d to %d%s: safe\n", env->prev_insn_idx, env->insn_idx,
9690                            env->cur_state->speculative ? " (speculative execution)" : "");
9691                } else {
9692                    verbose(env, "%d: safe\n", env->insn_idx);
9693                }
9694            }
9695            goto process_bpf_exit;
9696        }
9697
9698        if (signal_pending(current)) {
9699            return -EAGAIN;
9700        }
9701
9702        if (need_resched()) {
9703            cond_resched();
9704        }
9705
9706        if ((env->log.level & BPF_LOG_LEVEL2) || ((env->log.level & BPF_LOG_LEVEL) && do_print_state)) {
9707            if (env->log.level & BPF_LOG_LEVEL2) {
9708                verbose(env, "%d:", env->insn_idx);
9709            } else {
9710                verbose(env, "\nfrom %d to %d%s:", env->prev_insn_idx, env->insn_idx,
9711                        env->cur_state->speculative ? " (speculative execution)" : "");
9712            }
9713            print_verifier_state(env, state->frame[state->curframe]);
9714            do_print_state = false;
9715        }
9716
9717        if (env->log.level & BPF_LOG_LEVEL) {
9718            const struct bpf_insn_cbs cbs = {
9719                .cb_print = verbose,
9720                .private_data = env,
9721            };
9722
9723            verbose_linfo(env, env->insn_idx, "; ");
9724            verbose(env, "%d: ", env->insn_idx);
9725            print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
9726        }
9727
9728        if (bpf_prog_is_dev_bound(env->prog->aux)) {
9729            err = bpf_prog_offload_verify_insn(env, env->insn_idx, env->prev_insn_idx);
9730            if (err) {
9731                return err;
9732            }
9733        }
9734
9735        regs = cur_regs(env);
9736        sanitize_mark_insn_seen(env);
9737        prev_insn_idx = env->insn_idx;
9738
9739        if (class == BPF_ALU || class == BPF_ALU64) {
9740            err = check_alu_op(env, insn);
9741            if (err) {
9742                return err;
9743            }
9744        } else if (class == BPF_LDX) {
9745            enum bpf_reg_type *prev_src_type, src_reg_type;
9746
9747            /* check for reserved fields is already done */
9748
9749            /* check src operand */
9750            err = check_reg_arg(env, insn->src_reg, SRC_OP);
9751            if (err) {
9752                return err;
9753            }
9754
9755            err = check_reg_arg(env, insn->dst_reg, DST_OP_NO_MARK);
9756            if (err) {
9757                return err;
9758            }
9759
9760            src_reg_type = regs[insn->src_reg].type;
9761
9762            /* check that memory (src_reg + off) is readable,
9763             * the state of dst_reg will be updated by this func
9764             */
9765            err = check_mem_access(env, env->insn_idx, insn->src_reg, insn->off, BPF_SIZE(insn->code), BPF_READ,
9766                                   insn->dst_reg, false);
9767            if (err) {
9768                return err;
9769            }
9770
9771            prev_src_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9772
9773            if (*prev_src_type == NOT_INIT) {
9774                /* saw a valid insn
9775                 * dst_reg = *(u32 *)(src_reg + off)
9776                 * save type to validate intersecting paths
9777                 */
9778                *prev_src_type = src_reg_type;
9779            } else if (reg_type_mismatch(src_reg_type, *prev_src_type)) {
9780                /* An abusive program is trying to use the same insn
9781                 * dst_reg = *(u32*) (src_reg + off)
9782                 * with different pointer types:
9783                 * src_reg == ctx in one branch and
9784                 * src_reg == stack|map in some other branch.
9785                 * Reject it.
9786                 */
9787                verbose(env, "same insn cannot be used with different pointers\n");
9788                return -EINVAL;
9789            }
9790        } else if (class == BPF_STX) {
9791            enum bpf_reg_type *prev_dst_type, dst_reg_type;
9792            if (BPF_MODE(insn->code) == BPF_XADD) {
9793                err = check_xadd(env, env->insn_idx, insn);
9794                if (err) {
9795                    return err;
9796                }
9797                env->insn_idx++;
9798                continue;
9799            }
9800
9801            /* check src1 operand */
9802            err = check_reg_arg(env, insn->src_reg, SRC_OP);
9803            if (err) {
9804                return err;
9805            }
9806            /* check src2 operand */
9807            err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9808            if (err) {
9809                return err;
9810            }
9811
9812            dst_reg_type = regs[insn->dst_reg].type;
9813
9814            /* check that memory (dst_reg + off) is writeable */
9815            err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE,
9816                                   insn->src_reg, false);
9817            if (err) {
9818                return err;
9819            }
9820
9821            prev_dst_type = &env->insn_aux_data[env->insn_idx].ptr_type;
9822
9823            if (*prev_dst_type == NOT_INIT) {
9824                *prev_dst_type = dst_reg_type;
9825            } else if (reg_type_mismatch(dst_reg_type, *prev_dst_type)) {
9826                verbose(env, "same insn cannot be used with different pointers\n");
9827                return -EINVAL;
9828            }
9829        } else if (class == BPF_ST) {
9830            if (BPF_MODE(insn->code) != BPF_MEM || insn->src_reg != BPF_REG_0) {
9831                verbose(env, "BPF_ST uses reserved fields\n");
9832                return -EINVAL;
9833            }
9834            /* check src operand */
9835            err = check_reg_arg(env, insn->dst_reg, SRC_OP);
9836            if (err) {
9837                return err;
9838            }
9839            if (is_ctx_reg(env, insn->dst_reg)) {
9840                verbose(env, "BPF_ST stores into R%d %s is not allowed\n", insn->dst_reg,
9841                        reg_type_str(env, reg_state(env, insn->dst_reg)->type));
9842                return -EACCES;
9843            }
9844
9845            /* check that memory (dst_reg + off) is writeable */
9846            err = check_mem_access(env, env->insn_idx, insn->dst_reg, insn->off, BPF_SIZE(insn->code), BPF_WRITE, -1,
9847                                   false);
9848            if (err) {
9849                return err;
9850            }
9851        } else if (class == BPF_JMP || class == BPF_JMP32) {
9852            u8 opcode = BPF_OP(insn->code);
9853            env->jmps_processed++;
9854            if (opcode == BPF_CALL) {
9855                if (BPF_SRC(insn->code) != BPF_K || insn->off != 0 ||
9856                    (insn->src_reg != BPF_REG_0 && insn->src_reg != BPF_PSEUDO_CALL) || insn->dst_reg != BPF_REG_0 ||
9857                    class == BPF_JMP32) {
9858                    verbose(env, "BPF_CALL uses reserved fields\n");
9859                    return -EINVAL;
9860                }
9861
9862                if (env->cur_state->active_spin_lock &&
9863                    (insn->src_reg == BPF_PSEUDO_CALL || insn->imm != BPF_FUNC_spin_unlock)) {
9864                    verbose(env, "function calls are not allowed while holding a lock\n");
9865                    return -EINVAL;
9866                }
9867                if (insn->src_reg == BPF_PSEUDO_CALL) {
9868                    err = check_func_call(env, insn, &env->insn_idx);
9869                } else {
9870                    err = check_helper_call(env, insn->imm, env->insn_idx);
9871                }
9872                if (err) {
9873                    return err;
9874                }
9875            } else if (opcode == BPF_JA) {
9876                if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 ||
9877                    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
9878                    verbose(env, "BPF_JA uses reserved fields\n");
9879                    return -EINVAL;
9880                }
9881                env->insn_idx += insn->off + 1;
9882                continue;
9883            } else if (opcode == BPF_EXIT) {
9884                if (BPF_SRC(insn->code) != BPF_K || insn->imm != 0 || insn->src_reg != BPF_REG_0 ||
9885                    insn->dst_reg != BPF_REG_0 || class == BPF_JMP32) {
9886                    verbose(env, "BPF_EXIT uses reserved fields\n");
9887                    return -EINVAL;
9888                }
9889                if (env->cur_state->active_spin_lock) {
9890                    verbose(env, "bpf_spin_unlock is missing\n");
9891                    return -EINVAL;
9892                }
9893                if (state->curframe) {
9894                    /* exit from nested function */
9895                    err = prepare_func_exit(env, &env->insn_idx);
9896                    if (err) {
9897                        return err;
9898                    }
9899                    do_print_state = true;
9900                    continue;
9901                }
9902
9903                err = check_reference_leak(env);
9904                if (err) {
9905                    return err;
9906                }
9907
9908                err = check_return_code(env);
9909                if (err) {
9910                    return err;
9911                }
9912            process_bpf_exit:
9913                update_branch_counts(env, env->cur_state);
9914                err = pop_stack(env, &prev_insn_idx, &env->insn_idx, pop_log);
9915                if (err < 0) {
9916                    if (err != -ENOENT) {
9917                        return err;
9918                    }
9919                    break;
9920                } else {
9921                    do_print_state = true;
9922                    continue;
9923                }
9924            } else {
9925                err = check_cond_jmp_op(env, insn, &env->insn_idx);
9926                if (err) {
9927                    return err;
9928                }
9929            }
9930        } else if (class == BPF_LD) {
9931            u8 mode = BPF_MODE(insn->code);
9932            if (mode == BPF_ABS || mode == BPF_IND) {
9933                err = check_ld_abs(env, insn);
9934                if (err) {
9935                    return err;
9936                }
9937            } else if (mode == BPF_IMM) {
9938                err = check_ld_imm(env, insn);
9939                if (err) {
9940                    return err;
9941                }
9942                env->insn_idx++;
9943                sanitize_mark_insn_seen(env);
9944            } else {
9945                verbose(env, "invalid BPF_LD mode\n");
9946                return -EINVAL;
9947            }
9948        } else {
9949            verbose(env, "unknown insn class %d\n", class);
9950            return -EINVAL;
9951        }
9952        env->insn_idx++;
9953    }
9954
9955    return 0;
9956}
9957
9958/* replace pseudo btf_id with kernel symbol address */
9959static int check_pseudo_btf_id(struct bpf_verifier_env *env, struct bpf_insn *insn, struct bpf_insn_aux_data *aux)
9960{
9961    const struct btf_var_secinfo *vsi;
9962    const struct btf_type *datasec;
9963    const struct btf_type *t;
9964    const char *sym_name;
9965    bool percpu = false;
9966    u32 type, id = insn->imm;
9967    s32 datasec_id;
9968    u64 addr;
9969    int i;
9970
9971    if (!btf_vmlinux) {
9972        verbose(env, "kernel is missing BTF, make sure CONFIG_DEBUG_INFO_BTF=y is specified in Kconfig.\n");
9973        return -EINVAL;
9974    }
9975
9976    if (insn[1].imm != 0) {
9977        verbose(env, "reserved field (insn[1].imm) is used in pseudo_btf_id ldimm64 insn.\n");
9978        return -EINVAL;
9979    }
9980
9981    t = btf_type_by_id(btf_vmlinux, id);
9982    if (!t) {
9983        verbose(env, "ldimm64 insn specifies invalid btf_id %d.\n", id);
9984        return -ENOENT;
9985    }
9986
9987    if (!btf_type_is_var(t)) {
9988        verbose(env, "pseudo btf_id %d in ldimm64 isn't KIND_VAR.\n", id);
9989        return -EINVAL;
9990    }
9991
9992    sym_name = btf_name_by_offset(btf_vmlinux, t->name_off);
9993    addr = kallsyms_lookup_name(sym_name);
9994    if (!addr) {
9995        verbose(env, "ldimm64 failed to find the address for kernel symbol '%s'.\n", sym_name);
9996        return -ENOENT;
9997    }
9998
9999    datasec_id = btf_find_by_name_kind(btf_vmlinux, ".data..percpu", BTF_KIND_DATASEC);
10000    if (datasec_id > 0) {
10001        datasec = btf_type_by_id(btf_vmlinux, datasec_id);
10002        for_each_vsi(i, datasec, vsi)
10003        {
10004            if (vsi->type == id) {
10005                percpu = true;
10006                break;
10007            }
10008        }
10009    }
10010
10011    insn[0].imm = (u32)addr;
10012    insn[1].imm = addr >> VERIFIER_THIRTYTWO;
10013
10014    type = t->type;
10015    t = btf_type_skip_modifiers(btf_vmlinux, type, NULL);
10016    if (percpu) {
10017        aux->btf_var.reg_type = PTR_TO_PERCPU_BTF_ID;
10018        aux->btf_var.btf_id = type;
10019    } else if (!btf_type_is_struct(t)) {
10020        const struct btf_type *ret;
10021        const char *tname;
10022        u32 tsize;
10023
10024        /* resolve the type size of ksym. */
10025        ret = btf_resolve_size(btf_vmlinux, t, &tsize);
10026        if (IS_ERR(ret)) {
10027            tname = btf_name_by_offset(btf_vmlinux, t->name_off);
10028            verbose(env, "ldimm64 unable to resolve the size of type '%s': %ld\n", tname, PTR_ERR(ret));
10029            return -EINVAL;
10030        }
10031        aux->btf_var.reg_type = PTR_TO_MEM | MEM_RDONLY;
10032        aux->btf_var.mem_size = tsize;
10033    } else {
10034        aux->btf_var.reg_type = PTR_TO_BTF_ID;
10035        aux->btf_var.btf_id = type;
10036    }
10037    return 0;
10038}
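
/* Illustrative example: for
 *   r1 = ldimm64 BPF_PSEUDO_BTF_ID <btf_id of some kernel variable>
 * the code above rewrites insn[0].imm/insn[1].imm to the lower/upper
 * 32 bits of the symbol's address and sets the destination type to
 * PTR_TO_PERCPU_BTF_ID for a per-cpu variable, PTR_TO_BTF_ID for a
 * struct, or read-only PTR_TO_MEM (with its resolved size) otherwise.
 */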
10039
10040static bool check_map_prealloc(struct bpf_map *map)
10041{
10042    return (map->map_type != BPF_MAP_TYPE_HASH && map->map_type != BPF_MAP_TYPE_PERCPU_HASH &&
10043            map->map_type != BPF_MAP_TYPE_HASH_OF_MAPS) ||
10044           !(map->map_flags & BPF_F_NO_PREALLOC);
10045}
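
/* Illustrative examples:
 *   BPF_MAP_TYPE_HASH  without BPF_F_NO_PREALLOC -> true (preallocated)
 *   BPF_MAP_TYPE_HASH  with    BPF_F_NO_PREALLOC -> false (run-time alloc)
 *   BPF_MAP_TYPE_ARRAY with any flags            -> true (not a hash map)
 */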
10046
10047static bool is_tracing_prog_type(enum bpf_prog_type type)
10048{
10049    switch (type) {
10050        case BPF_PROG_TYPE_KPROBE:
10051        case BPF_PROG_TYPE_TRACEPOINT:
10052        case BPF_PROG_TYPE_PERF_EVENT:
10053        case BPF_PROG_TYPE_RAW_TRACEPOINT:
10054            return true;
10055        default:
10056            return false;
10057    }
10058}
10059
10060static bool is_preallocated_map(struct bpf_map *map)
10061{
10062    if (!check_map_prealloc(map)) {
10063        return false;
10064    }
10065    if (map->inner_map_meta && !check_map_prealloc(map->inner_map_meta)) {
10066        return false;
10067    }
10068    return true;
10069}
10070
10071static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_map *map, struct bpf_prog *prog)
10072
10073{
10074    enum bpf_prog_type prog_type = resolve_prog_type(prog);
10075    /*
10076     * Validate that trace type programs use preallocated hash maps.
10077     *
10078     * For programs attached to PERF events this is mandatory as the
10079     * perf NMI can hit any arbitrary code sequence.
10080     *
10081     * All other trace types using run-time allocated hash maps are unsafe as
10082     * well because tracepoints or kprobes can fire inside locked regions
10083     * of the memory allocator or at a place where a recursion into the
10084     * memory allocator would see inconsistent state.
10085     *
10086     * On RT enabled kernels run-time allocation of all trace type
10087     * programs is strictly prohibited due to lock type constraints. On
10088     * !RT kernels it is allowed for backwards compatibility reasons for
10089     * now, but warnings are emitted so developers are made aware of
10090     * the unsafety and can fix their programs before this is enforced.
10091     */
10092    if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
10093        if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
10094            verbose(env, "perf_event programs can only use preallocated hash map\n");
10095            return -EINVAL;
10096        }
10097        if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
10098            verbose(env, "trace type programs can only use preallocated hash map\n");
10099            return -EINVAL;
10100        }
10101        WARN_ONCE(1, "trace type BPF program uses run-time allocation\n");
10102        verbose(
10103            env,
10104            "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
10105    }
10106
10107    if ((is_tracing_prog_type(prog_type) || prog_type == BPF_PROG_TYPE_SOCKET_FILTER) && map_value_has_spin_lock(map)) {
10108        verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
10109        return -EINVAL;
10110    }
10111
10112    if ((bpf_prog_is_dev_bound(prog->aux) || bpf_map_is_dev_bound(map)) && !bpf_offload_prog_map_match(prog, map)) {
10113        verbose(env, "offload device mismatch between prog and map\n");
10114        return -EINVAL;
10115    }
10116
10117    if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS) {
10118        verbose(env, "bpf_struct_ops map cannot be used in prog\n");
10119        return -EINVAL;
10120    }
10121
10122    if (prog->aux->sleepable) {
10123        switch (map->map_type) {
10124            case BPF_MAP_TYPE_HASH:
10125            case BPF_MAP_TYPE_LRU_HASH:
10126            case BPF_MAP_TYPE_ARRAY:
10127                if (!is_preallocated_map(map)) {
10128                    verbose(env, "Sleepable programs can only use preallocated hash maps\n");
10129                    return -EINVAL;
10130                }
10131                break;
10132            default:
10133                verbose(env, "Sleepable programs can only use array and hash maps\n");
10134                return -EINVAL;
10135        }
10136    }
10137
10138    return 0;
10139}
10140
10141static bool bpf_map_is_cgroup_storage(struct bpf_map *map)
10142{
10143    return (map->map_type == BPF_MAP_TYPE_CGROUP_STORAGE || map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
10144}
10145
10146/* find and rewrite pseudo imm in ld_imm64 instructions:
10147 *
10148 * 1. if it accesses map FD, replace it with actual map pointer.
10149 * 2. if it accesses btf_id of a VAR, replace it with pointer to the var.
10150 *
10151 * NOTE: btf_vmlinux is required for converting pseudo btf_id.
10152 */
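/* Illustrative sketch (register and fd numbers are arbitrary): a map
 * reference emitted by a loader as the two-insn ld_imm64 pair
 *
 *    BPF_LD_MAP_FD(BPF_REG_1, 4),
 *
 * has src_reg == BPF_PSEUDO_MAP_FD and carries the map fd in insn[0].imm.
 * It leaves this pass with insn[0].imm holding the low 32 bits and
 * insn[1].imm the high 32 bits of the in-kernel 'struct bpf_map *' address,
 * and with the map recorded in env->used_maps[].
 */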
10153static int resolve_pseudo_ldimm64(struct bpf_verifier_env *env)
10154{
10155    struct bpf_insn *insn = env->prog->insnsi;
10156    int insn_cnt = env->prog->len;
10157    int i, j, err;
10158
10159    err = bpf_prog_calc_tag(env->prog);
10160    if (err) {
10161        return err;
10162    }
10163
10164    for (i = 0; i < insn_cnt; i++, insn++) {
10165        if (BPF_CLASS(insn->code) == BPF_LDX && (BPF_MODE(insn->code) != BPF_MEM || insn->imm != 0)) {
10166            verbose(env, "BPF_LDX uses reserved fields\n");
10167            return -EINVAL;
10168        }
10169
10170        if (BPF_CLASS(insn->code) == BPF_STX &&
10171            ((BPF_MODE(insn->code) != BPF_MEM && BPF_MODE(insn->code) != BPF_XADD) || insn->imm != 0)) {
10172            verbose(env, "BPF_STX uses reserved fields\n");
10173            return -EINVAL;
10174        }
10175
10176        if (insn[0].code == (BPF_LD | BPF_IMM | BPF_DW)) {
10177            struct bpf_insn_aux_data *aux;
10178            struct bpf_map *map;
10179            struct fd f;
10180            u64 addr;
10181
10182            if (i == insn_cnt - 1 || insn[1].code != 0 || insn[1].dst_reg != 0 || insn[1].src_reg != 0 ||
10183                insn[1].off != 0) {
10184                verbose(env, "invalid bpf_ld_imm64 insn\n");
10185                return -EINVAL;
10186            }
10187
10188            if (insn[0].src_reg == 0) {
10189                /* valid generic load 64-bit imm */
10190                goto next_insn;
10191            }
10192
10193            if (insn[0].src_reg == BPF_PSEUDO_BTF_ID) {
10194                aux = &env->insn_aux_data[i];
10195                err = check_pseudo_btf_id(env, insn, aux);
10196                if (err) {
10197                    return err;
10198                }
10199                goto next_insn;
10200            }
10201
10202            /* In final convert_pseudo_ld_imm64() step, this is
10203             * converted into regular 64-bit imm load insn.
10204             */
10205            if ((insn[0].src_reg != BPF_PSEUDO_MAP_FD && insn[0].src_reg != BPF_PSEUDO_MAP_VALUE) ||
10206                (insn[0].src_reg == BPF_PSEUDO_MAP_FD && insn[1].imm != 0)) {
10207                verbose(env, "unrecognized bpf_ld_imm64 insn\n");
10208                return -EINVAL;
10209            }
10210
10211            f = fdget(insn[0].imm);
10212            map = __bpf_map_get(f);
10213            if (IS_ERR(map)) {
10214                verbose(env, "fd %d is not pointing to valid bpf_map\n", insn[0].imm);
10215                return PTR_ERR(map);
10216            }
10217
10218            err = check_map_prog_compatibility(env, map, env->prog);
10219            if (err) {
10220                fdput(f);
10221                return err;
10222            }
10223
10224            aux = &env->insn_aux_data[i];
10225            if (insn->src_reg == BPF_PSEUDO_MAP_FD) {
10226                addr = (unsigned long)map;
10227            } else {
10228                u32 off = insn[1].imm;
10229
10230                if (off >= BPF_MAX_VAR_OFF) {
10231                    verbose(env, "direct value offset of %u is not allowed\n", off);
10232                    fdput(f);
10233                    return -EINVAL;
10234                }
10235
10236                if (!map->ops->map_direct_value_addr) {
10237                    verbose(env, "no direct value access support for this map type\n");
10238                    fdput(f);
10239                    return -EINVAL;
10240                }
10241
10242                err = map->ops->map_direct_value_addr(map, &addr, off);
10243                if (err) {
10244                    verbose(env, "invalid access to map value pointer, value_size=%u off=%u\n", map->value_size, off);
10245                    fdput(f);
10246                    return err;
10247                }
10248
10249                aux->map_off = off;
10250                addr += off;
10251            }
10252
10253            insn[0].imm = (u32)addr;
10254            insn[1].imm = addr >> 32;
10255
10256            /* check whether we recorded this map already */
10257            for (j = 0; j < env->used_map_cnt; j++) {
10258                if (env->used_maps[j] == map) {
10259                    aux->map_index = j;
10260                    fdput(f);
10261                    goto next_insn;
10262                }
10263            }
10264
10265            if (env->used_map_cnt >= MAX_USED_MAPS) {
10266                fdput(f);
10267                return -E2BIG;
10268            }
10269
10270            /* hold the map. If the program is rejected by verifier,
10271             * the map will be released by release_maps() or it
10272             * will be used by the valid program until it's unloaded
10273             * and all maps are released in free_used_maps()
10274             */
10275            bpf_map_inc(map);
10276
10277            aux->map_index = env->used_map_cnt;
10278            env->used_maps[env->used_map_cnt++] = map;
10279
10280            if (bpf_map_is_cgroup_storage(map) && bpf_cgroup_storage_assign(env->prog->aux, map)) {
10281                verbose(env, "only one cgroup storage of each type is allowed\n");
10282                fdput(f);
10283                return -EBUSY;
10284            }
10285
10286            fdput(f);
10287        next_insn:
10288            insn++;
10289            i++;
10290            continue;
10291        }
10292
10293        /* Basic sanity check before we invest more work here. */
10294        if (!bpf_opcode_in_insntable(insn->code)) {
10295            verbose(env, "unknown opcode %02x\n", insn->code);
10296            return -EINVAL;
10297        }
10298    }
10299
10300    /* now all pseudo BPF_LD_IMM64 instructions load valid
10301     * 'struct bpf_map *' into a register instead of user map_fd.
10302     * These pointers will be used later by verifier to validate map access.
10303     */
10304    return 0;
10305}
10306
10307/* drop refcnt of maps used by the rejected program */
10308static void release_maps(struct bpf_verifier_env *env)
10309{
10310    __bpf_free_used_maps(env->prog->aux, env->used_maps, env->used_map_cnt);
10311}
10312
10313/* convert pseudo BPF_LD_IMM64 into generic BPF_LD_IMM64 */
10314static void convert_pseudo_ld_imm64(struct bpf_verifier_env *env)
10315{
10316    struct bpf_insn *insn = env->prog->insnsi;
10317    int insn_cnt = env->prog->len;
10318    int i;
10319
10320    for (i = 0; i < insn_cnt; i++, insn++) {
10321        if (insn->code == (BPF_LD | BPF_IMM | BPF_DW)) {
10322            insn->src_reg = 0;
10323        }
10324    }
10325}
10326
10327/* single env->prog->insnsi[off] instruction was replaced with the range
10328 * insnsi[off, off + cnt).  Adjust corresponding insn_aux_data by copying
10329 * [0, off) and [off, end) to new locations, so the patched range stays zero
10330 */
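/* Worked example with illustrative numbers: patching the single insn at
 * off == 5 with a cnt == 3 replacement means aux entries 0..4 are copied
 * unchanged, entries 5 and 6 describe freshly inserted insns and inherit
 * only the old 'seen' flag, and entry 7 inherits the old entry for insn 5
 * (with zext_dst recomputed for the last insn of the patch).
 */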
10331static void adjust_insn_aux_data(struct bpf_verifier_env *env, struct bpf_insn_aux_data *new_data,
10332                                 struct bpf_prog *new_prog, u32 off, u32 cnt)
10333{
10334    struct bpf_insn_aux_data *old_data = env->insn_aux_data;
10335    struct bpf_insn *insn = new_prog->insnsi;
10336    u32 old_seen = old_data[off].seen;
10337    u32 prog_len;
10338    int i;
10339
10340    /* aux info at OFF always needs adjustment, no matter whether the fast path
10341     * (cnt == 1) is taken or not. There is no guarantee the insn at OFF is the
10342     * original insn from the old prog.
10343     */
10344    old_data[off].zext_dst = insn_has_def32(env, insn + off + cnt - 1);
10345
10346    if (cnt == 1) {
10347        return;
10348    }
10349    prog_len = new_prog->len;
10350
10351    memcpy(new_data, old_data, sizeof(struct bpf_insn_aux_data) * off);
10352    memcpy(new_data + off + cnt - 1, old_data + off, sizeof(struct bpf_insn_aux_data) * (prog_len - off - cnt + 1));
10353    for (i = off; i < off + cnt - 1; i++) {
10354        /* Expand insni[off]'s seen count to the patched range. */
10355        new_data[i].seen = old_seen;
10356        new_data[i].zext_dst = insn_has_def32(env, insn + i);
10357    }
10358    env->insn_aux_data = new_data;
10359    vfree(old_data);
10360}
10361
10362static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len)
10363{
10364    int i;
10365
10366    if (len == 1) {
10367        return;
10368    }
10369    /* NOTE: fake 'exit' subprog should be updated as well. */
10370    for (i = 0; i <= env->subprog_cnt; i++) {
10371        if (env->subprog_info[i].start <= off) {
10372            continue;
10373        }
10374        env->subprog_info[i].start += len - 1;
10375    }
10376}
10377
10378static void adjust_poke_descs(struct bpf_prog *prog, u32 off, u32 len)
10379{
10380    struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
10381    int i, sz = prog->aux->size_poke_tab;
10382    struct bpf_jit_poke_descriptor *desc;
10383
10384    for (i = 0; i < sz; i++) {
10385        desc = &tab[i];
10386        if (desc->insn_idx <= off) {
10387            continue;
10388        }
10389        desc->insn_idx += len - 1;
10390    }
10391}
10392
10393static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, const struct bpf_insn *patch,
10394                                            u32 len)
10395{
10396    struct bpf_prog *new_prog;
10397    struct bpf_insn_aux_data *new_data = NULL;
10398
10399    if (len > 1) {
10400        new_data = vzalloc(array_size(env->prog->len + len - 1, sizeof(struct bpf_insn_aux_data)));
10401        if (!new_data) {
10402            return NULL;
10403        }
10404    }
10405
10406    new_prog = bpf_patch_insn_single(env->prog, off, patch, len);
10407    if (IS_ERR(new_prog)) {
10408        if (PTR_ERR(new_prog) == -ERANGE) {
10409            verbose(env, "insn %d cannot be patched due to 16-bit range\n", env->insn_aux_data[off].orig_idx);
10410        }
10411        vfree(new_data);
10412        return NULL;
10413    }
10414    adjust_insn_aux_data(env, new_data, new_prog, off, len);
10415    adjust_subprog_starts(env, off, len);
10416    adjust_poke_descs(new_prog, off, len);
10417    return new_prog;
10418}
10419
10420static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt)
10421{
10422    int i, j;
10423
10424    /* find first prog starting at or after off (first to remove) */
10425    for (i = 0; i < env->subprog_cnt; i++) {
10426        if (env->subprog_info[i].start >= off) {
10427            break;
10428        }
10429    }
10430    /* find first prog starting at or after off + cnt (first to stay) */
10431    for (j = i; j < env->subprog_cnt; j++) {
10432        if (env->subprog_info[j].start >= off + cnt) {
10433            break;
10434        }
10435    }
10436    /* if j doesn't start exactly at off + cnt, we are just removing
10437     * the front of previous prog
10438     */
10439    if (env->subprog_info[j].start != off + cnt) {
10440        j--;
10441    }
10442
10443    if (j > i) {
10444        struct bpf_prog_aux *aux = env->prog->aux;
10445        int move;
10446
10447        /* move fake 'exit' subprog as well */
10448        move = env->subprog_cnt + 1 - j;
10449
10450        memmove(env->subprog_info + i, env->subprog_info + j, sizeof(*env->subprog_info) * move);
10451        env->subprog_cnt -= j - i;
10452
10453        /* remove func_info */
10454        if (aux->func_info) {
10455            move = aux->func_info_cnt - j;
10456
10457            memmove(aux->func_info + i, aux->func_info + j, sizeof(*aux->func_info) * move);
10458            aux->func_info_cnt -= j - i;
10459            /* func_info->insn_off is set after all code rewrites,
10460             * in adjust_btf_func() - no need to adjust
10461             */
10462        }
10463    } else {
10464        /* convert i from "first prog to remove" to "first to adjust" */
10465        if (env->subprog_info[i].start == off) {
10466            i++;
10467        }
10468    }
10469
10470    /* update fake 'exit' subprog as well */
10471    for (; i <= env->subprog_cnt; i++) {
10472        env->subprog_info[i].start -= cnt;
10473    }
10474
10475    return 0;
10476}
10477
10478static int bpf_adj_linfo_after_remove(struct bpf_verifier_env *env, u32 off, u32 cnt)
10479{
10480    struct bpf_prog *prog = env->prog;
10481    u32 i, l_off, l_cnt, nr_linfo;
10482    struct bpf_line_info *linfo;
10483
10484    nr_linfo = prog->aux->nr_linfo;
10485    if (!nr_linfo) {
10486        return 0;
10487    }
10488
10489    linfo = prog->aux->linfo;
10490
10491    /* find first line info to remove, count lines to be removed */
10492    for (i = 0; i < nr_linfo; i++) {
10493        if (linfo[i].insn_off >= off) {
10494            break;
10495        }
10496    }
10497
10498    l_off = i;
10499    l_cnt = 0;
10500    for (; i < nr_linfo; i++) {
10501        if (linfo[i].insn_off < off + cnt) {
10502            l_cnt++;
10503        } else {
10504            break;
10505        }
10506    }
10507
10508    /* First live insn doesn't match first live linfo, it needs to "inherit"
10509     * last removed linfo.  prog is already modified, so prog->len == off
10510     * means no live instructions after (tail of the program was removed).
10511     */
10512    if (prog->len != off && l_cnt && (i == nr_linfo || linfo[i].insn_off != off + cnt)) {
10513        l_cnt--;
10514        linfo[--i].insn_off = off + cnt;
10515    }
10516
10517    /* remove the line info which refer to the removed instructions */
10518    if (l_cnt) {
10519        memmove(linfo + l_off, linfo + i, sizeof(*linfo) * (nr_linfo - i));
10520
10521        prog->aux->nr_linfo -= l_cnt;
10522        nr_linfo = prog->aux->nr_linfo;
10523    }
10524
10525    /* pull all linfo[i].insn_off >= off + cnt in by cnt */
10526    for (i = l_off; i < nr_linfo; i++) {
10527        linfo[i].insn_off -= cnt;
10528    }
10529
10530    /* fix up all subprogs (incl. 'exit') which start >= off */
10531    for (i = 0; i <= env->subprog_cnt; i++) {
10532        if (env->subprog_info[i].linfo_idx > l_off) {
10533            /* program may have started in the removed region but
10534             * may not be fully removed
10535             */
10536            if (env->subprog_info[i].linfo_idx >= l_off + l_cnt) {
10537                env->subprog_info[i].linfo_idx -= l_cnt;
10538            } else {
10539                env->subprog_info[i].linfo_idx = l_off;
10540            }
10541        }
10542    }
10543
10544    return 0;
10545}
10546
10547static int verifier_remove_insns(struct bpf_verifier_env *env, u32 off, u32 cnt)
10548{
10549    struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10550    unsigned int orig_prog_len = env->prog->len;
10551    int err;
10552
10553    if (bpf_prog_is_dev_bound(env->prog->aux)) {
10554        bpf_prog_offload_remove_insns(env, off, cnt);
10555    }
10556
10557    err = bpf_remove_insns(env->prog, off, cnt);
10558    if (err) {
10559        return err;
10560    }
10561
10562    err = adjust_subprog_starts_after_remove(env, off, cnt);
10563    if (err) {
10564        return err;
10565    }
10566
10567    err = bpf_adj_linfo_after_remove(env, off, cnt);
10568    if (err) {
10569        return err;
10570    }
10571
10572    memmove(aux_data + off, aux_data + off + cnt, sizeof(*aux_data) * (orig_prog_len - off - cnt));
10573
10574    return 0;
10575}
10576
10577/* The verifier does more data flow analysis than llvm and will not
10578 * explore branches that are dead at run time. Malicious programs can
10579 * have dead code too. Therefore replace all dead at-run-time code
10580 * with 'ja -1'.
10581 *
10582 * Plain nops would not be optimal: if they sat at the end of the program
10583 * and, through another bug, we managed to jump there, we would execute
10584 * beyond program memory. Returning exception
10585 * code also wouldn't work since we can have subprogs where the dead
10586 * code could be located.
10587 */
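/* For example, an unreachable insn such as BPF_MOV64_IMM(BPF_REG_0, 1) is
 * simply overwritten in place with the BPF_JMP_IMM(BPF_JA, 0, 0, -1) trap
 * built below, so a stray jump into dead code can never run past it.
 */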
10588static void sanitize_dead_code(struct bpf_verifier_env *env)
10589{
10590    struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10591    struct bpf_insn trap = BPF_JMP_IMM(BPF_JA, 0, 0, -1);
10592    struct bpf_insn *insn = env->prog->insnsi;
10593    const int insn_cnt = env->prog->len;
10594    int i;
10595
10596    for (i = 0; i < insn_cnt; i++) {
10597        if (aux_data[i].seen) {
10598            continue;
10599        }
10600        memcpy(insn + i, &trap, sizeof(trap));
10601        aux_data[i].zext_dst = false;
10602    }
10603}
10604
10605static bool insn_is_cond_jump(u8 code)
10606{
10607    u8 op;
10608
10609    if (BPF_CLASS(code) == BPF_JMP32) {
10610        return true;
10611    }
10612
10613    if (BPF_CLASS(code) != BPF_JMP) {
10614        return false;
10615    }
10616
10617    op = BPF_OP(code);
10618    return op != BPF_JA && op != BPF_EXIT && op != BPF_CALL;
10619}
10620
10621static void opt_hard_wire_dead_code_branches(struct bpf_verifier_env *env)
10622{
10623    struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10624    struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10625    struct bpf_insn *insn = env->prog->insnsi;
10626    const int insn_cnt = env->prog->len;
10627    int i;
10628
10629    for (i = 0; i < insn_cnt; i++, insn++) {
10630        if (!insn_is_cond_jump(insn->code)) {
10631            continue;
10632        }
10633
10634        if (!aux_data[i + 1].seen) {
10635            ja.off = insn->off;
10636        } else if (!aux_data[i + 1 + insn->off].seen) {
10637            ja.off = 0;
10638        } else {
10639            continue;
10640        }
10641
10642        if (bpf_prog_is_dev_bound(env->prog->aux)) {
10643            bpf_prog_offload_replace_insn(env, i, &ja);
10644        }
10645
10646        memcpy(insn, &ja, sizeof(ja));
10647    }
10648}
10649
10650static int opt_remove_dead_code(struct bpf_verifier_env *env)
10651{
10652    struct bpf_insn_aux_data *aux_data = env->insn_aux_data;
10653    int insn_cnt = env->prog->len;
10654    int i, err;
10655
10656    for (i = 0; i < insn_cnt; i++) {
10657        int j;
10658
10659        j = 0;
10660        while (i + j < insn_cnt && !aux_data[i + j].seen) {
10661            j++;
10662        }
10663        if (!j) {
10664            continue;
10665        }
10666
10667        err = verifier_remove_insns(env, i, j);
10668        if (err) {
10669            return err;
10670        }
10671        insn_cnt = env->prog->len;
10672    }
10673
10674    return 0;
10675}
10676
10677static int opt_remove_nops(struct bpf_verifier_env *env)
10678{
10679    const struct bpf_insn ja = BPF_JMP_IMM(BPF_JA, 0, 0, 0);
10680    struct bpf_insn *insn = env->prog->insnsi;
10681    int insn_cnt = env->prog->len;
10682    int i, err;
10683
10684    for (i = 0; i < insn_cnt; i++) {
10685        if (memcmp(&insn[i], &ja, sizeof(ja))) {
10686            continue;
10687        }
10688
10689        err = verifier_remove_insns(env, i, 1);
10690        if (err) {
10691            return err;
10692        }
10693        insn_cnt--;
10694        i--;
10695    }
10696
10697    return 0;
10698}
10699
10700static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env, const union bpf_attr *attr)
10701{
10702    struct bpf_insn *patch, zext_patch[2], rnd_hi32_patch[4];
10703    struct bpf_insn_aux_data *aux = env->insn_aux_data;
10704    int i, patch_len, delta = 0, len = env->prog->len;
10705    struct bpf_insn *insns = env->prog->insnsi;
10706    struct bpf_prog *new_prog;
10707    bool rnd_hi32;
10708
10709    rnd_hi32 = attr->prog_flags & BPF_F_TEST_RND_HI32;
10710    zext_patch[1] = BPF_ZEXT_REG(0);
10711    rnd_hi32_patch[1] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_AX, 0);
10712    rnd_hi32_patch[2] = BPF_ALU64_IMM(BPF_LSH, BPF_REG_AX, 32);
10713    rnd_hi32_patch[3] = BPF_ALU64_REG(BPF_OR, 0, BPF_REG_AX);
10714    for (i = 0; i < len; i++) {
10715        int adj_idx = i + delta;
10716        struct bpf_insn insn;
10717
10718        insn = insns[adj_idx];
10719        if (!aux[adj_idx].zext_dst) {
10720            u8 code, class;
10721            u32 imm_rnd;
10722
10723            if (!rnd_hi32) {
10724                continue;
10725            }
10726
10727            code = insn.code;
10728            class = BPF_CLASS(code);
10729            if (insn_no_def(&insn)) {
10730                continue;
10731            }
10732
10733            /* NOTE: arg "reg" (the fourth one) is only used for
10734             *       BPF_STX which has been ruled out in above
10735             *       check, it is safe to pass NULL here.
10736             */
10737            if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
10738                if (class == BPF_LD && BPF_MODE(code) == BPF_IMM) {
10739                    i++;
10740                }
10741                continue;
10742            }
10743
10744            /* ctx load could be transformed into wider load. */
10745            if (class == BPF_LDX && aux[adj_idx].ptr_type == PTR_TO_CTX) {
10746                continue;
10747            }
10748
10749            imm_rnd = get_random_int();
10750            rnd_hi32_patch[0] = insn;
10751            rnd_hi32_patch[1].imm = imm_rnd;
10752            rnd_hi32_patch[3].dst_reg = insn.dst_reg;
10753            patch = rnd_hi32_patch;
10754            patch_len = 4;
10755            goto apply_patch_buffer;
10756        }
10757
10758        if (!bpf_jit_needs_zext()) {
10759            continue;
10760        }
10761
10762        zext_patch[0] = insn;
10763        zext_patch[1].dst_reg = insn.dst_reg;
10764        zext_patch[1].src_reg = insn.dst_reg;
10765        patch = zext_patch;
10766        patch_len = 2;
10767    apply_patch_buffer:
10768        new_prog = bpf_patch_insn_data(env, adj_idx, patch, patch_len);
10769        if (!new_prog) {
10770            return -ENOMEM;
10771        }
10772        env->prog = new_prog;
10773        insns = new_prog->insnsi;
10774        aux = env->insn_aux_data;
10775        delta += patch_len - 1;
10776    }
10777
10778    return 0;
10779}
10780
10781/* convert load instructions that access fields of a context type into a
10782 * sequence of instructions that access fields of the underlying structure:
10783 *     struct __sk_buff    -> struct sk_buff
10784 *     struct bpf_sock_ops -> struct sock
10785 */
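/* Illustrative sketch only; the concrete rewrite comes from the program
 * type's convert_ctx_access callback and the fields below are examples.
 * A socket filter reading skb->len through its context, e.g.
 *
 *    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct __sk_buff, len)),
 *
 * is rewritten into an equivalent load from the underlying 'struct sk_buff':
 *
 *    BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct sk_buff, len)),
 */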
10786static int convert_ctx_accesses(struct bpf_verifier_env *env)
10787{
10788    const struct bpf_verifier_ops *ops = env->ops;
10789    int i, cnt, size, ctx_field_size, delta = 0;
10790    const int insn_cnt = env->prog->len;
10791    struct bpf_insn insn_buf[16], *insn;
10792    u32 target_size, size_default, off;
10793    struct bpf_prog *new_prog;
10794    enum bpf_access_type type;
10795    bool is_narrower_load;
10796
10797    if (ops->gen_prologue || env->seen_direct_write) {
10798        if (!ops->gen_prologue) {
10799            verbose(env, "bpf verifier is misconfigured\n");
10800            return -EINVAL;
10801        }
10802        cnt = ops->gen_prologue(insn_buf, env->seen_direct_write, env->prog);
10803        if (cnt >= ARRAY_SIZE(insn_buf)) {
10804            verbose(env, "bpf verifier is misconfigured\n");
10805            return -EINVAL;
10806        } else if (cnt) {
10807            new_prog = bpf_patch_insn_data(env, 0, insn_buf, cnt);
10808            if (!new_prog) {
10809                return -ENOMEM;
10810            }
10811
10812            env->prog = new_prog;
10813            delta += cnt - 1;
10814        }
10815    }
10816
10817    if (bpf_prog_is_dev_bound(env->prog->aux)) {
10818        return 0;
10819    }
10820
10821    insn = env->prog->insnsi + delta;
10822
10823    for (i = 0; i < insn_cnt; i++, insn++) {
10824        bpf_convert_ctx_access_t convert_ctx_access;
10825        bool ctx_access;
10826
10827        if (insn->code == (BPF_LDX | BPF_MEM | BPF_B) || insn->code == (BPF_LDX | BPF_MEM | BPF_H) ||
10828            insn->code == (BPF_LDX | BPF_MEM | BPF_W) || insn->code == (BPF_LDX | BPF_MEM | BPF_DW)) {
10829            type = BPF_READ;
10830            ctx_access = true;
10831        } else if (insn->code == (BPF_STX | BPF_MEM | BPF_B) || insn->code == (BPF_STX | BPF_MEM | BPF_H) ||
10832                   insn->code == (BPF_STX | BPF_MEM | BPF_W) || insn->code == (BPF_STX | BPF_MEM | BPF_DW) ||
10833                   insn->code == (BPF_ST | BPF_MEM | BPF_B) || insn->code == (BPF_ST | BPF_MEM | BPF_H) ||
10834                   insn->code == (BPF_ST | BPF_MEM | BPF_W) || insn->code == (BPF_ST | BPF_MEM | BPF_DW)) {
10835            type = BPF_WRITE;
10836            ctx_access = BPF_CLASS(insn->code) == BPF_STX;
10837        } else {
10838            continue;
10839        }
10840
10841        if (type == BPF_WRITE && env->insn_aux_data[i + delta].sanitize_stack_spill) {
10842            struct bpf_insn patch[] = {
10843                *insn,
10844                BPF_ST_NOSPEC(),
10845            };
10846
10847            cnt = ARRAY_SIZE(patch);
10848            new_prog = bpf_patch_insn_data(env, i + delta, patch, cnt);
10849            if (!new_prog) {
10850                return -ENOMEM;
10851            }
10852
10853            delta += cnt - 1;
10854            env->prog = new_prog;
10855            insn = new_prog->insnsi + i + delta;
10856            continue;
10857        }
10858
10859        if (!ctx_access) {
10860            continue;
10861        }
10862
10863        switch (env->insn_aux_data[i + delta].ptr_type) {
10864            case PTR_TO_CTX:
10865                if (!ops->convert_ctx_access) {
10866                    continue;
10867                }
10868                convert_ctx_access = ops->convert_ctx_access;
10869                break;
10870            case PTR_TO_SOCKET:
10871            case PTR_TO_SOCK_COMMON:
10872                convert_ctx_access = bpf_sock_convert_ctx_access;
10873                break;
10874            case PTR_TO_TCP_SOCK:
10875                convert_ctx_access = bpf_tcp_sock_convert_ctx_access;
10876                break;
10877            case PTR_TO_XDP_SOCK:
10878                convert_ctx_access = bpf_xdp_sock_convert_ctx_access;
10879                break;
10880            case PTR_TO_BTF_ID:
10881                if (type == BPF_READ) {
10882                    insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code);
10883                    env->prog->aux->num_exentries++;
10884                } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
10885                    verbose(env, "Writes through BTF pointers are not allowed\n");
10886                    return -EINVAL;
10887                }
10888                continue;
10889            default:
10890                continue;
10891        }
10892
10893        ctx_field_size = env->insn_aux_data[i + delta].ctx_field_size;
10894        size = BPF_LDST_BYTES(insn);
10895
10896        /* If the read access is a narrower load of the field,
10897         * convert it to a 4/8-byte load, to minimize the program type
10898         * specific convert_ctx_access changes. If the conversion is
10899         * successful, we will apply the proper mask to the result.
10900         */
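        /* Worked example (illustrative): a 1-byte read of byte 1 of a 4-byte
         * context field becomes a 4-byte read of the whole field; on a
         * little-endian host bpf_ctx_narrow_access_offset() then yields a
         * shift of 1 * 8 == 8 bits and the result is masked with
         * (1 << 1 * 8) - 1 == 0xff to recover just the requested byte.
         */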
10901        is_narrower_load = size < ctx_field_size;
10902        size_default = bpf_ctx_off_adjust_machine(ctx_field_size);
10903        off = insn->off;
10904        if (is_narrower_load) {
10905            u8 size_code;
10906
10907            if (type == BPF_WRITE) {
10908                verbose(env, "bpf verifier narrow ctx access misconfigured\n");
10909                return -EINVAL;
10910            }
10911
10912            size_code = BPF_H;
10913            if (ctx_field_size == 4) {
10914                size_code = BPF_W;
10915            } else if (ctx_field_size == 8) {
10916                size_code = BPF_DW;
10917            }
10918
10919            insn->off = off & ~(size_default - 1);
10920            insn->code = BPF_LDX | BPF_MEM | size_code;
10921        }
10922
10923        target_size = 0;
10924        cnt = convert_ctx_access(type, insn, insn_buf, env->prog, &target_size);
10925        if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf) || (ctx_field_size && !target_size)) {
10926            verbose(env, "bpf verifier is misconfigured\n");
10927            return -EINVAL;
10928        }
10929
10930        if (is_narrower_load && size < target_size) {
10931            u8 shift = bpf_ctx_narrow_access_offset(off, size, size_default) * 8;
10932            if (shift && cnt + 1 >= ARRAY_SIZE(insn_buf)) {
10933                verbose(env, "bpf verifier narrow ctx load misconfigured\n");
10934                return -EINVAL;
10935            }
10936            if (ctx_field_size <= 4) {
10937                if (shift) {
10938                    insn_buf[cnt++] = BPF_ALU32_IMM(BPF_RSH, insn->dst_reg, shift);
10939                }
10940                insn_buf[cnt++] = BPF_ALU32_IMM(BPF_AND, insn->dst_reg, (1 << size * 8) - 1);
10941            } else {
10942                if (shift) {
10943                    insn_buf[cnt++] = BPF_ALU64_IMM(BPF_RSH, insn->dst_reg, shift);
10944                }
10945                insn_buf[cnt++] = BPF_ALU64_IMM(BPF_AND, insn->dst_reg, (1ULL << size * 8) - 1);
10946            }
10947        }
10948
10949        new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
10950        if (!new_prog) {
10951            return -ENOMEM;
10952        }
10953
10954        delta += cnt - 1;
10955
10956        /* keep walking new program and skip insns we just inserted */
10957        env->prog = new_prog;
10958        insn = new_prog->insnsi + i + delta;
10959    }
10960
10961    return 0;
10962}
10963
10964static int jit_subprogs(struct bpf_verifier_env *env)
10965{
10966    struct bpf_prog *prog = env->prog, **func, *tmp;
10967    int i, j, subprog_start, subprog_end = 0, len, subprog;
10968    struct bpf_map *map_ptr;
10969    struct bpf_insn *insn;
10970    void *old_bpf_func;
10971    int err, num_exentries;
10972
10973    if (env->subprog_cnt <= 1) {
10974        return 0;
10975    }
10976
10977    for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
10978        if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
10979            continue;
10980        }
10981        /* Upon error here we cannot fall back to interpreter but
10982         * need a hard reject of the program. Thus -EFAULT is
10983         * propagated in any case.
10984         */
10985        subprog = find_subprog(env, i + insn->imm + 1);
10986        if (subprog < 0) {
10987            WARN_ONCE(1, "verifier bug. No program starts at insn %d\n", i + insn->imm + 1);
10988            return -EFAULT;
10989        }
10990        /* temporarily remember subprog id inside insn instead of
10991         * aux_data, since next loop will split up all insns into funcs
10992         */
10993        insn->off = subprog;
10994        /* remember original imm in case JIT fails and fallback
10995         * to interpreter will be needed
10996         */
10997        env->insn_aux_data[i].call_imm = insn->imm;
10998        /* point imm to __bpf_call_base+1 from JITs point of view */
10999        insn->imm = 1;
11000    }
11001
11002    err = bpf_prog_alloc_jited_linfo(prog);
11003    if (err) {
11004        goto out_undo_insn;
11005    }
11006
11007    err = -ENOMEM;
11008    func = kcalloc(env->subprog_cnt, sizeof(prog), GFP_KERNEL);
11009    if (!func) {
11010        goto out_undo_insn;
11011    }
11012
11013    for (i = 0; i < env->subprog_cnt; i++) {
11014        subprog_start = subprog_end;
11015        subprog_end = env->subprog_info[i + 1].start;
11016
11017        len = subprog_end - subprog_start;
11018        /* BPF_PROG_RUN doesn't call subprogs directly,
11019         * hence main prog stats include the runtime of subprogs.
11020         * subprogs don't have IDs and are not reachable via prog_get_next_id,
11021         * so func[i]->aux->stats will never be accessed and stays NULL
11022         */
11023        func[i] = bpf_prog_alloc_no_stats(bpf_prog_size(len), GFP_USER);
11024        if (!func[i]) {
11025            goto out_free;
11026        }
11027        memcpy(func[i]->insnsi, &prog->insnsi[subprog_start], len * sizeof(struct bpf_insn));
11028        func[i]->type = prog->type;
11029        func[i]->len = len;
11030        if (bpf_prog_calc_tag(func[i])) {
11031            goto out_free;
11032        }
11033        func[i]->is_func = 1;
11034        func[i]->aux->func_idx = i;
11035        /* Below members will be freed only at prog->aux */
11036        func[i]->aux->btf = prog->aux->btf;
11037        func[i]->aux->func_info = prog->aux->func_info;
11038        func[i]->aux->poke_tab = prog->aux->poke_tab;
11039        func[i]->aux->size_poke_tab = prog->aux->size_poke_tab;
11040
11041        for (j = 0; j < prog->aux->size_poke_tab; j++) {
11042            struct bpf_jit_poke_descriptor *poke;
11043
11044            poke = &prog->aux->poke_tab[j];
11045            if (poke->insn_idx < subprog_end && poke->insn_idx >= subprog_start) {
11046                poke->aux = func[i]->aux;
11047            }
11048        }
11049
11050        /* Use bpf_prog_F_tag to indicate functions in stack traces.
11051         * Long term would need debug info to populate names
11052         */
11053        func[i]->aux->name[0] = 'F';
11054        func[i]->aux->stack_depth = env->subprog_info[i].stack_depth;
11055        func[i]->jit_requested = 1;
11056        func[i]->aux->linfo = prog->aux->linfo;
11057        func[i]->aux->nr_linfo = prog->aux->nr_linfo;
11058        func[i]->aux->jited_linfo = prog->aux->jited_linfo;
11059        func[i]->aux->linfo_idx = env->subprog_info[i].linfo_idx;
11060        num_exentries = 0;
11061        insn = func[i]->insnsi;
11062        for (j = 0; j < func[i]->len; j++, insn++) {
11063            if (BPF_CLASS(insn->code) == BPF_LDX && BPF_MODE(insn->code) == BPF_PROBE_MEM) {
11064                num_exentries++;
11065            }
11066        }
11067        func[i]->aux->num_exentries = num_exentries;
11068        func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
11069        func[i] = bpf_int_jit_compile(func[i]);
11070        if (!func[i]->jited) {
11071            err = -ENOTSUPP;
11072            goto out_free;
11073        }
11074        cond_resched();
11075    }
11076
11077    /* at this point all bpf functions were successfully JITed
11078     * now populate all bpf_calls with correct addresses and
11079     * run last pass of JIT
11080     */
11081    for (i = 0; i < env->subprog_cnt; i++) {
11082        insn = func[i]->insnsi;
11083        for (j = 0; j < func[i]->len; j++, insn++) {
11084            if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11085                continue;
11086            }
11087            subprog = insn->off;
11088            insn->imm = BPF_CAST_CALL(func[subprog]->bpf_func) - __bpf_call_base;
11089        }
11090
11091        /* we use the aux data to keep a list of the start addresses
11092         * of the JITed images for each function in the program
11093         *
11094         * for some architectures, such as powerpc64, the imm field
11095         * might not be large enough to hold the offset of the start
11096         * address of the callee's JITed image from __bpf_call_base
11097         *
11098         * in such cases, we can lookup the start address of a callee
11099         * by using its subprog id, available from the off field of
11100         * the call instruction, as an index for this list
11101         */
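        /* for instance (illustrative only), instead of relying on the call imm
         * such a JIT can fetch the callee start address as
         *
         *    func[i]->aux->func[insn->off]->bpf_func
         *
         * since insn->off still carries the subprog id at this point
         */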
11102        func[i]->aux->func = func;
11103        func[i]->aux->func_cnt = env->subprog_cnt;
11104    }
11105    for (i = 0; i < env->subprog_cnt; i++) {
11106        old_bpf_func = func[i]->bpf_func;
11107        tmp = bpf_int_jit_compile(func[i]);
11108        if (tmp != func[i] || func[i]->bpf_func != old_bpf_func) {
11109            verbose(env, "JIT doesn't support bpf-to-bpf calls\n");
11110            err = -ENOTSUPP;
11111            goto out_free;
11112        }
11113        cond_resched();
11114    }
11115
11116    /* finally lock prog and jit images for all functions and
11117     * populate kallsyms
11118     */
11119    for (i = 0; i < env->subprog_cnt; i++) {
11120        bpf_prog_lock_ro(func[i]);
11121        bpf_prog_kallsyms_add(func[i]);
11122    }
11123
11124    /* Last step: make now unused interpreter insns from main
11125     * prog consistent for later dump requests, so they can
11126     * later look the same as if they were interpreted only.
11127     */
11128    for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11129        if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11130            continue;
11131        }
11132        insn->off = env->insn_aux_data[i].call_imm;
11133        subprog = find_subprog(env, i + insn->off + 1);
11134        insn->imm = subprog;
11135    }
11136
11137    prog->jited = 1;
11138    prog->bpf_func = func[0]->bpf_func;
11139    prog->aux->func = func;
11140    prog->aux->func_cnt = env->subprog_cnt;
11141    bpf_prog_free_unused_jited_linfo(prog);
11142    return 0;
11143out_free:
11144    /* We failed JIT'ing, so at this point we need to unregister poke
11145     * descriptors from subprogs, so that the kernel does not attempt to
11146     * patch them anymore as we're freeing the subprog JIT memory.
11147     */
11148    for (i = 0; i < prog->aux->size_poke_tab; i++) {
11149        map_ptr = prog->aux->poke_tab[i].tail_call.map;
11150        map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
11151    }
11152    /* At this point we're guaranteed that poke descriptors are not
11153     * live anymore. We can just unlink its descriptor table as it's
11154     * released with the main prog.
11155     */
11156    for (i = 0; i < env->subprog_cnt; i++) {
11157        if (!func[i]) {
11158            continue;
11159        }
11160        func[i]->aux->poke_tab = NULL;
11161        bpf_jit_free(func[i]);
11162    }
11163    kfree(func);
11164out_undo_insn:
11165    /* cleanup main prog to be interpreted */
11166    prog->jit_requested = 0;
11167    for (i = 0, insn = prog->insnsi; i < prog->len; i++, insn++) {
11168        if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11169            continue;
11170        }
11171        insn->off = 0;
11172        insn->imm = env->insn_aux_data[i].call_imm;
11173    }
11174    bpf_prog_free_jited_linfo(prog);
11175    return err;
11176}
11177
11178static int fixup_call_args(struct bpf_verifier_env *env)
11179{
11180#ifndef CONFIG_BPF_JIT_ALWAYS_ON
11181    struct bpf_prog *prog = env->prog;
11182    struct bpf_insn *insn = prog->insnsi;
11183    int i, depth;
11184#endif
11185    int err = 0;
11186
11187    if (env->prog->jit_requested && !bpf_prog_is_dev_bound(env->prog->aux)) {
11188        err = jit_subprogs(env);
11189        if (err == 0) {
11190            return 0;
11191        }
11192        if (err == -EFAULT) {
11193            return err;
11194        }
11195    }
11196#ifndef CONFIG_BPF_JIT_ALWAYS_ON
11197    if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
11198        /* When JIT fails the progs with bpf2bpf calls and tail_calls
11199         * have to be rejected, since interpreter doesn't support them yet.
11200         */
11201        verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
11202        return -EINVAL;
11203    }
11204    for (i = 0; i < prog->len; i++, insn++) {
11205        if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) {
11206            continue;
11207        }
11208        depth = get_callee_stack_depth(env, insn, i);
11209        if (depth < 0) {
11210            return depth;
11211        }
11212        bpf_patch_call_args(insn, depth);
11213    }
11214    err = 0;
11215#endif
11216    return err;
11217}
11218
11219/* fixup insn->imm field of bpf_call instructions
11220 * and inline eligible helpers as explicit sequence of BPF instructions
11221 *
11222 * this function is called after eBPF program passed verification
11223 */
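/* Illustrative sketch: a helper call arriving here as
 *
 *    insn->code == (BPF_JMP | BPF_CALL), insn->imm == BPF_FUNC_map_lookup_elem
 *
 * leaves either with insn->imm rewritten to 'fn->func - __bpf_call_base'
 * (the address of the real in-kernel helper) or, for eligible maps on 64-bit
 * JIT-enabled builds, replaced by the map's inlined map_gen_lookup() sequence.
 */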
11224static int fixup_bpf_calls(struct bpf_verifier_env *env)
11225{
11226    struct bpf_prog *prog = env->prog;
11227    bool expect_blinding = bpf_jit_blinding_enabled(prog);
11228    struct bpf_insn *insn = prog->insnsi;
11229    const struct bpf_func_proto *fn;
11230    const int insn_cnt = prog->len;
11231    const struct bpf_map_ops *ops;
11232    struct bpf_insn_aux_data *aux;
11233    struct bpf_insn insn_buf[16];
11234    struct bpf_prog *new_prog;
11235    struct bpf_map *map_ptr;
11236    int i, ret, cnt, delta = 0;
11237
11238    for (i = 0; i < insn_cnt; i++, insn++) {
11239        if (insn->code == (BPF_ALU64 | BPF_MOD | BPF_X) || insn->code == (BPF_ALU64 | BPF_DIV | BPF_X) ||
11240            insn->code == (BPF_ALU | BPF_MOD | BPF_X) || insn->code == (BPF_ALU | BPF_DIV | BPF_X)) {
11241            bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
11242            bool isdiv = BPF_OP(insn->code) == BPF_DIV;
11243            struct bpf_insn *patchlet;
11244            struct bpf_insn chk_and_div[] = {
11245                /* [R,W]x div 0 -> 0 */
11246                BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JNE | BPF_K, insn->src_reg, 0, 2, 0),
11247                BPF_ALU32_REG(BPF_XOR, insn->dst_reg, insn->dst_reg),
11248                BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11249                *insn,
11250            };
11251            struct bpf_insn chk_and_mod[] = {
11252                /* [R,W]x mod 0 -> [R,W]x */
11253                BPF_RAW_INSN((is64 ? BPF_JMP : BPF_JMP32) | BPF_JEQ | BPF_K, insn->src_reg, 0, 1 + (is64 ? 0 : 1), 0),
11254                *insn,
11255                BPF_JMP_IMM(BPF_JA, 0, 0, 1),
11256                BPF_MOV32_REG(insn->dst_reg, insn->dst_reg),
11257            };
11258
11259            patchlet = isdiv ? chk_and_div : chk_and_mod;
11260            cnt = isdiv ? ARRAY_SIZE(chk_and_div) : ARRAY_SIZE(chk_and_mod) - (is64 ? 2 : 0);
11261
11262            new_prog = bpf_patch_insn_data(env, i + delta, patchlet, cnt);
11263            if (!new_prog) {
11264                return -ENOMEM;
11265            }
11266
11267            delta += cnt - 1;
11268            env->prog = prog = new_prog;
11269            insn = new_prog->insnsi + i + delta;
11270            continue;
11271        }
11272
11273        if (BPF_CLASS(insn->code) == BPF_LD && (BPF_MODE(insn->code) == BPF_ABS || BPF_MODE(insn->code) == BPF_IND)) {
11274            cnt = env->ops->gen_ld_abs(insn, insn_buf);
11275            if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11276                verbose(env, "bpf verifier is misconfigured\n");
11277                return -EINVAL;
11278            }
11279
11280            new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11281            if (!new_prog) {
11282                return -ENOMEM;
11283            }
11284
11285            delta += cnt - 1;
11286            env->prog = prog = new_prog;
11287            insn = new_prog->insnsi + i + delta;
11288            continue;
11289        }
11290
11291        if (insn->code == (BPF_ALU64 | BPF_ADD | BPF_X) || insn->code == (BPF_ALU64 | BPF_SUB | BPF_X)) {
11292            const u8 code_add = BPF_ALU64 | BPF_ADD | BPF_X;
11293            const u8 code_sub = BPF_ALU64 | BPF_SUB | BPF_X;
11294            struct bpf_insn insn_buf_in[16];
11295            struct bpf_insn *patch = &insn_buf_in[0];
11296            bool issrc, isneg, isimm;
11297            u32 off_reg;
11298
11299            aux = &env->insn_aux_data[i + delta];
11300            if (!aux->alu_state || aux->alu_state == BPF_ALU_NON_POINTER) {
11301                continue;
11302            }
11303
11304            isneg = aux->alu_state & BPF_ALU_NEG_VALUE;
11305            issrc = (aux->alu_state & BPF_ALU_SANITIZE) == BPF_ALU_SANITIZE_SRC;
11306            isimm = aux->alu_state & BPF_ALU_IMMEDIATE;
11307
11308            off_reg = issrc ? insn->src_reg : insn->dst_reg;
11309            if (isimm) {
11310                *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11311            } else {
11312                if (isneg) {
11313                    *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11314                }
11315                *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
11316                *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
11317                *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
11318                *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
11319                *patch++ = BPF_ALU64_IMM(BPF_ARSH, BPF_REG_AX, 63);
11320                *patch++ = BPF_ALU64_REG(BPF_AND, BPF_REG_AX, off_reg);
11321            }
11322            if (!issrc) {
11323                *patch++ = BPF_MOV64_REG(insn->dst_reg, insn->src_reg);
11324            }
11325            insn->src_reg = BPF_REG_AX;
11326            if (isneg) {
11327                insn->code = insn->code == code_add ? code_sub : code_add;
11328            }
11329            *patch++ = *insn;
11330            if (issrc && isneg && !isimm) {
11331                *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
11332            }
11333            cnt = patch - insn_buf_in;
11334
11335            new_prog = bpf_patch_insn_data(env, i + delta, insn_buf_in, cnt);
11336            if (!new_prog) {
11337                return -ENOMEM;
11338            }
11339
11340            delta += cnt - 1;
11341            env->prog = prog = new_prog;
11342            insn = new_prog->insnsi + i + delta;
11343            continue;
11344        }
11345
11346        if (insn->code != (BPF_JMP | BPF_CALL)) {
11347            continue;
11348        }
11349        if (insn->src_reg == BPF_PSEUDO_CALL) {
11350            continue;
11351        }
11352
11353        if (insn->imm == BPF_FUNC_get_route_realm) {
11354            prog->dst_needed = 1;
11355        }
11356        if (insn->imm == BPF_FUNC_get_prandom_u32) {
11357            bpf_user_rnd_init_once();
11358        }
11359        if (insn->imm == BPF_FUNC_override_return) {
11360            prog->kprobe_override = 1;
11361        }
11362        if (insn->imm == BPF_FUNC_tail_call) {
11363            /* If we tail call into other programs, we
11364             * cannot make any assumptions since they can
11365             * be replaced dynamically during runtime in
11366             * the program array.
11367             */
11368            prog->cb_access = 1;
11369            if (!allow_tail_call_in_subprogs(env)) {
11370                prog->aux->stack_depth = MAX_BPF_STACK;
11371            }
11372            prog->aux->max_pkt_offset = MAX_PACKET_OFF;
11373
11374            /* mark bpf_tail_call as different opcode to avoid
11375             * conditional branch in the interpreter for every normal
11376             * call and to prevent accidental JITing by JIT compiler
11377             * that doesn't support bpf_tail_call yet
11378             */
11379            insn->imm = 0;
11380            insn->code = BPF_JMP | BPF_TAIL_CALL;
11381
11382            aux = &env->insn_aux_data[i + delta];
11383            if (env->bpf_capable && !expect_blinding && prog->jit_requested && !bpf_map_key_poisoned(aux) &&
11384                !bpf_map_ptr_poisoned(aux) && !bpf_map_ptr_unpriv(aux)) {
11385                struct bpf_jit_poke_descriptor desc = {
11386                    .reason = BPF_POKE_REASON_TAIL_CALL,
11387                    .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
11388                    .tail_call.key = bpf_map_key_immediate(aux),
11389                    .insn_idx = i + delta,
11390                };
11391
11392                ret = bpf_jit_add_poke_descriptor(prog, &desc);
11393                if (ret < 0) {
11394                    verbose(env, "adding tail call poke descriptor failed\n");
11395                    return ret;
11396                }
11397
11398                insn->imm = ret + 1;
11399                continue;
11400            }
11401
11402            if (!bpf_map_ptr_unpriv(aux)) {
11403                continue;
11404            }
11405
11406            /* instead of changing every JIT dealing with tail_call
11407             * emit two extra insns:
11408             * if (index >= max_entries) goto out;
11409             * index &= array->index_mask;
11410             * to avoid out-of-bounds cpu speculation
11411             */
11412            if (bpf_map_ptr_poisoned(aux)) {
11413                verbose(env, "tail_call abusing map_ptr\n");
11414                return -EINVAL;
11415            }
11416
11417            map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11418            insn_buf[0] = BPF_JMP_IMM(BPF_JGE, BPF_REG_3, map_ptr->max_entries, 2);
11419            insn_buf[1] = BPF_ALU32_IMM(BPF_AND, BPF_REG_3, container_of(map_ptr, struct bpf_array, map)->index_mask);
11420            insn_buf[2] = *insn;
11421            cnt = 3;
11422            new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11423            if (!new_prog) {
11424                return -ENOMEM;
11425            }
11426
11427            delta += cnt - 1;
11428            env->prog = prog = new_prog;
11429            insn = new_prog->insnsi + i + delta;
11430            continue;
11431        }
11432
11433        /* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
11434         * and other inlining handlers are currently limited to 64 bit
11435         * only.
11436         */
11437        if (prog->jit_requested && BITS_PER_LONG == 64 &&
11438            (insn->imm == BPF_FUNC_map_lookup_elem || insn->imm == BPF_FUNC_map_update_elem ||
11439             insn->imm == BPF_FUNC_map_delete_elem || insn->imm == BPF_FUNC_map_push_elem ||
11440             insn->imm == BPF_FUNC_map_pop_elem || insn->imm == BPF_FUNC_map_peek_elem)) {
11441            aux = &env->insn_aux_data[i + delta];
11442            if (bpf_map_ptr_poisoned(aux)) {
11443                goto patch_call_imm;
11444            }
11445
11446            map_ptr = BPF_MAP_PTR(aux->map_ptr_state);
11447            ops = map_ptr->ops;
11448            if (insn->imm == BPF_FUNC_map_lookup_elem && ops->map_gen_lookup) {
11449                cnt = ops->map_gen_lookup(map_ptr, insn_buf);
11450                if (cnt == -EOPNOTSUPP) {
11451                    goto patch_map_ops_generic;
11452                }
11453                if (cnt <= 0 || cnt >= ARRAY_SIZE(insn_buf)) {
11454                    verbose(env, "bpf verifier is misconfigured\n");
11455                    return -EINVAL;
11456                }
11457
11458                new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11459                if (!new_prog) {
11460                    return -ENOMEM;
11461                }
11462
11463                delta += cnt - 1;
11464                env->prog = prog = new_prog;
11465                insn = new_prog->insnsi + i + delta;
11466                continue;
11467            }
11468
11469            BUILD_BUG_ON(!__same_type(ops->map_lookup_elem, (void *(*)(struct bpf_map * map, void *key)) NULL));
11470            BUILD_BUG_ON(!__same_type(ops->map_delete_elem, (int (*)(struct bpf_map * map, void *key)) NULL));
11471            BUILD_BUG_ON(!__same_type(ops->map_update_elem,
11472                                      (int (*)(struct bpf_map * map, void *key, void *value, u64 flags)) NULL));
11473            BUILD_BUG_ON(
11474                !__same_type(ops->map_push_elem, (int (*)(struct bpf_map * map, void *value, u64 flags)) NULL));
11475            BUILD_BUG_ON(!__same_type(ops->map_pop_elem, (int (*)(struct bpf_map * map, void *value)) NULL));
11476            BUILD_BUG_ON(!__same_type(ops->map_peek_elem, (int (*)(struct bpf_map * map, void *value)) NULL));
11477        patch_map_ops_generic:
11478            switch (insn->imm) {
11479                case BPF_FUNC_map_lookup_elem:
11480                    insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) - __bpf_call_base;
11481                    continue;
11482                case BPF_FUNC_map_update_elem:
11483                    insn->imm = BPF_CAST_CALL(ops->map_update_elem) - __bpf_call_base;
11484                    continue;
11485                case BPF_FUNC_map_delete_elem:
11486                    insn->imm = BPF_CAST_CALL(ops->map_delete_elem) - __bpf_call_base;
11487                    continue;
11488                case BPF_FUNC_map_push_elem:
11489                    insn->imm = BPF_CAST_CALL(ops->map_push_elem) - __bpf_call_base;
11490                    continue;
11491                case BPF_FUNC_map_pop_elem:
11492                    insn->imm = BPF_CAST_CALL(ops->map_pop_elem) - __bpf_call_base;
11493                    continue;
11494                case BPF_FUNC_map_peek_elem:
11495                    insn->imm = BPF_CAST_CALL(ops->map_peek_elem) - __bpf_call_base;
11496                    continue;
11497                default:
11498                    break;
11499            }
11500
11501            goto patch_call_imm;
11502        }
11503
11504        if (prog->jit_requested && BITS_PER_LONG == 64 && insn->imm == BPF_FUNC_jiffies64) {
11505            struct bpf_insn ld_jiffies_addr[2] = {
11506                BPF_LD_IMM64(BPF_REG_0, (unsigned long)&jiffies),
11507            };
11508
11509            insn_buf[0] = ld_jiffies_addr[0];
11510            insn_buf[1] = ld_jiffies_addr[1];
11511            insn_buf[2] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0);
11512            cnt = 3;
11513
11514            new_prog = bpf_patch_insn_data(env, i + delta, insn_buf, cnt);
11515            if (!new_prog) {
11516                return -ENOMEM;
11517            }
11518
11519            delta += cnt - 1;
11520            env->prog = prog = new_prog;
11521            insn = new_prog->insnsi + i + delta;
11522            continue;
11523        }
11524
11525    patch_call_imm:
11526        fn = env->ops->get_func_proto(insn->imm, env->prog);
11527        /* all functions that have a prototype and that the verifier allowed
11528         * programs to call must be real in-kernel functions
11529         */
11530        if (!fn->func) {
11531            verbose(env, "kernel subsystem misconfigured func %s#%d\n", func_id_name(insn->imm), insn->imm);
11532            return -EFAULT;
11533        }
11534        insn->imm = fn->func - __bpf_call_base;
11535    }
11536
11537    /* Since poke tab is now finalized, publish aux to tracker. */
11538    for (i = 0; i < prog->aux->size_poke_tab; i++) {
11539        map_ptr = prog->aux->poke_tab[i].tail_call.map;
11540        if (!map_ptr->ops->map_poke_track || !map_ptr->ops->map_poke_untrack || !map_ptr->ops->map_poke_run) {
11541            verbose(env, "bpf verifier is misconfigured\n");
11542            return -EINVAL;
11543        }
11544
11545        ret = map_ptr->ops->map_poke_track(map_ptr, prog->aux);
11546        if (ret < 0) {
11547            verbose(env, "tracking tail call prog failed\n");
11548            return ret;
11549        }
11550    }
11551
11552    return 0;
11553}
11554
11555static void free_states(struct bpf_verifier_env *env)
11556{
11557    struct bpf_verifier_state_list *sl, *sln;
11558    int i;
11559
11560    sl = env->free_list;
11561    while (sl) {
11562        sln = sl->next;
11563        free_verifier_state(&sl->state, false);
11564        kfree(sl);
11565        sl = sln;
11566    }
11567    env->free_list = NULL;
11568
11569    if (!env->explored_states) {
11570        return;
11571    }
11572
11573    for (i = 0; i < state_htab_size(env); i++) {
11574        sl = env->explored_states[i];
11575
11576        while (sl) {
11577            sln = sl->next;
11578            free_verifier_state(&sl->state, false);
11579            kfree(sl);
11580            sl = sln;
11581        }
11582        env->explored_states[i] = NULL;
11583    }
11584}
11585
11586static int do_check_common(struct bpf_verifier_env *env, int subprog)
11587{
11588    bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
11589    struct bpf_verifier_state *state;
11590    struct bpf_reg_state *regs;
11591    int ret, i;
11592
11593    env->prev_linfo = NULL;
11594    env->pass_cnt++;
11595
11596    state = kzalloc(sizeof(struct bpf_verifier_state), GFP_KERNEL);
11597    if (!state) {
11598        return -ENOMEM;
11599    }
11600    state->curframe = 0;
11601    state->speculative = false;
11602    state->branches = 1;
11603    state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL);
11604    if (!state->frame[0]) {
11605        kfree(state);
11606        return -ENOMEM;
11607    }
11608    env->cur_state = state;
11609    init_func_state(env, state->frame[0], BPF_MAIN_FUNC /* callsite */, 0 /* frameno */, subprog);
11610
11611    regs = state->frame[state->curframe]->regs;
11612    if (subprog || env->prog->type == BPF_PROG_TYPE_EXT) {
11613        ret = btf_prepare_func_args(env, subprog, regs);
11614        if (ret) {
11615            goto out;
11616        }
11617        for (i = BPF_REG_1; i <= BPF_REG_5; i++) {
11618            if (regs[i].type == PTR_TO_CTX) {
11619                mark_reg_known_zero(env, regs, i);
11620            } else if (regs[i].type == SCALAR_VALUE) {
11621                mark_reg_unknown(env, regs, i);
11622            }
11623        }
11624    } else {
11625        /* 1st arg to a function */
11626        regs[BPF_REG_1].type = PTR_TO_CTX;
11627        mark_reg_known_zero(env, regs, BPF_REG_1);
11628        ret = btf_check_func_arg_match(env, subprog, regs);
11629        if (ret == -EFAULT) {
            /* Unlikely verifier bug; abort.
             * Both ret == 0 and ret < 0 are, sadly, acceptable for
             * the main() function for backward-compatibility reasons.
             * For example, a socket filter program may be written as:
             *   int bpf_prog(struct pt_regs *ctx)
             * and never dereference that ctx in the program.
             * 'struct pt_regs' is a type mismatch for a socket
             * filter, which should be using 'struct __sk_buff'.
             */
11639            goto out;
11640        }
11641    }
11642
11643    ret = do_check(env);
11644out:
11645    /* check for NULL is necessary, since cur_state can be freed inside
11646     * do_check() under memory pressure.
11647     */
11648    if (env->cur_state) {
11649        free_verifier_state(env->cur_state, true);
11650        env->cur_state = NULL;
11651    }
11652    while (!pop_stack(env, NULL, NULL, false)) {
11653        ;
11654    }
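    /* On success, drop the log collected during this pass unless level 2
     * logging was requested; only failures keep their full trace.
     */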
11655    if (!ret && pop_log) {
11656        bpf_vlog_reset(&env->log, 0);
11657    }
11658    free_states(env);
11659    return ret;
11660}
11661
11662/* Verify all global functions in a BPF program one by one based on their BTF.
11663 * All global functions must pass verification. Otherwise the whole program is rejected.
11664 * Consider:
11665 * int bar(int);
11666 * int foo(int f)
11667 * {
11668 *    return bar(f);
11669 * }
11670 * int bar(int b)
11671 * {
11672 *    ...
11673 * }
 * foo() will be verified first for R1=any_scalar_value. During that
 * verification it is assumed that bar() has already been verified, so the
 * call to bar() from foo() is checked for a type match only. Later bar() is
 * verified independently to check that it is safe for R1=any_scalar_value.
11678 */
11679static int do_check_subprogs(struct bpf_verifier_env *env)
11680{
11681    struct bpf_prog_aux *aux = env->prog->aux;
11682    int i, ret;
11683
11684    if (!aux->func_info) {
11685        return 0;
11686    }
11687
11688    for (i = 1; i < env->subprog_cnt; i++) {
11689        if (aux->func_info_aux[i].linkage != BTF_FUNC_GLOBAL) {
11690            continue;
11691        }
11692        env->insn_idx = env->subprog_info[i].start;
11693        WARN_ON_ONCE(env->insn_idx == 0);
11694        ret = do_check_common(env, i);
11695        if (ret) {
11696            return ret;
11697        } else if (env->log.level & BPF_LOG_LEVEL) {
11698            verbose(env, "Func#%d is safe for any args that match its prototype\n", i);
11699        }
11700    }
11701    return 0;
11702}
11703
11704static int do_check_main(struct bpf_verifier_env *env)
11705{
11706    int ret;
11707
11708    env->insn_idx = 0;
11709    ret = do_check_common(env, 0);
11710    if (!ret) {
11711        env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
11712    }
11713    return ret;
11714}
11715
11716static void print_verification_stats(struct bpf_verifier_env *env)
11717{
11718    int i;
11719
11720    if (env->log.level & BPF_LOG_STATS) {
        verbose(env, "verification time %lld usec\n", div_u64(env->verification_time, 1000));
11722        verbose(env, "stack depth ");
11723        for (i = 0; i < env->subprog_cnt; i++) {
11724            u32 depth = env->subprog_info[i].stack_depth;
11725
11726            verbose(env, "%d", depth);
11727            if (i + 1 < env->subprog_cnt) {
11728                verbose(env, "+");
11729            }
11730        }
11731        verbose(env, "\n");
11732    }
11733    verbose(env,
11734            "processed %d insns (limit %d) max_states_per_insn %d "
11735            "total_states %d peak_states %d mark_read %d\n",
11736            env->insn_processed, BPF_COMPLEXITY_LIMIT_INSNS, env->max_states_per_insn, env->total_states,
11737            env->peak_states, env->longest_mark_read_walk);
11738}
11739
11740static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
11741{
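    /* For struct_ops programs, attach_btf_id names the kernel struct
     * being implemented (e.g. tcp_congestion_ops) and
     * expected_attach_type is the index of the member this program
     * provides.
     */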
11742    const struct btf_type *t, *func_proto;
11743    const struct bpf_struct_ops *st_ops;
11744    const struct btf_member *member;
11745    struct bpf_prog *prog = env->prog;
11746    u32 btf_id, member_idx;
11747    const char *mname;
11748
11749    if (!prog->gpl_compatible) {
11750        verbose(env, "struct ops programs must have a GPL compatible license\n");
11751        return -EINVAL;
11752    }
11753
11754    btf_id = prog->aux->attach_btf_id;
11755    st_ops = bpf_struct_ops_find(btf_id);
11756    if (!st_ops) {
11757        verbose(env, "attach_btf_id %u is not a supported struct\n", btf_id);
11758        return -ENOTSUPP;
11759    }
11760
11761    t = st_ops->type;
11762    member_idx = prog->expected_attach_type;
11763    if (member_idx >= btf_type_vlen(t)) {
11764        verbose(env, "attach to invalid member idx %u of struct %s\n", member_idx, st_ops->name);
11765        return -EINVAL;
11766    }
11767
11768    member = &btf_type_member(t)[member_idx];
11769    mname = btf_name_by_offset(btf_vmlinux, member->name_off);
11770    func_proto = btf_type_resolve_func_ptr(btf_vmlinux, member->type, NULL);
11771    if (!func_proto) {
11772        verbose(env, "attach to invalid member %s(@idx %u) of struct %s\n", mname, member_idx, st_ops->name);
11773        return -EINVAL;
11774    }
11775
11776    if (st_ops->check_member) {
11777        int err = st_ops->check_member(t, member);
11778        if (err) {
11779            verbose(env, "attach to unsupported member %s of struct %s\n", mname, st_ops->name);
11780            return err;
11781        }
11782    }
11783
11784    prog->aux->attach_func_proto = func_proto;
11785    prog->aux->attach_func_name = mname;
11786    env->ops = st_ops->verifier_ops;
11787
11788    return 0;
11789}

#define SECURITY_PREFIX "security_"
11791
11792static int check_attach_modify_return(unsigned long addr, const char *func_name)
11793{
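    /* fmod_ret programs may only attach to functions that are on the
     * error-injection allow list or to security_*() LSM hooks.
     */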
11794    if (within_error_injection_list(addr) || !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) {
11795        return 0;
11796    }
11797    return -EINVAL;
11798}
11799
/* non-exhaustive list of sleepable bpf_lsm_*() functions */
11801BTF_SET_START(btf_sleepable_lsm_hooks)
11802#ifdef CONFIG_BPF_LSM
11803BTF_ID(func, bpf_lsm_bprm_committed_creds)
11804#else
11805BTF_ID_UNUSED
11806#endif
11807BTF_SET_END(btf_sleepable_lsm_hooks)
11808
11809static int check_sleepable_lsm_hook(u32 btf_id)
11810{
11811    return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
11812}
11813
/* list of non-sleepable functions that are otherwise on the
 * ALLOW_ERROR_INJECTION list
 */
11817BTF_SET_START(btf_non_sleepable_error_inject)
/* The three functions below can be called from both sleepable and
 * non-sleepable context. Assume non-sleepable from the BPF safety
 * point of view.
 */
11821BTF_ID(func, __add_to_page_cache_locked)
11822BTF_ID(func, should_fail_alloc_page)
11823BTF_ID(func, should_failslab)
11824BTF_SET_END(btf_non_sleepable_error_inject)
11825
11826static int check_non_sleepable_error_inject(u32 btf_id)
11827{
11828    return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
11829}
11830
11831int bpf_check_attach_target(struct bpf_verifier_log *log, const struct bpf_prog *prog, const struct bpf_prog *tgt_prog,
11832                            u32 btf_id, struct bpf_attach_target_info *tgt_info)
11833{
11834    bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
11835    const char prefix[] = "btf_trace_";
11836    int ret = 0, subprog = -1, i;
11837    const struct btf_type *t;
11838    bool conservative = true;
11839    const char *tname;
11840    struct btf *btf;
11841    long addr = 0;
11842
11843    if (!btf_id) {
11844        bpf_log(log, "Tracing programs must provide btf_id\n");
11845        return -EINVAL;
11846    }
11847    btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
11848    if (!btf) {
11849        bpf_log(log, "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
11850        return -EINVAL;
11851    }
11852    t = btf_type_by_id(btf, btf_id);
11853    if (!t) {
11854        bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
11855        return -EINVAL;
11856    }
11857    tname = btf_name_by_offset(btf, t->name_off);
11858    if (!tname) {
11859        bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
11860        return -EINVAL;
11861    }
11862    if (tgt_prog) {
11863        struct bpf_prog_aux *aux = tgt_prog->aux;
11864
11865        for (i = 0; i < aux->func_info_cnt; i++) {
11866            if (aux->func_info[i].type_id == btf_id) {
11867                subprog = i;
11868                break;
11869            }
11870        }
11871        if (subprog == -1) {
11872            bpf_log(log, "Subprog %s doesn't exist\n", tname);
11873            return -EINVAL;
11874        }
11875        conservative = aux->func_info_aux[subprog].unreliable;
11876        if (prog_extension) {
11877            if (conservative) {
11878                bpf_log(log, "Cannot replace static functions\n");
11879                return -EINVAL;
11880            }
11881            if (!prog->jit_requested) {
11882                bpf_log(log, "Extension programs should be JITed\n");
11883                return -EINVAL;
11884            }
11885        }
11886        if (!tgt_prog->jited) {
11887            bpf_log(log, "Can attach to only JITed progs\n");
11888            return -EINVAL;
11889        }
11890        if (tgt_prog->type == prog->type) {
11891            /* Cannot fentry/fexit another fentry/fexit program.
11892             * Cannot attach program extension to another extension.
11893             * It's ok to attach fentry/fexit to extension program.
11894             */
11895            bpf_log(log, "Cannot recursively attach\n");
11896            return -EINVAL;
11897        }
11898        if (tgt_prog->type == BPF_PROG_TYPE_TRACING && prog_extension &&
11899            (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) {
            /* Program extensions can extend all program types
             * except fentry/fexit, for the following reason:
             * fentry/fexit programs are used for performance
             * analysis and statistics and can be attached to any
             * program type except themselves. When an extension
             * program replaces e.g. an XDP function, it must still
             * be possible to analyze the performance of all
             * functions, i.e. of both the original XDP program and
             * its extension. Hence attaching fentry/fexit to
             * BPF_PROG_TYPE_EXT is allowed. If extending fentry/fexit
             * were allowed, it would be possible to create a long
             * call chain fentry->extension->fentry->extension beyond
             * a reasonable stack size. Hence extending fentry is not
             * allowed.
             */
11915            bpf_log(log, "Cannot extend fentry/fexit\n");
11916            return -EINVAL;
11917        }
11918    } else {
11919        if (prog_extension) {
11920            bpf_log(log, "Cannot replace kernel functions\n");
11921            return -EINVAL;
11922        }
11923    }
11924
11925    switch (prog->expected_attach_type) {
11926        case BPF_TRACE_RAW_TP:
11927            if (tgt_prog) {
11928                bpf_log(log, "Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
11929                return -EINVAL;
11930            }
11931            if (!btf_type_is_typedef(t)) {
11932                bpf_log(log, "attach_btf_id %u is not a typedef\n", btf_id);
11933                return -EINVAL;
11934            }
11935            if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
11936                bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", btf_id, tname);
11937                return -EINVAL;
11938            }
11939            tname += sizeof(prefix) - 1;
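            /* A raw tracepoint's btf_trace_<name> typedef points to a
             * function pointer whose prototype describes the
             * tracepoint arguments; walk typedef -> ptr -> func_proto
             * to reach it.
             */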
11940            t = btf_type_by_id(btf, t->type);
11941            if (!btf_type_is_ptr(t)) {
11942                /* should never happen in valid vmlinux build */
11943                return -EINVAL;
11944            }
11945            t = btf_type_by_id(btf, t->type);
11946            if (!btf_type_is_func_proto(t)) {
11947                /* should never happen in valid vmlinux build */
11948                return -EINVAL;
11949            }
11950
11951            break;
11952        case BPF_TRACE_ITER:
11953            if (!btf_type_is_func(t)) {
11954                bpf_log(log, "attach_btf_id %u is not a function\n", btf_id);
11955                return -EINVAL;
11956            }
11957            t = btf_type_by_id(btf, t->type);
11958            if (!btf_type_is_func_proto(t)) {
11959                return -EINVAL;
11960            }
11961            ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
11962            if (ret) {
11963                return ret;
11964            }
11965            break;
11966        default:
11967            if (!prog_extension) {
11968                return -EINVAL;
11969            }
11970            fallthrough;
11971        case BPF_MODIFY_RETURN:
11972        case BPF_LSM_MAC:
11973        case BPF_TRACE_FENTRY:
11974        case BPF_TRACE_FEXIT:
11975            if (!btf_type_is_func(t)) {
11976                bpf_log(log, "attach_btf_id %u is not a function\n", btf_id);
11977                return -EINVAL;
11978            }
11979            if (prog_extension && btf_check_type_match(log, prog, btf, t)) {
11980                return -EINVAL;
11981            }
11982            t = btf_type_by_id(btf, t->type);
11983            if (!btf_type_is_func_proto(t)) {
11984                return -EINVAL;
11985            }
11986
11987            if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
11988                (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
11989                 prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) {
11990                return -EINVAL;
11991            }
11992
11993            if (tgt_prog && conservative) {
11994                t = NULL;
11995            }
11996
11997            ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
11998            if (ret < 0) {
11999                return ret;
12000            }
12001
12002            if (tgt_prog) {
12003                if (subprog == 0) {
12004                    addr = (long)tgt_prog->bpf_func;
12005                } else {
12006                    addr = (long)tgt_prog->aux->func[subprog]->bpf_func;
12007                }
12008            } else {
12009                addr = kallsyms_lookup_name(tname);
12010                if (!addr) {
12011                    bpf_log(log, "The address of function %s cannot be found\n", tname);
12012                    return -ENOENT;
12013                }
12014            }
12015
12016            if (prog->aux->sleepable) {
12017                ret = -EINVAL;
12018                switch (prog->type) {
12019                    case BPF_PROG_TYPE_TRACING:
                        /* fentry/fexit/fmod_ret progs can be sleepable
                         * only if they attach to functions on the
                         * ALLOW_ERROR_INJECTION list and are not on the
                         * non-sleepable denylist below.
                         */
12023                        if (!check_non_sleepable_error_inject(btf_id) && within_error_injection_list(addr)) {
12024                            ret = 0;
12025                        }
12026                        break;
12027                    case BPF_PROG_TYPE_LSM:
12028                        /* LSM progs check that they are attached to bpf_lsm_*() funcs.
12029                         * Only some of them are sleepable.
12030                         */
12031                        if (check_sleepable_lsm_hook(btf_id)) {
12032                            ret = 0;
12033                        }
12034                        break;
12035                    default:
12036                        break;
12037                }
12038                if (ret) {
12039                    bpf_log(log, "%s is not sleepable\n", tname);
12040                    return ret;
12041                }
12042            } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
12043                if (tgt_prog) {
12044                    bpf_log(log, "can't modify return codes of BPF programs\n");
12045                    return -EINVAL;
12046                }
12047                ret = check_attach_modify_return(addr, tname);
12048                if (ret) {
12049                    bpf_log(log, "%s() is not modifiable\n", tname);
12050                    return ret;
12051                }
12052            }
12053
12054            break;
12055    }
12056    tgt_info->tgt_addr = addr;
12057    tgt_info->tgt_name = tname;
12058    tgt_info->tgt_type = t;
12059    return 0;
12060}
12061
12062static int check_attach_btf_id(struct bpf_verifier_env *env)
12063{
12064    struct bpf_prog *prog = env->prog;
12065    struct bpf_prog *tgt_prog = prog->aux->dst_prog;
12066    struct bpf_attach_target_info tgt_info = {};
12067    u32 btf_id = prog->aux->attach_btf_id;
12068    struct bpf_trampoline *tr;
12069    int ret;
12070    u64 key;
12071
12072    if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM) {
12073        verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
12074        return -EINVAL;
12075    }
12076
12077    if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
12078        return check_struct_ops_btf_id(env);
12079    }
12080
12081    if (prog->type != BPF_PROG_TYPE_TRACING && prog->type != BPF_PROG_TYPE_LSM && prog->type != BPF_PROG_TYPE_EXT) {
12082        return 0;
12083    }
12084
12085    ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
12086    if (ret) {
12087        return ret;
12088    }
12089
12090    if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
        /* To make an freplace program equivalent to its target, it
         * needs to inherit env->ops and expected_attach_type for the
         * rest of the verification.
         */
12095        env->ops = bpf_verifier_ops[tgt_prog->type];
12096        prog->expected_attach_type = tgt_prog->expected_attach_type;
12097    }
12098
12099    /* store info about the attachment target that will be used later */
12100    prog->aux->attach_func_proto = tgt_info.tgt_type;
12101    prog->aux->attach_func_name = tgt_info.tgt_name;
12102
12103    if (tgt_prog) {
12104        prog->aux->saved_dst_prog_type = tgt_prog->type;
12105        prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
12106    }
12107
12108    if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
12109        prog->aux->attach_btf_trace = true;
12110        return 0;
12111    } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
12112        if (!bpf_iter_prog_supported(prog)) {
12113            return -EINVAL;
12114        }
12115        return 0;
12116    }
12117
12118    if (prog->type == BPF_PROG_TYPE_LSM) {
12119        ret = bpf_lsm_verify_prog(&env->log, prog);
12120        if (ret < 0) {
12121            return ret;
12122        }
12123    }
12124
12125    key = bpf_trampoline_compute_key(tgt_prog, btf_id);
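    /* The trampoline key is derived from the target prog (or from
     * vmlinux when attaching to a kernel function) and the btf_id, so
     * programs attaching to the same target share one trampoline.
     */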
12126    tr = bpf_trampoline_get(key, &tgt_info);
12127    if (!tr) {
12128        return -ENOMEM;
12129    }
12130
12131    prog->aux->dst_trampoline = tr;
12132    return 0;
12133}
12134
12135struct btf *bpf_get_btf_vmlinux(void)
12136{
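    /* Lazily parse the kernel's own BTF the first time it is needed;
     * the re-check under bpf_verifier_lock keeps concurrent callers
     * from parsing it twice.
     */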
12137    if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
12138        mutex_lock(&bpf_verifier_lock);
12139        if (!btf_vmlinux) {
12140            btf_vmlinux = btf_parse_vmlinux();
12141        }
12142        mutex_unlock(&bpf_verifier_lock);
12143    }
12144    return btf_vmlinux;
12145}
12146
12147int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, union bpf_attr __user *uattr)
12148{
12149    u64 start_time = ktime_get_ns();
12150    struct bpf_verifier_env *env;
12151    struct bpf_verifier_log *log;
12152    int i, len, ret = -EINVAL;
12153    bool is_priv;
12154
12155    /* no program is valid */
12156    if (ARRAY_SIZE(bpf_verifier_ops) == 0) {
12157        return -EINVAL;
12158    }
12159
12160    /* 'struct bpf_verifier_env' can be global, but since it's not small,
12161     * allocate/free it every time bpf_check() is called
12162     */
12163    env = kzalloc(sizeof(struct bpf_verifier_env), GFP_KERNEL);
12164    if (!env) {
12165        return -ENOMEM;
12166    }
12167    log = &env->log;
12168
12169    len = (*prog)->len;
12170    env->insn_aux_data = vzalloc(array_size(sizeof(struct bpf_insn_aux_data), len));
12171    ret = -ENOMEM;
12172    if (!env->insn_aux_data) {
12173        goto err_free_env;
12174    }
12175    for (i = 0; i < len; i++) {
12176        env->insn_aux_data[i].orig_idx = i;
12177    }
12178    env->prog = *prog;
12179    env->ops = bpf_verifier_ops[env->prog->type];
12180    is_priv = bpf_capable();
12181
12182    bpf_get_btf_vmlinux();
12183
12184    /* grab the mutex to protect few globals used by verifier */
12185    if (!is_priv) {
12186        mutex_lock(&bpf_verifier_lock);
12187    }
12188
12189    if (attr->log_level || attr->log_buf || attr->log_size) {
12190        /* user requested verbose verifier output
12191         * and supplied buffer to store the verification trace
12192         */
12193        log->level = attr->log_level;
12194        log->ubuf = (char __user *)(unsigned long)attr->log_buf;
12195        log->len_total = attr->log_size;
12196
12197        /* log attributes have to be sane */
12198        if (!bpf_verifier_log_attr_valid(log)) {
12199            ret = -EINVAL;
12200            goto err_unlock;
12201        }
12202    }
12203
12204    if (IS_ERR(btf_vmlinux)) {
        /* Either gcc or pahole or the kernel is broken. */
12206        verbose(env, "in-kernel BTF is malformed\n");
12207        ret = PTR_ERR(btf_vmlinux);
12208        goto skip_full_check;
12209    }
12210
12211    env->strict_alignment = !!(attr->prog_flags & BPF_F_STRICT_ALIGNMENT);
12212    if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) {
12213        env->strict_alignment = true;
12214    }
12215    if (attr->prog_flags & BPF_F_ANY_ALIGNMENT) {
12216        env->strict_alignment = false;
12217    }
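    /* Note the ordering above: BPF_F_ANY_ALIGNMENT overrides both an
     * explicit BPF_F_STRICT_ALIGNMENT request and the default on
     * architectures without efficient unaligned access.
     */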
12218
12219    env->allow_ptr_leaks = bpf_allow_ptr_leaks();
12220    env->allow_uninit_stack = bpf_allow_uninit_stack();
12221    env->allow_ptr_to_map_access = bpf_allow_ptr_to_map_access();
12222    env->bypass_spec_v1 = bpf_bypass_spec_v1();
12223    env->bypass_spec_v4 = bpf_bypass_spec_v4();
12224    env->bpf_capable = bpf_capable();
12225
12226    if (is_priv) {
12227        env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
12228    }
12229
12230    env->explored_states = kvcalloc(state_htab_size(env), sizeof(struct bpf_verifier_state_list *), GFP_USER);
12231    ret = -ENOMEM;
12232    if (!env->explored_states) {
12233        goto skip_full_check;
12234    }
12235
12236    ret = check_subprogs(env);
12237    if (ret < 0) {
12238        goto skip_full_check;
12239    }
12240
12241    ret = check_btf_info(env, attr, uattr);
12242    if (ret < 0) {
12243        goto skip_full_check;
12244    }
12245
12246    ret = check_attach_btf_id(env);
12247    if (ret) {
12248        goto skip_full_check;
12249    }
12250
12251    ret = resolve_pseudo_ldimm64(env);
12252    if (ret < 0) {
12253        goto skip_full_check;
12254    }
12255
12256    if (bpf_prog_is_dev_bound(env->prog->aux)) {
12257        ret = bpf_prog_offload_verifier_prep(env->prog);
12258        if (ret) {
12259            goto skip_full_check;
12260        }
12261    }
12262
12263    ret = check_cfg(env);
12264    if (ret < 0) {
12265        goto skip_full_check;
12266    }
12267
12268    ret = do_check_subprogs(env);
12269    ret = ret ?: do_check_main(env);
12270
12271    if (ret == 0 && bpf_prog_is_dev_bound(env->prog->aux)) {
12272        ret = bpf_prog_offload_finalize(env);
12273    }
12274
12275skip_full_check:
12276    kvfree(env->explored_states);
12277
12278    if (ret == 0) {
12279        ret = check_max_stack_depth(env);
12280    }
12281
12282    /* instruction rewrites happen after this point */
12283    if (is_priv) {
12284        if (ret == 0) {
12285            opt_hard_wire_dead_code_branches(env);
12286        }
12287        if (ret == 0) {
12288            ret = opt_remove_dead_code(env);
12289        }
12290        if (ret == 0) {
12291            ret = opt_remove_nops(env);
12292        }
12293    } else {
12294        if (ret == 0) {
12295            sanitize_dead_code(env);
12296        }
12297    }
12298
12299    if (ret == 0) {
12300        /* program is valid, convert *(u32*)(ctx + off) accesses */
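        /* For example, for a socket filter a load of __sk_buff->len is
         * rewritten into a load from the corresponding offset inside
         * the kernel's struct sk_buff.
         */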
12301        ret = convert_ctx_accesses(env);
12302    }
12303
12304    if (ret == 0) {
12305        ret = fixup_bpf_calls(env);
12306    }
12307
    /* Do the 32-bit optimization only after insn patching has finished,
     * so that the patched insns are handled correctly as well.
     */
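    /* opt_subreg_zext_lo32_rnd_hi32() inserts explicit zero-extensions
     * after 32-bit sub-register writes when the JIT reports, via
     * bpf_jit_needs_zext(), that it relies on the verifier to do so;
     * verifier_zext then tells the JIT this has been taken care of.
     */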
12311    if (ret == 0 && !bpf_prog_is_dev_bound(env->prog->aux)) {
12312        ret = opt_subreg_zext_lo32_rnd_hi32(env, attr);
12313        env->prog->aux->verifier_zext = bpf_jit_needs_zext() ? !ret : false;
12314    }
12315
12316    if (ret == 0) {
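        /* Resolve bpf-to-bpf calls: JIT the subprograms and patch in
         * their addresses when possible, otherwise patch the calls for
         * the interpreter.
         */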
12317        ret = fixup_call_args(env);
12318    }
12319
12320    env->verification_time = ktime_get_ns() - start_time;
12321    print_verification_stats(env);
12322
12323    if (log->level && bpf_verifier_log_full(log)) {
12324        ret = -ENOSPC;
12325    }
12326    if (log->level && !log->ubuf) {
12327        ret = -EFAULT;
12328        goto err_release_maps;
12329    }
12330
12331    if (ret == 0 && env->used_map_cnt) {
12332        /* if program passed verifier, update used_maps in bpf_prog_info */
12333        env->prog->aux->used_maps = kmalloc_array(env->used_map_cnt, sizeof(env->used_maps[0]), GFP_KERNEL);
12334
12335        if (!env->prog->aux->used_maps) {
12336            ret = -ENOMEM;
12337            goto err_release_maps;
12338        }
12339
12340        memcpy(env->prog->aux->used_maps, env->used_maps, sizeof(env->used_maps[0]) * env->used_map_cnt);
12341        env->prog->aux->used_map_cnt = env->used_map_cnt;
12342
12343        /* program is valid. Convert pseudo bpf_ld_imm64 into generic
12344         * bpf_ld_imm64 instructions
12345         */
12346        convert_pseudo_ld_imm64(env);
12347    }
12348
12349    if (ret == 0) {
12350        adjust_btf_func(env);
12351    }
12352
12353err_release_maps:
12354    if (!env->prog->aux->used_maps) {
12355        /* if we didn't copy map pointers into bpf_prog_info, release
12356         * them now. Otherwise free_used_maps() will release them.
12357         */
12358        release_maps(env);
12359    }
12360
    /* extension progs temporarily inherit the attach_type of their targets
     * for verification purposes, so set it back to zero before returning
     */
12364    if (env->prog->type == BPF_PROG_TYPE_EXT) {
12365        env->prog->expected_attach_type = 0;
12366    }
12367
12368    *prog = env->prog;
12369err_unlock:
12370    if (!is_priv) {
12371        mutex_unlock(&bpf_verifier_lock);
12372    }
12373    vfree(env->insn_aux_data);
12374err_free_env:
12375    kfree(env);
12376    return ret;
12377}
12378