162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* Copyright (c) 2019 Facebook */ 362306a36Sopenharmony_ci 462306a36Sopenharmony_ci#include <linux/init.h> 562306a36Sopenharmony_ci#include <linux/types.h> 662306a36Sopenharmony_ci#include <linux/bpf_verifier.h> 762306a36Sopenharmony_ci#include <linux/bpf.h> 862306a36Sopenharmony_ci#include <linux/btf.h> 962306a36Sopenharmony_ci#include <linux/btf_ids.h> 1062306a36Sopenharmony_ci#include <linux/filter.h> 1162306a36Sopenharmony_ci#include <net/tcp.h> 1262306a36Sopenharmony_ci#include <net/bpf_sk_storage.h> 1362306a36Sopenharmony_ci 1462306a36Sopenharmony_ci/* "extern" is to avoid sparse warning. It is only used in bpf_struct_ops.c. */ 1562306a36Sopenharmony_ciextern struct bpf_struct_ops bpf_tcp_congestion_ops; 1662306a36Sopenharmony_ci 1762306a36Sopenharmony_cistatic u32 unsupported_ops[] = { 1862306a36Sopenharmony_ci offsetof(struct tcp_congestion_ops, get_info), 1962306a36Sopenharmony_ci}; 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cistatic const struct btf_type *tcp_sock_type; 2262306a36Sopenharmony_cistatic u32 tcp_sock_id, sock_id; 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_cistatic int bpf_tcp_ca_init(struct btf *btf) 2562306a36Sopenharmony_ci{ 2662306a36Sopenharmony_ci s32 type_id; 2762306a36Sopenharmony_ci 2862306a36Sopenharmony_ci type_id = btf_find_by_name_kind(btf, "sock", BTF_KIND_STRUCT); 2962306a36Sopenharmony_ci if (type_id < 0) 3062306a36Sopenharmony_ci return -EINVAL; 3162306a36Sopenharmony_ci sock_id = type_id; 3262306a36Sopenharmony_ci 3362306a36Sopenharmony_ci type_id = btf_find_by_name_kind(btf, "tcp_sock", BTF_KIND_STRUCT); 3462306a36Sopenharmony_ci if (type_id < 0) 3562306a36Sopenharmony_ci return -EINVAL; 3662306a36Sopenharmony_ci tcp_sock_id = type_id; 3762306a36Sopenharmony_ci tcp_sock_type = btf_type_by_id(btf, tcp_sock_id); 3862306a36Sopenharmony_ci 3962306a36Sopenharmony_ci return 0; 4062306a36Sopenharmony_ci} 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_cistatic bool is_unsupported(u32 member_offset) 4362306a36Sopenharmony_ci{ 4462306a36Sopenharmony_ci unsigned int i; 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_ci for (i = 0; i < ARRAY_SIZE(unsupported_ops); i++) { 4762306a36Sopenharmony_ci if (member_offset == unsupported_ops[i]) 4862306a36Sopenharmony_ci return true; 4962306a36Sopenharmony_ci } 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci return false; 5262306a36Sopenharmony_ci} 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistatic bool bpf_tcp_ca_is_valid_access(int off, int size, 5562306a36Sopenharmony_ci enum bpf_access_type type, 5662306a36Sopenharmony_ci const struct bpf_prog *prog, 5762306a36Sopenharmony_ci struct bpf_insn_access_aux *info) 5862306a36Sopenharmony_ci{ 5962306a36Sopenharmony_ci if (!bpf_tracing_btf_ctx_access(off, size, type, prog, info)) 6062306a36Sopenharmony_ci return false; 6162306a36Sopenharmony_ci 6262306a36Sopenharmony_ci if (base_type(info->reg_type) == PTR_TO_BTF_ID && 6362306a36Sopenharmony_ci !bpf_type_has_unsafe_modifiers(info->reg_type) && 6462306a36Sopenharmony_ci info->btf_id == sock_id) 6562306a36Sopenharmony_ci /* promote it to tcp_sock */ 6662306a36Sopenharmony_ci info->btf_id = tcp_sock_id; 6762306a36Sopenharmony_ci 6862306a36Sopenharmony_ci return true; 6962306a36Sopenharmony_ci} 7062306a36Sopenharmony_ci 7162306a36Sopenharmony_cistatic int bpf_tcp_ca_btf_struct_access(struct bpf_verifier_log *log, 7262306a36Sopenharmony_ci const struct bpf_reg_state *reg, 7362306a36Sopenharmony_ci int off, int size) 7462306a36Sopenharmony_ci{ 7562306a36Sopenharmony_ci const struct btf_type *t; 7662306a36Sopenharmony_ci size_t end; 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ci t = btf_type_by_id(reg->btf, reg->btf_id); 7962306a36Sopenharmony_ci if (t != tcp_sock_type) { 8062306a36Sopenharmony_ci bpf_log(log, "only read is supported\n"); 8162306a36Sopenharmony_ci return -EACCES; 8262306a36Sopenharmony_ci } 8362306a36Sopenharmony_ci 8462306a36Sopenharmony_ci switch (off) { 8562306a36Sopenharmony_ci case offsetof(struct sock, sk_pacing_rate): 8662306a36Sopenharmony_ci end = offsetofend(struct sock, sk_pacing_rate); 8762306a36Sopenharmony_ci break; 8862306a36Sopenharmony_ci case offsetof(struct sock, sk_pacing_status): 8962306a36Sopenharmony_ci end = offsetofend(struct sock, sk_pacing_status); 9062306a36Sopenharmony_ci break; 9162306a36Sopenharmony_ci case bpf_ctx_range(struct inet_connection_sock, icsk_ca_priv): 9262306a36Sopenharmony_ci end = offsetofend(struct inet_connection_sock, icsk_ca_priv); 9362306a36Sopenharmony_ci break; 9462306a36Sopenharmony_ci case offsetof(struct inet_connection_sock, icsk_ack.pending): 9562306a36Sopenharmony_ci end = offsetofend(struct inet_connection_sock, 9662306a36Sopenharmony_ci icsk_ack.pending); 9762306a36Sopenharmony_ci break; 9862306a36Sopenharmony_ci case offsetof(struct tcp_sock, snd_cwnd): 9962306a36Sopenharmony_ci end = offsetofend(struct tcp_sock, snd_cwnd); 10062306a36Sopenharmony_ci break; 10162306a36Sopenharmony_ci case offsetof(struct tcp_sock, snd_cwnd_cnt): 10262306a36Sopenharmony_ci end = offsetofend(struct tcp_sock, snd_cwnd_cnt); 10362306a36Sopenharmony_ci break; 10462306a36Sopenharmony_ci case offsetof(struct tcp_sock, snd_ssthresh): 10562306a36Sopenharmony_ci end = offsetofend(struct tcp_sock, snd_ssthresh); 10662306a36Sopenharmony_ci break; 10762306a36Sopenharmony_ci case offsetof(struct tcp_sock, ecn_flags): 10862306a36Sopenharmony_ci end = offsetofend(struct tcp_sock, ecn_flags); 10962306a36Sopenharmony_ci break; 11062306a36Sopenharmony_ci case offsetof(struct tcp_sock, app_limited): 11162306a36Sopenharmony_ci end = offsetofend(struct tcp_sock, app_limited); 11262306a36Sopenharmony_ci break; 11362306a36Sopenharmony_ci default: 11462306a36Sopenharmony_ci bpf_log(log, "no write support to tcp_sock at off %d\n", off); 11562306a36Sopenharmony_ci return -EACCES; 11662306a36Sopenharmony_ci } 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci if (off + size > end) { 11962306a36Sopenharmony_ci bpf_log(log, 12062306a36Sopenharmony_ci "write access at off %d with size %d beyond the member of tcp_sock ended at %zu\n", 12162306a36Sopenharmony_ci off, size, end); 12262306a36Sopenharmony_ci return -EACCES; 12362306a36Sopenharmony_ci } 12462306a36Sopenharmony_ci 12562306a36Sopenharmony_ci return 0; 12662306a36Sopenharmony_ci} 12762306a36Sopenharmony_ci 12862306a36Sopenharmony_ciBPF_CALL_2(bpf_tcp_send_ack, struct tcp_sock *, tp, u32, rcv_nxt) 12962306a36Sopenharmony_ci{ 13062306a36Sopenharmony_ci /* bpf_tcp_ca prog cannot have NULL tp */ 13162306a36Sopenharmony_ci __tcp_send_ack((struct sock *)tp, rcv_nxt); 13262306a36Sopenharmony_ci return 0; 13362306a36Sopenharmony_ci} 13462306a36Sopenharmony_ci 13562306a36Sopenharmony_cistatic const struct bpf_func_proto bpf_tcp_send_ack_proto = { 13662306a36Sopenharmony_ci .func = bpf_tcp_send_ack, 13762306a36Sopenharmony_ci .gpl_only = false, 13862306a36Sopenharmony_ci /* In case we want to report error later */ 13962306a36Sopenharmony_ci .ret_type = RET_INTEGER, 14062306a36Sopenharmony_ci .arg1_type = ARG_PTR_TO_BTF_ID, 14162306a36Sopenharmony_ci .arg1_btf_id = &tcp_sock_id, 14262306a36Sopenharmony_ci .arg2_type = ARG_ANYTHING, 14362306a36Sopenharmony_ci}; 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_cistatic u32 prog_ops_moff(const struct bpf_prog *prog) 14662306a36Sopenharmony_ci{ 14762306a36Sopenharmony_ci const struct btf_member *m; 14862306a36Sopenharmony_ci const struct btf_type *t; 14962306a36Sopenharmony_ci u32 midx; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci midx = prog->expected_attach_type; 15262306a36Sopenharmony_ci t = bpf_tcp_congestion_ops.type; 15362306a36Sopenharmony_ci m = &btf_type_member(t)[midx]; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci return __btf_member_bit_offset(t, m) / 8; 15662306a36Sopenharmony_ci} 15762306a36Sopenharmony_ci 15862306a36Sopenharmony_cistatic const struct bpf_func_proto * 15962306a36Sopenharmony_cibpf_tcp_ca_get_func_proto(enum bpf_func_id func_id, 16062306a36Sopenharmony_ci const struct bpf_prog *prog) 16162306a36Sopenharmony_ci{ 16262306a36Sopenharmony_ci switch (func_id) { 16362306a36Sopenharmony_ci case BPF_FUNC_tcp_send_ack: 16462306a36Sopenharmony_ci return &bpf_tcp_send_ack_proto; 16562306a36Sopenharmony_ci case BPF_FUNC_sk_storage_get: 16662306a36Sopenharmony_ci return &bpf_sk_storage_get_proto; 16762306a36Sopenharmony_ci case BPF_FUNC_sk_storage_delete: 16862306a36Sopenharmony_ci return &bpf_sk_storage_delete_proto; 16962306a36Sopenharmony_ci case BPF_FUNC_setsockopt: 17062306a36Sopenharmony_ci /* Does not allow release() to call setsockopt. 17162306a36Sopenharmony_ci * release() is called when the current bpf-tcp-cc 17262306a36Sopenharmony_ci * is retiring. It is not allowed to call 17362306a36Sopenharmony_ci * setsockopt() to make further changes which 17462306a36Sopenharmony_ci * may potentially allocate new resources. 17562306a36Sopenharmony_ci */ 17662306a36Sopenharmony_ci if (prog_ops_moff(prog) != 17762306a36Sopenharmony_ci offsetof(struct tcp_congestion_ops, release)) 17862306a36Sopenharmony_ci return &bpf_sk_setsockopt_proto; 17962306a36Sopenharmony_ci return NULL; 18062306a36Sopenharmony_ci case BPF_FUNC_getsockopt: 18162306a36Sopenharmony_ci /* Since get/setsockopt is usually expected to 18262306a36Sopenharmony_ci * be available together, disable getsockopt for 18362306a36Sopenharmony_ci * release also to avoid usage surprise. 18462306a36Sopenharmony_ci * The bpf-tcp-cc already has a more powerful way 18562306a36Sopenharmony_ci * to read tcp_sock from the PTR_TO_BTF_ID. 18662306a36Sopenharmony_ci */ 18762306a36Sopenharmony_ci if (prog_ops_moff(prog) != 18862306a36Sopenharmony_ci offsetof(struct tcp_congestion_ops, release)) 18962306a36Sopenharmony_ci return &bpf_sk_getsockopt_proto; 19062306a36Sopenharmony_ci return NULL; 19162306a36Sopenharmony_ci case BPF_FUNC_ktime_get_coarse_ns: 19262306a36Sopenharmony_ci return &bpf_ktime_get_coarse_ns_proto; 19362306a36Sopenharmony_ci default: 19462306a36Sopenharmony_ci return bpf_base_func_proto(func_id); 19562306a36Sopenharmony_ci } 19662306a36Sopenharmony_ci} 19762306a36Sopenharmony_ci 19862306a36Sopenharmony_ciBTF_SET8_START(bpf_tcp_ca_check_kfunc_ids) 19962306a36Sopenharmony_ciBTF_ID_FLAGS(func, tcp_reno_ssthresh) 20062306a36Sopenharmony_ciBTF_ID_FLAGS(func, tcp_reno_cong_avoid) 20162306a36Sopenharmony_ciBTF_ID_FLAGS(func, tcp_reno_undo_cwnd) 20262306a36Sopenharmony_ciBTF_ID_FLAGS(func, tcp_slow_start) 20362306a36Sopenharmony_ciBTF_ID_FLAGS(func, tcp_cong_avoid_ai) 20462306a36Sopenharmony_ciBTF_SET8_END(bpf_tcp_ca_check_kfunc_ids) 20562306a36Sopenharmony_ci 20662306a36Sopenharmony_cistatic const struct btf_kfunc_id_set bpf_tcp_ca_kfunc_set = { 20762306a36Sopenharmony_ci .owner = THIS_MODULE, 20862306a36Sopenharmony_ci .set = &bpf_tcp_ca_check_kfunc_ids, 20962306a36Sopenharmony_ci}; 21062306a36Sopenharmony_ci 21162306a36Sopenharmony_cistatic const struct bpf_verifier_ops bpf_tcp_ca_verifier_ops = { 21262306a36Sopenharmony_ci .get_func_proto = bpf_tcp_ca_get_func_proto, 21362306a36Sopenharmony_ci .is_valid_access = bpf_tcp_ca_is_valid_access, 21462306a36Sopenharmony_ci .btf_struct_access = bpf_tcp_ca_btf_struct_access, 21562306a36Sopenharmony_ci}; 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_cistatic int bpf_tcp_ca_init_member(const struct btf_type *t, 21862306a36Sopenharmony_ci const struct btf_member *member, 21962306a36Sopenharmony_ci void *kdata, const void *udata) 22062306a36Sopenharmony_ci{ 22162306a36Sopenharmony_ci const struct tcp_congestion_ops *utcp_ca; 22262306a36Sopenharmony_ci struct tcp_congestion_ops *tcp_ca; 22362306a36Sopenharmony_ci u32 moff; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci utcp_ca = (const struct tcp_congestion_ops *)udata; 22662306a36Sopenharmony_ci tcp_ca = (struct tcp_congestion_ops *)kdata; 22762306a36Sopenharmony_ci 22862306a36Sopenharmony_ci moff = __btf_member_bit_offset(t, member) / 8; 22962306a36Sopenharmony_ci switch (moff) { 23062306a36Sopenharmony_ci case offsetof(struct tcp_congestion_ops, flags): 23162306a36Sopenharmony_ci if (utcp_ca->flags & ~TCP_CONG_MASK) 23262306a36Sopenharmony_ci return -EINVAL; 23362306a36Sopenharmony_ci tcp_ca->flags = utcp_ca->flags; 23462306a36Sopenharmony_ci return 1; 23562306a36Sopenharmony_ci case offsetof(struct tcp_congestion_ops, name): 23662306a36Sopenharmony_ci if (bpf_obj_name_cpy(tcp_ca->name, utcp_ca->name, 23762306a36Sopenharmony_ci sizeof(tcp_ca->name)) <= 0) 23862306a36Sopenharmony_ci return -EINVAL; 23962306a36Sopenharmony_ci return 1; 24062306a36Sopenharmony_ci } 24162306a36Sopenharmony_ci 24262306a36Sopenharmony_ci return 0; 24362306a36Sopenharmony_ci} 24462306a36Sopenharmony_ci 24562306a36Sopenharmony_cistatic int bpf_tcp_ca_check_member(const struct btf_type *t, 24662306a36Sopenharmony_ci const struct btf_member *member, 24762306a36Sopenharmony_ci const struct bpf_prog *prog) 24862306a36Sopenharmony_ci{ 24962306a36Sopenharmony_ci if (is_unsupported(__btf_member_bit_offset(t, member) / 8)) 25062306a36Sopenharmony_ci return -ENOTSUPP; 25162306a36Sopenharmony_ci return 0; 25262306a36Sopenharmony_ci} 25362306a36Sopenharmony_ci 25462306a36Sopenharmony_cistatic int bpf_tcp_ca_reg(void *kdata) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci return tcp_register_congestion_control(kdata); 25762306a36Sopenharmony_ci} 25862306a36Sopenharmony_ci 25962306a36Sopenharmony_cistatic void bpf_tcp_ca_unreg(void *kdata) 26062306a36Sopenharmony_ci{ 26162306a36Sopenharmony_ci tcp_unregister_congestion_control(kdata); 26262306a36Sopenharmony_ci} 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_cistatic int bpf_tcp_ca_update(void *kdata, void *old_kdata) 26562306a36Sopenharmony_ci{ 26662306a36Sopenharmony_ci return tcp_update_congestion_control(kdata, old_kdata); 26762306a36Sopenharmony_ci} 26862306a36Sopenharmony_ci 26962306a36Sopenharmony_cistatic int bpf_tcp_ca_validate(void *kdata) 27062306a36Sopenharmony_ci{ 27162306a36Sopenharmony_ci return tcp_validate_congestion_control(kdata); 27262306a36Sopenharmony_ci} 27362306a36Sopenharmony_ci 27462306a36Sopenharmony_cistruct bpf_struct_ops bpf_tcp_congestion_ops = { 27562306a36Sopenharmony_ci .verifier_ops = &bpf_tcp_ca_verifier_ops, 27662306a36Sopenharmony_ci .reg = bpf_tcp_ca_reg, 27762306a36Sopenharmony_ci .unreg = bpf_tcp_ca_unreg, 27862306a36Sopenharmony_ci .update = bpf_tcp_ca_update, 27962306a36Sopenharmony_ci .check_member = bpf_tcp_ca_check_member, 28062306a36Sopenharmony_ci .init_member = bpf_tcp_ca_init_member, 28162306a36Sopenharmony_ci .init = bpf_tcp_ca_init, 28262306a36Sopenharmony_ci .validate = bpf_tcp_ca_validate, 28362306a36Sopenharmony_ci .name = "tcp_congestion_ops", 28462306a36Sopenharmony_ci}; 28562306a36Sopenharmony_ci 28662306a36Sopenharmony_cistatic int __init bpf_tcp_ca_kfunc_init(void) 28762306a36Sopenharmony_ci{ 28862306a36Sopenharmony_ci return register_btf_kfunc_id_set(BPF_PROG_TYPE_STRUCT_OPS, &bpf_tcp_ca_kfunc_set); 28962306a36Sopenharmony_ci} 29062306a36Sopenharmony_cilate_initcall(bpf_tcp_ca_kfunc_init); 291