162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0-only 262306a36Sopenharmony_ci/* 362306a36Sopenharmony_ci * Pluggable TCP congestion control support and newReno 462306a36Sopenharmony_ci * congestion control. 562306a36Sopenharmony_ci * Based on ideas from I/O scheduler support and Web100. 662306a36Sopenharmony_ci * 762306a36Sopenharmony_ci * Copyright (C) 2005 Stephen Hemminger <shemminger@osdl.org> 862306a36Sopenharmony_ci */ 962306a36Sopenharmony_ci 1062306a36Sopenharmony_ci#define pr_fmt(fmt) "TCP: " fmt 1162306a36Sopenharmony_ci 1262306a36Sopenharmony_ci#include <linux/module.h> 1362306a36Sopenharmony_ci#include <linux/mm.h> 1462306a36Sopenharmony_ci#include <linux/types.h> 1562306a36Sopenharmony_ci#include <linux/list.h> 1662306a36Sopenharmony_ci#include <linux/gfp.h> 1762306a36Sopenharmony_ci#include <linux/jhash.h> 1862306a36Sopenharmony_ci#include <net/tcp.h> 1962306a36Sopenharmony_ci#include <trace/events/tcp.h> 2062306a36Sopenharmony_ci 2162306a36Sopenharmony_cistatic DEFINE_SPINLOCK(tcp_cong_list_lock); 2262306a36Sopenharmony_cistatic LIST_HEAD(tcp_cong_list); 2362306a36Sopenharmony_ci 2462306a36Sopenharmony_ci/* Simple linear search, don't expect many entries! */ 2562306a36Sopenharmony_cistruct tcp_congestion_ops *tcp_ca_find(const char *name) 2662306a36Sopenharmony_ci{ 2762306a36Sopenharmony_ci struct tcp_congestion_ops *e; 2862306a36Sopenharmony_ci 2962306a36Sopenharmony_ci list_for_each_entry_rcu(e, &tcp_cong_list, list) { 3062306a36Sopenharmony_ci if (strcmp(e->name, name) == 0) 3162306a36Sopenharmony_ci return e; 3262306a36Sopenharmony_ci } 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci return NULL; 3562306a36Sopenharmony_ci} 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_civoid tcp_set_ca_state(struct sock *sk, const u8 ca_state) 3862306a36Sopenharmony_ci{ 3962306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 4062306a36Sopenharmony_ci 4162306a36Sopenharmony_ci trace_tcp_cong_state_set(sk, ca_state); 4262306a36Sopenharmony_ci 4362306a36Sopenharmony_ci if (icsk->icsk_ca_ops->set_state) 4462306a36Sopenharmony_ci icsk->icsk_ca_ops->set_state(sk, ca_state); 4562306a36Sopenharmony_ci icsk->icsk_ca_state = ca_state; 4662306a36Sopenharmony_ci} 4762306a36Sopenharmony_ci 4862306a36Sopenharmony_ci/* Must be called with rcu lock held */ 4962306a36Sopenharmony_cistatic struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, 5062306a36Sopenharmony_ci const char *name) 5162306a36Sopenharmony_ci{ 5262306a36Sopenharmony_ci struct tcp_congestion_ops *ca = tcp_ca_find(name); 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_ci#ifdef CONFIG_MODULES 5562306a36Sopenharmony_ci if (!ca && capable(CAP_NET_ADMIN)) { 5662306a36Sopenharmony_ci rcu_read_unlock(); 5762306a36Sopenharmony_ci request_module("tcp_%s", name); 5862306a36Sopenharmony_ci rcu_read_lock(); 5962306a36Sopenharmony_ci ca = tcp_ca_find(name); 6062306a36Sopenharmony_ci } 6162306a36Sopenharmony_ci#endif 6262306a36Sopenharmony_ci return ca; 6362306a36Sopenharmony_ci} 6462306a36Sopenharmony_ci 6562306a36Sopenharmony_ci/* Simple linear search, not much in here. */ 6662306a36Sopenharmony_cistruct tcp_congestion_ops *tcp_ca_find_key(u32 key) 6762306a36Sopenharmony_ci{ 6862306a36Sopenharmony_ci struct tcp_congestion_ops *e; 6962306a36Sopenharmony_ci 7062306a36Sopenharmony_ci list_for_each_entry_rcu(e, &tcp_cong_list, list) { 7162306a36Sopenharmony_ci if (e->key == key) 7262306a36Sopenharmony_ci return e; 7362306a36Sopenharmony_ci } 7462306a36Sopenharmony_ci 7562306a36Sopenharmony_ci return NULL; 7662306a36Sopenharmony_ci} 7762306a36Sopenharmony_ci 7862306a36Sopenharmony_ciint tcp_validate_congestion_control(struct tcp_congestion_ops *ca) 7962306a36Sopenharmony_ci{ 8062306a36Sopenharmony_ci /* all algorithms must implement these */ 8162306a36Sopenharmony_ci if (!ca->ssthresh || !ca->undo_cwnd || 8262306a36Sopenharmony_ci !(ca->cong_avoid || ca->cong_control)) { 8362306a36Sopenharmony_ci pr_err("%s does not implement required ops\n", ca->name); 8462306a36Sopenharmony_ci return -EINVAL; 8562306a36Sopenharmony_ci } 8662306a36Sopenharmony_ci 8762306a36Sopenharmony_ci return 0; 8862306a36Sopenharmony_ci} 8962306a36Sopenharmony_ci 9062306a36Sopenharmony_ci/* Attach new congestion control algorithm to the list 9162306a36Sopenharmony_ci * of available options. 9262306a36Sopenharmony_ci */ 9362306a36Sopenharmony_ciint tcp_register_congestion_control(struct tcp_congestion_ops *ca) 9462306a36Sopenharmony_ci{ 9562306a36Sopenharmony_ci int ret; 9662306a36Sopenharmony_ci 9762306a36Sopenharmony_ci ret = tcp_validate_congestion_control(ca); 9862306a36Sopenharmony_ci if (ret) 9962306a36Sopenharmony_ci return ret; 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); 10262306a36Sopenharmony_ci 10362306a36Sopenharmony_ci spin_lock(&tcp_cong_list_lock); 10462306a36Sopenharmony_ci if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { 10562306a36Sopenharmony_ci pr_notice("%s already registered or non-unique key\n", 10662306a36Sopenharmony_ci ca->name); 10762306a36Sopenharmony_ci ret = -EEXIST; 10862306a36Sopenharmony_ci } else { 10962306a36Sopenharmony_ci list_add_tail_rcu(&ca->list, &tcp_cong_list); 11062306a36Sopenharmony_ci pr_debug("%s registered\n", ca->name); 11162306a36Sopenharmony_ci } 11262306a36Sopenharmony_ci spin_unlock(&tcp_cong_list_lock); 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci return ret; 11562306a36Sopenharmony_ci} 11662306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_register_congestion_control); 11762306a36Sopenharmony_ci 11862306a36Sopenharmony_ci/* 11962306a36Sopenharmony_ci * Remove congestion control algorithm, called from 12062306a36Sopenharmony_ci * the module's remove function. Module ref counts are used 12162306a36Sopenharmony_ci * to ensure that this can't be done till all sockets using 12262306a36Sopenharmony_ci * that method are closed. 12362306a36Sopenharmony_ci */ 12462306a36Sopenharmony_civoid tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) 12562306a36Sopenharmony_ci{ 12662306a36Sopenharmony_ci spin_lock(&tcp_cong_list_lock); 12762306a36Sopenharmony_ci list_del_rcu(&ca->list); 12862306a36Sopenharmony_ci spin_unlock(&tcp_cong_list_lock); 12962306a36Sopenharmony_ci 13062306a36Sopenharmony_ci /* Wait for outstanding readers to complete before the 13162306a36Sopenharmony_ci * module gets removed entirely. 13262306a36Sopenharmony_ci * 13362306a36Sopenharmony_ci * A try_module_get() should fail by now as our module is 13462306a36Sopenharmony_ci * in "going" state since no refs are held anymore and 13562306a36Sopenharmony_ci * module_exit() handler being called. 13662306a36Sopenharmony_ci */ 13762306a36Sopenharmony_ci synchronize_rcu(); 13862306a36Sopenharmony_ci} 13962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); 14062306a36Sopenharmony_ci 14162306a36Sopenharmony_ci/* Replace a registered old ca with a new one. 14262306a36Sopenharmony_ci * 14362306a36Sopenharmony_ci * The new ca must have the same name as the old one, that has been 14462306a36Sopenharmony_ci * registered. 14562306a36Sopenharmony_ci */ 14662306a36Sopenharmony_ciint tcp_update_congestion_control(struct tcp_congestion_ops *ca, struct tcp_congestion_ops *old_ca) 14762306a36Sopenharmony_ci{ 14862306a36Sopenharmony_ci struct tcp_congestion_ops *existing; 14962306a36Sopenharmony_ci int ret; 15062306a36Sopenharmony_ci 15162306a36Sopenharmony_ci ret = tcp_validate_congestion_control(ca); 15262306a36Sopenharmony_ci if (ret) 15362306a36Sopenharmony_ci return ret; 15462306a36Sopenharmony_ci 15562306a36Sopenharmony_ci ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); 15662306a36Sopenharmony_ci 15762306a36Sopenharmony_ci spin_lock(&tcp_cong_list_lock); 15862306a36Sopenharmony_ci existing = tcp_ca_find_key(old_ca->key); 15962306a36Sopenharmony_ci if (ca->key == TCP_CA_UNSPEC || !existing || strcmp(existing->name, ca->name)) { 16062306a36Sopenharmony_ci pr_notice("%s not registered or non-unique key\n", 16162306a36Sopenharmony_ci ca->name); 16262306a36Sopenharmony_ci ret = -EINVAL; 16362306a36Sopenharmony_ci } else if (existing != old_ca) { 16462306a36Sopenharmony_ci pr_notice("invalid old congestion control algorithm to replace\n"); 16562306a36Sopenharmony_ci ret = -EINVAL; 16662306a36Sopenharmony_ci } else { 16762306a36Sopenharmony_ci /* Add the new one before removing the old one to keep 16862306a36Sopenharmony_ci * one implementation available all the time. 16962306a36Sopenharmony_ci */ 17062306a36Sopenharmony_ci list_add_tail_rcu(&ca->list, &tcp_cong_list); 17162306a36Sopenharmony_ci list_del_rcu(&existing->list); 17262306a36Sopenharmony_ci pr_debug("%s updated\n", ca->name); 17362306a36Sopenharmony_ci } 17462306a36Sopenharmony_ci spin_unlock(&tcp_cong_list_lock); 17562306a36Sopenharmony_ci 17662306a36Sopenharmony_ci /* Wait for outstanding readers to complete before the 17762306a36Sopenharmony_ci * module or struct_ops gets removed entirely. 17862306a36Sopenharmony_ci */ 17962306a36Sopenharmony_ci if (!ret) 18062306a36Sopenharmony_ci synchronize_rcu(); 18162306a36Sopenharmony_ci 18262306a36Sopenharmony_ci return ret; 18362306a36Sopenharmony_ci} 18462306a36Sopenharmony_ci 18562306a36Sopenharmony_ciu32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) 18662306a36Sopenharmony_ci{ 18762306a36Sopenharmony_ci const struct tcp_congestion_ops *ca; 18862306a36Sopenharmony_ci u32 key = TCP_CA_UNSPEC; 18962306a36Sopenharmony_ci 19062306a36Sopenharmony_ci might_sleep(); 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci rcu_read_lock(); 19362306a36Sopenharmony_ci ca = tcp_ca_find_autoload(net, name); 19462306a36Sopenharmony_ci if (ca) { 19562306a36Sopenharmony_ci key = ca->key; 19662306a36Sopenharmony_ci *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; 19762306a36Sopenharmony_ci } 19862306a36Sopenharmony_ci rcu_read_unlock(); 19962306a36Sopenharmony_ci 20062306a36Sopenharmony_ci return key; 20162306a36Sopenharmony_ci} 20262306a36Sopenharmony_ci 20362306a36Sopenharmony_cichar *tcp_ca_get_name_by_key(u32 key, char *buffer) 20462306a36Sopenharmony_ci{ 20562306a36Sopenharmony_ci const struct tcp_congestion_ops *ca; 20662306a36Sopenharmony_ci char *ret = NULL; 20762306a36Sopenharmony_ci 20862306a36Sopenharmony_ci rcu_read_lock(); 20962306a36Sopenharmony_ci ca = tcp_ca_find_key(key); 21062306a36Sopenharmony_ci if (ca) 21162306a36Sopenharmony_ci ret = strncpy(buffer, ca->name, 21262306a36Sopenharmony_ci TCP_CA_NAME_MAX); 21362306a36Sopenharmony_ci rcu_read_unlock(); 21462306a36Sopenharmony_ci 21562306a36Sopenharmony_ci return ret; 21662306a36Sopenharmony_ci} 21762306a36Sopenharmony_ci 21862306a36Sopenharmony_ci/* Assign choice of congestion control. */ 21962306a36Sopenharmony_civoid tcp_assign_congestion_control(struct sock *sk) 22062306a36Sopenharmony_ci{ 22162306a36Sopenharmony_ci struct net *net = sock_net(sk); 22262306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 22362306a36Sopenharmony_ci const struct tcp_congestion_ops *ca; 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_ci rcu_read_lock(); 22662306a36Sopenharmony_ci ca = rcu_dereference(net->ipv4.tcp_congestion_control); 22762306a36Sopenharmony_ci if (unlikely(!bpf_try_module_get(ca, ca->owner))) 22862306a36Sopenharmony_ci ca = &tcp_reno; 22962306a36Sopenharmony_ci icsk->icsk_ca_ops = ca; 23062306a36Sopenharmony_ci rcu_read_unlock(); 23162306a36Sopenharmony_ci 23262306a36Sopenharmony_ci memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); 23362306a36Sopenharmony_ci if (ca->flags & TCP_CONG_NEEDS_ECN) 23462306a36Sopenharmony_ci INET_ECN_xmit(sk); 23562306a36Sopenharmony_ci else 23662306a36Sopenharmony_ci INET_ECN_dontxmit(sk); 23762306a36Sopenharmony_ci} 23862306a36Sopenharmony_ci 23962306a36Sopenharmony_civoid tcp_init_congestion_control(struct sock *sk) 24062306a36Sopenharmony_ci{ 24162306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 24262306a36Sopenharmony_ci 24362306a36Sopenharmony_ci tcp_sk(sk)->prior_ssthresh = 0; 24462306a36Sopenharmony_ci if (icsk->icsk_ca_ops->init) 24562306a36Sopenharmony_ci icsk->icsk_ca_ops->init(sk); 24662306a36Sopenharmony_ci if (tcp_ca_needs_ecn(sk)) 24762306a36Sopenharmony_ci INET_ECN_xmit(sk); 24862306a36Sopenharmony_ci else 24962306a36Sopenharmony_ci INET_ECN_dontxmit(sk); 25062306a36Sopenharmony_ci icsk->icsk_ca_initialized = 1; 25162306a36Sopenharmony_ci} 25262306a36Sopenharmony_ci 25362306a36Sopenharmony_cistatic void tcp_reinit_congestion_control(struct sock *sk, 25462306a36Sopenharmony_ci const struct tcp_congestion_ops *ca) 25562306a36Sopenharmony_ci{ 25662306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 25762306a36Sopenharmony_ci 25862306a36Sopenharmony_ci tcp_cleanup_congestion_control(sk); 25962306a36Sopenharmony_ci icsk->icsk_ca_ops = ca; 26062306a36Sopenharmony_ci icsk->icsk_ca_setsockopt = 1; 26162306a36Sopenharmony_ci memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); 26262306a36Sopenharmony_ci 26362306a36Sopenharmony_ci if (ca->flags & TCP_CONG_NEEDS_ECN) 26462306a36Sopenharmony_ci INET_ECN_xmit(sk); 26562306a36Sopenharmony_ci else 26662306a36Sopenharmony_ci INET_ECN_dontxmit(sk); 26762306a36Sopenharmony_ci 26862306a36Sopenharmony_ci if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) 26962306a36Sopenharmony_ci tcp_init_congestion_control(sk); 27062306a36Sopenharmony_ci} 27162306a36Sopenharmony_ci 27262306a36Sopenharmony_ci/* Manage refcounts on socket close. */ 27362306a36Sopenharmony_civoid tcp_cleanup_congestion_control(struct sock *sk) 27462306a36Sopenharmony_ci{ 27562306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci if (icsk->icsk_ca_ops->release) 27862306a36Sopenharmony_ci icsk->icsk_ca_ops->release(sk); 27962306a36Sopenharmony_ci bpf_module_put(icsk->icsk_ca_ops, icsk->icsk_ca_ops->owner); 28062306a36Sopenharmony_ci} 28162306a36Sopenharmony_ci 28262306a36Sopenharmony_ci/* Used by sysctl to change default congestion control */ 28362306a36Sopenharmony_ciint tcp_set_default_congestion_control(struct net *net, const char *name) 28462306a36Sopenharmony_ci{ 28562306a36Sopenharmony_ci struct tcp_congestion_ops *ca; 28662306a36Sopenharmony_ci const struct tcp_congestion_ops *prev; 28762306a36Sopenharmony_ci int ret; 28862306a36Sopenharmony_ci 28962306a36Sopenharmony_ci rcu_read_lock(); 29062306a36Sopenharmony_ci ca = tcp_ca_find_autoload(net, name); 29162306a36Sopenharmony_ci if (!ca) { 29262306a36Sopenharmony_ci ret = -ENOENT; 29362306a36Sopenharmony_ci } else if (!bpf_try_module_get(ca, ca->owner)) { 29462306a36Sopenharmony_ci ret = -EBUSY; 29562306a36Sopenharmony_ci } else if (!net_eq(net, &init_net) && 29662306a36Sopenharmony_ci !(ca->flags & TCP_CONG_NON_RESTRICTED)) { 29762306a36Sopenharmony_ci /* Only init netns can set default to a restricted algorithm */ 29862306a36Sopenharmony_ci ret = -EPERM; 29962306a36Sopenharmony_ci } else { 30062306a36Sopenharmony_ci prev = xchg(&net->ipv4.tcp_congestion_control, ca); 30162306a36Sopenharmony_ci if (prev) 30262306a36Sopenharmony_ci bpf_module_put(prev, prev->owner); 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci ca->flags |= TCP_CONG_NON_RESTRICTED; 30562306a36Sopenharmony_ci ret = 0; 30662306a36Sopenharmony_ci } 30762306a36Sopenharmony_ci rcu_read_unlock(); 30862306a36Sopenharmony_ci 30962306a36Sopenharmony_ci return ret; 31062306a36Sopenharmony_ci} 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci/* Set default value from kernel configuration at bootup */ 31362306a36Sopenharmony_cistatic int __init tcp_congestion_default(void) 31462306a36Sopenharmony_ci{ 31562306a36Sopenharmony_ci return tcp_set_default_congestion_control(&init_net, 31662306a36Sopenharmony_ci CONFIG_DEFAULT_TCP_CONG); 31762306a36Sopenharmony_ci} 31862306a36Sopenharmony_cilate_initcall(tcp_congestion_default); 31962306a36Sopenharmony_ci 32062306a36Sopenharmony_ci/* Build string with list of available congestion control values */ 32162306a36Sopenharmony_civoid tcp_get_available_congestion_control(char *buf, size_t maxlen) 32262306a36Sopenharmony_ci{ 32362306a36Sopenharmony_ci struct tcp_congestion_ops *ca; 32462306a36Sopenharmony_ci size_t offs = 0; 32562306a36Sopenharmony_ci 32662306a36Sopenharmony_ci rcu_read_lock(); 32762306a36Sopenharmony_ci list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 32862306a36Sopenharmony_ci offs += snprintf(buf + offs, maxlen - offs, 32962306a36Sopenharmony_ci "%s%s", 33062306a36Sopenharmony_ci offs == 0 ? "" : " ", ca->name); 33162306a36Sopenharmony_ci 33262306a36Sopenharmony_ci if (WARN_ON_ONCE(offs >= maxlen)) 33362306a36Sopenharmony_ci break; 33462306a36Sopenharmony_ci } 33562306a36Sopenharmony_ci rcu_read_unlock(); 33662306a36Sopenharmony_ci} 33762306a36Sopenharmony_ci 33862306a36Sopenharmony_ci/* Get current default congestion control */ 33962306a36Sopenharmony_civoid tcp_get_default_congestion_control(struct net *net, char *name) 34062306a36Sopenharmony_ci{ 34162306a36Sopenharmony_ci const struct tcp_congestion_ops *ca; 34262306a36Sopenharmony_ci 34362306a36Sopenharmony_ci rcu_read_lock(); 34462306a36Sopenharmony_ci ca = rcu_dereference(net->ipv4.tcp_congestion_control); 34562306a36Sopenharmony_ci strncpy(name, ca->name, TCP_CA_NAME_MAX); 34662306a36Sopenharmony_ci rcu_read_unlock(); 34762306a36Sopenharmony_ci} 34862306a36Sopenharmony_ci 34962306a36Sopenharmony_ci/* Built list of non-restricted congestion control values */ 35062306a36Sopenharmony_civoid tcp_get_allowed_congestion_control(char *buf, size_t maxlen) 35162306a36Sopenharmony_ci{ 35262306a36Sopenharmony_ci struct tcp_congestion_ops *ca; 35362306a36Sopenharmony_ci size_t offs = 0; 35462306a36Sopenharmony_ci 35562306a36Sopenharmony_ci *buf = '\0'; 35662306a36Sopenharmony_ci rcu_read_lock(); 35762306a36Sopenharmony_ci list_for_each_entry_rcu(ca, &tcp_cong_list, list) { 35862306a36Sopenharmony_ci if (!(ca->flags & TCP_CONG_NON_RESTRICTED)) 35962306a36Sopenharmony_ci continue; 36062306a36Sopenharmony_ci offs += snprintf(buf + offs, maxlen - offs, 36162306a36Sopenharmony_ci "%s%s", 36262306a36Sopenharmony_ci offs == 0 ? "" : " ", ca->name); 36362306a36Sopenharmony_ci 36462306a36Sopenharmony_ci if (WARN_ON_ONCE(offs >= maxlen)) 36562306a36Sopenharmony_ci break; 36662306a36Sopenharmony_ci } 36762306a36Sopenharmony_ci rcu_read_unlock(); 36862306a36Sopenharmony_ci} 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_ci/* Change list of non-restricted congestion control */ 37162306a36Sopenharmony_ciint tcp_set_allowed_congestion_control(char *val) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci struct tcp_congestion_ops *ca; 37462306a36Sopenharmony_ci char *saved_clone, *clone, *name; 37562306a36Sopenharmony_ci int ret = 0; 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_ci saved_clone = clone = kstrdup(val, GFP_USER); 37862306a36Sopenharmony_ci if (!clone) 37962306a36Sopenharmony_ci return -ENOMEM; 38062306a36Sopenharmony_ci 38162306a36Sopenharmony_ci spin_lock(&tcp_cong_list_lock); 38262306a36Sopenharmony_ci /* pass 1 check for bad entries */ 38362306a36Sopenharmony_ci while ((name = strsep(&clone, " ")) && *name) { 38462306a36Sopenharmony_ci ca = tcp_ca_find(name); 38562306a36Sopenharmony_ci if (!ca) { 38662306a36Sopenharmony_ci ret = -ENOENT; 38762306a36Sopenharmony_ci goto out; 38862306a36Sopenharmony_ci } 38962306a36Sopenharmony_ci } 39062306a36Sopenharmony_ci 39162306a36Sopenharmony_ci /* pass 2 clear old values */ 39262306a36Sopenharmony_ci list_for_each_entry_rcu(ca, &tcp_cong_list, list) 39362306a36Sopenharmony_ci ca->flags &= ~TCP_CONG_NON_RESTRICTED; 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_ci /* pass 3 mark as allowed */ 39662306a36Sopenharmony_ci while ((name = strsep(&val, " ")) && *name) { 39762306a36Sopenharmony_ci ca = tcp_ca_find(name); 39862306a36Sopenharmony_ci WARN_ON(!ca); 39962306a36Sopenharmony_ci if (ca) 40062306a36Sopenharmony_ci ca->flags |= TCP_CONG_NON_RESTRICTED; 40162306a36Sopenharmony_ci } 40262306a36Sopenharmony_ciout: 40362306a36Sopenharmony_ci spin_unlock(&tcp_cong_list_lock); 40462306a36Sopenharmony_ci kfree(saved_clone); 40562306a36Sopenharmony_ci 40662306a36Sopenharmony_ci return ret; 40762306a36Sopenharmony_ci} 40862306a36Sopenharmony_ci 40962306a36Sopenharmony_ci/* Change congestion control for socket. If load is false, then it is the 41062306a36Sopenharmony_ci * responsibility of the caller to call tcp_init_congestion_control or 41162306a36Sopenharmony_ci * tcp_reinit_congestion_control (if the current congestion control was 41262306a36Sopenharmony_ci * already initialized. 41362306a36Sopenharmony_ci */ 41462306a36Sopenharmony_ciint tcp_set_congestion_control(struct sock *sk, const char *name, bool load, 41562306a36Sopenharmony_ci bool cap_net_admin) 41662306a36Sopenharmony_ci{ 41762306a36Sopenharmony_ci struct inet_connection_sock *icsk = inet_csk(sk); 41862306a36Sopenharmony_ci const struct tcp_congestion_ops *ca; 41962306a36Sopenharmony_ci int err = 0; 42062306a36Sopenharmony_ci 42162306a36Sopenharmony_ci if (icsk->icsk_ca_dst_locked) 42262306a36Sopenharmony_ci return -EPERM; 42362306a36Sopenharmony_ci 42462306a36Sopenharmony_ci rcu_read_lock(); 42562306a36Sopenharmony_ci if (!load) 42662306a36Sopenharmony_ci ca = tcp_ca_find(name); 42762306a36Sopenharmony_ci else 42862306a36Sopenharmony_ci ca = tcp_ca_find_autoload(sock_net(sk), name); 42962306a36Sopenharmony_ci 43062306a36Sopenharmony_ci /* No change asking for existing value */ 43162306a36Sopenharmony_ci if (ca == icsk->icsk_ca_ops) { 43262306a36Sopenharmony_ci icsk->icsk_ca_setsockopt = 1; 43362306a36Sopenharmony_ci goto out; 43462306a36Sopenharmony_ci } 43562306a36Sopenharmony_ci 43662306a36Sopenharmony_ci if (!ca) 43762306a36Sopenharmony_ci err = -ENOENT; 43862306a36Sopenharmony_ci else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || cap_net_admin)) 43962306a36Sopenharmony_ci err = -EPERM; 44062306a36Sopenharmony_ci else if (!bpf_try_module_get(ca, ca->owner)) 44162306a36Sopenharmony_ci err = -EBUSY; 44262306a36Sopenharmony_ci else 44362306a36Sopenharmony_ci tcp_reinit_congestion_control(sk, ca); 44462306a36Sopenharmony_ci out: 44562306a36Sopenharmony_ci rcu_read_unlock(); 44662306a36Sopenharmony_ci return err; 44762306a36Sopenharmony_ci} 44862306a36Sopenharmony_ci 44962306a36Sopenharmony_ci/* Slow start is used when congestion window is no greater than the slow start 45062306a36Sopenharmony_ci * threshold. We base on RFC2581 and also handle stretch ACKs properly. 45162306a36Sopenharmony_ci * We do not implement RFC3465 Appropriate Byte Counting (ABC) per se but 45262306a36Sopenharmony_ci * something better;) a packet is only considered (s)acked in its entirety to 45362306a36Sopenharmony_ci * defend the ACK attacks described in the RFC. Slow start processes a stretch 45462306a36Sopenharmony_ci * ACK of degree N as if N acks of degree 1 are received back to back except 45562306a36Sopenharmony_ci * ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and 45662306a36Sopenharmony_ci * returns the leftover acks to adjust cwnd in congestion avoidance mode. 45762306a36Sopenharmony_ci */ 45862306a36Sopenharmony_ci__bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked) 45962306a36Sopenharmony_ci{ 46062306a36Sopenharmony_ci u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh); 46162306a36Sopenharmony_ci 46262306a36Sopenharmony_ci acked -= cwnd - tcp_snd_cwnd(tp); 46362306a36Sopenharmony_ci tcp_snd_cwnd_set(tp, min(cwnd, tp->snd_cwnd_clamp)); 46462306a36Sopenharmony_ci 46562306a36Sopenharmony_ci return acked; 46662306a36Sopenharmony_ci} 46762306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_slow_start); 46862306a36Sopenharmony_ci 46962306a36Sopenharmony_ci/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w), 47062306a36Sopenharmony_ci * for every packet that was ACKed. 47162306a36Sopenharmony_ci */ 47262306a36Sopenharmony_ci__bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked) 47362306a36Sopenharmony_ci{ 47462306a36Sopenharmony_ci /* If credits accumulated at a higher w, apply them gently now. */ 47562306a36Sopenharmony_ci if (tp->snd_cwnd_cnt >= w) { 47662306a36Sopenharmony_ci tp->snd_cwnd_cnt = 0; 47762306a36Sopenharmony_ci tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + 1); 47862306a36Sopenharmony_ci } 47962306a36Sopenharmony_ci 48062306a36Sopenharmony_ci tp->snd_cwnd_cnt += acked; 48162306a36Sopenharmony_ci if (tp->snd_cwnd_cnt >= w) { 48262306a36Sopenharmony_ci u32 delta = tp->snd_cwnd_cnt / w; 48362306a36Sopenharmony_ci 48462306a36Sopenharmony_ci tp->snd_cwnd_cnt -= delta * w; 48562306a36Sopenharmony_ci tcp_snd_cwnd_set(tp, tcp_snd_cwnd(tp) + delta); 48662306a36Sopenharmony_ci } 48762306a36Sopenharmony_ci tcp_snd_cwnd_set(tp, min(tcp_snd_cwnd(tp), tp->snd_cwnd_clamp)); 48862306a36Sopenharmony_ci} 48962306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_cong_avoid_ai); 49062306a36Sopenharmony_ci 49162306a36Sopenharmony_ci/* 49262306a36Sopenharmony_ci * TCP Reno congestion control 49362306a36Sopenharmony_ci * This is special case used for fallback as well. 49462306a36Sopenharmony_ci */ 49562306a36Sopenharmony_ci/* This is Jacobson's slow start and congestion avoidance. 49662306a36Sopenharmony_ci * SIGCOMM '88, p. 328. 49762306a36Sopenharmony_ci */ 49862306a36Sopenharmony_ci__bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked) 49962306a36Sopenharmony_ci{ 50062306a36Sopenharmony_ci struct tcp_sock *tp = tcp_sk(sk); 50162306a36Sopenharmony_ci 50262306a36Sopenharmony_ci if (!tcp_is_cwnd_limited(sk)) 50362306a36Sopenharmony_ci return; 50462306a36Sopenharmony_ci 50562306a36Sopenharmony_ci /* In "safe" area, increase. */ 50662306a36Sopenharmony_ci if (tcp_in_slow_start(tp)) { 50762306a36Sopenharmony_ci acked = tcp_slow_start(tp, acked); 50862306a36Sopenharmony_ci if (!acked) 50962306a36Sopenharmony_ci return; 51062306a36Sopenharmony_ci } 51162306a36Sopenharmony_ci /* In dangerous area, increase slowly. */ 51262306a36Sopenharmony_ci tcp_cong_avoid_ai(tp, tcp_snd_cwnd(tp), acked); 51362306a36Sopenharmony_ci} 51462306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_reno_cong_avoid); 51562306a36Sopenharmony_ci 51662306a36Sopenharmony_ci/* Slow start threshold is half the congestion window (min 2) */ 51762306a36Sopenharmony_ci__bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk) 51862306a36Sopenharmony_ci{ 51962306a36Sopenharmony_ci const struct tcp_sock *tp = tcp_sk(sk); 52062306a36Sopenharmony_ci 52162306a36Sopenharmony_ci return max(tcp_snd_cwnd(tp) >> 1U, 2U); 52262306a36Sopenharmony_ci} 52362306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_reno_ssthresh); 52462306a36Sopenharmony_ci 52562306a36Sopenharmony_ci__bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk) 52662306a36Sopenharmony_ci{ 52762306a36Sopenharmony_ci const struct tcp_sock *tp = tcp_sk(sk); 52862306a36Sopenharmony_ci 52962306a36Sopenharmony_ci return max(tcp_snd_cwnd(tp), tp->prior_cwnd); 53062306a36Sopenharmony_ci} 53162306a36Sopenharmony_ciEXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); 53262306a36Sopenharmony_ci 53362306a36Sopenharmony_cistruct tcp_congestion_ops tcp_reno = { 53462306a36Sopenharmony_ci .flags = TCP_CONG_NON_RESTRICTED, 53562306a36Sopenharmony_ci .name = "reno", 53662306a36Sopenharmony_ci .owner = THIS_MODULE, 53762306a36Sopenharmony_ci .ssthresh = tcp_reno_ssthresh, 53862306a36Sopenharmony_ci .cong_avoid = tcp_reno_cong_avoid, 53962306a36Sopenharmony_ci .undo_cwnd = tcp_reno_undo_cwnd, 54062306a36Sopenharmony_ci}; 541