162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0 262306a36Sopenharmony_ci/* Copyright (c) 2019 Facebook */ 362306a36Sopenharmony_ci#include <linux/rculist.h> 462306a36Sopenharmony_ci#include <linux/list.h> 562306a36Sopenharmony_ci#include <linux/hash.h> 662306a36Sopenharmony_ci#include <linux/types.h> 762306a36Sopenharmony_ci#include <linux/spinlock.h> 862306a36Sopenharmony_ci#include <linux/bpf.h> 962306a36Sopenharmony_ci#include <linux/btf_ids.h> 1062306a36Sopenharmony_ci#include <linux/bpf_local_storage.h> 1162306a36Sopenharmony_ci#include <net/sock.h> 1262306a36Sopenharmony_ci#include <uapi/linux/sock_diag.h> 1362306a36Sopenharmony_ci#include <uapi/linux/btf.h> 1462306a36Sopenharmony_ci#include <linux/rcupdate.h> 1562306a36Sopenharmony_ci#include <linux/rcupdate_trace.h> 1662306a36Sopenharmony_ci#include <linux/rcupdate_wait.h> 1762306a36Sopenharmony_ci 1862306a36Sopenharmony_ci#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE) 1962306a36Sopenharmony_ci 2062306a36Sopenharmony_cistatic struct bpf_local_storage_map_bucket * 2162306a36Sopenharmony_ciselect_bucket(struct bpf_local_storage_map *smap, 2262306a36Sopenharmony_ci struct bpf_local_storage_elem *selem) 2362306a36Sopenharmony_ci{ 2462306a36Sopenharmony_ci return &smap->buckets[hash_ptr(selem, smap->bucket_log)]; 2562306a36Sopenharmony_ci} 2662306a36Sopenharmony_ci 2762306a36Sopenharmony_cistatic int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size) 2862306a36Sopenharmony_ci{ 2962306a36Sopenharmony_ci struct bpf_map *map = &smap->map; 3062306a36Sopenharmony_ci 3162306a36Sopenharmony_ci if (!map->ops->map_local_storage_charge) 3262306a36Sopenharmony_ci return 0; 3362306a36Sopenharmony_ci 3462306a36Sopenharmony_ci return map->ops->map_local_storage_charge(smap, owner, size); 3562306a36Sopenharmony_ci} 3662306a36Sopenharmony_ci 3762306a36Sopenharmony_cistatic void mem_uncharge(struct bpf_local_storage_map *smap, void *owner, 3862306a36Sopenharmony_ci u32 size) 3962306a36Sopenharmony_ci{ 4062306a36Sopenharmony_ci struct bpf_map *map = &smap->map; 4162306a36Sopenharmony_ci 4262306a36Sopenharmony_ci if (map->ops->map_local_storage_uncharge) 4362306a36Sopenharmony_ci map->ops->map_local_storage_uncharge(smap, owner, size); 4462306a36Sopenharmony_ci} 4562306a36Sopenharmony_ci 4662306a36Sopenharmony_cistatic struct bpf_local_storage __rcu ** 4762306a36Sopenharmony_ciowner_storage(struct bpf_local_storage_map *smap, void *owner) 4862306a36Sopenharmony_ci{ 4962306a36Sopenharmony_ci struct bpf_map *map = &smap->map; 5062306a36Sopenharmony_ci 5162306a36Sopenharmony_ci return map->ops->map_owner_storage_ptr(owner); 5262306a36Sopenharmony_ci} 5362306a36Sopenharmony_ci 5462306a36Sopenharmony_cistatic bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem) 5562306a36Sopenharmony_ci{ 5662306a36Sopenharmony_ci return !hlist_unhashed_lockless(&selem->snode); 5762306a36Sopenharmony_ci} 5862306a36Sopenharmony_ci 5962306a36Sopenharmony_cistatic bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem) 6062306a36Sopenharmony_ci{ 6162306a36Sopenharmony_ci return !hlist_unhashed(&selem->snode); 6262306a36Sopenharmony_ci} 6362306a36Sopenharmony_ci 6462306a36Sopenharmony_cistatic bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem) 6562306a36Sopenharmony_ci{ 6662306a36Sopenharmony_ci return !hlist_unhashed_lockless(&selem->map_node); 6762306a36Sopenharmony_ci} 6862306a36Sopenharmony_ci 6962306a36Sopenharmony_cistatic bool selem_linked_to_map(const struct bpf_local_storage_elem *selem) 7062306a36Sopenharmony_ci{ 7162306a36Sopenharmony_ci return !hlist_unhashed(&selem->map_node); 7262306a36Sopenharmony_ci} 7362306a36Sopenharmony_ci 7462306a36Sopenharmony_cistruct bpf_local_storage_elem * 7562306a36Sopenharmony_cibpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, 7662306a36Sopenharmony_ci void *value, bool charge_mem, gfp_t gfp_flags) 7762306a36Sopenharmony_ci{ 7862306a36Sopenharmony_ci struct bpf_local_storage_elem *selem; 7962306a36Sopenharmony_ci 8062306a36Sopenharmony_ci if (charge_mem && mem_charge(smap, owner, smap->elem_size)) 8162306a36Sopenharmony_ci return NULL; 8262306a36Sopenharmony_ci 8362306a36Sopenharmony_ci if (smap->bpf_ma) { 8462306a36Sopenharmony_ci migrate_disable(); 8562306a36Sopenharmony_ci selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags); 8662306a36Sopenharmony_ci migrate_enable(); 8762306a36Sopenharmony_ci if (selem) 8862306a36Sopenharmony_ci /* Keep the original bpf_map_kzalloc behavior 8962306a36Sopenharmony_ci * before started using the bpf_mem_cache_alloc. 9062306a36Sopenharmony_ci * 9162306a36Sopenharmony_ci * No need to use zero_map_value. The bpf_selem_free() 9262306a36Sopenharmony_ci * only does bpf_mem_cache_free when there is 9362306a36Sopenharmony_ci * no other bpf prog is using the selem. 9462306a36Sopenharmony_ci */ 9562306a36Sopenharmony_ci memset(SDATA(selem)->data, 0, smap->map.value_size); 9662306a36Sopenharmony_ci } else { 9762306a36Sopenharmony_ci selem = bpf_map_kzalloc(&smap->map, smap->elem_size, 9862306a36Sopenharmony_ci gfp_flags | __GFP_NOWARN); 9962306a36Sopenharmony_ci } 10062306a36Sopenharmony_ci 10162306a36Sopenharmony_ci if (selem) { 10262306a36Sopenharmony_ci if (value) 10362306a36Sopenharmony_ci copy_map_value(&smap->map, SDATA(selem)->data, value); 10462306a36Sopenharmony_ci /* No need to call check_and_init_map_value as memory is zero init */ 10562306a36Sopenharmony_ci return selem; 10662306a36Sopenharmony_ci } 10762306a36Sopenharmony_ci 10862306a36Sopenharmony_ci if (charge_mem) 10962306a36Sopenharmony_ci mem_uncharge(smap, owner, smap->elem_size); 11062306a36Sopenharmony_ci 11162306a36Sopenharmony_ci return NULL; 11262306a36Sopenharmony_ci} 11362306a36Sopenharmony_ci 11462306a36Sopenharmony_ci/* rcu tasks trace callback for bpf_ma == false */ 11562306a36Sopenharmony_cistatic void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) 11662306a36Sopenharmony_ci{ 11762306a36Sopenharmony_ci struct bpf_local_storage *local_storage; 11862306a36Sopenharmony_ci 11962306a36Sopenharmony_ci /* If RCU Tasks Trace grace period implies RCU grace period, do 12062306a36Sopenharmony_ci * kfree(), else do kfree_rcu(). 12162306a36Sopenharmony_ci */ 12262306a36Sopenharmony_ci local_storage = container_of(rcu, struct bpf_local_storage, rcu); 12362306a36Sopenharmony_ci if (rcu_trace_implies_rcu_gp()) 12462306a36Sopenharmony_ci kfree(local_storage); 12562306a36Sopenharmony_ci else 12662306a36Sopenharmony_ci kfree_rcu(local_storage, rcu); 12762306a36Sopenharmony_ci} 12862306a36Sopenharmony_ci 12962306a36Sopenharmony_cistatic void bpf_local_storage_free_rcu(struct rcu_head *rcu) 13062306a36Sopenharmony_ci{ 13162306a36Sopenharmony_ci struct bpf_local_storage *local_storage; 13262306a36Sopenharmony_ci 13362306a36Sopenharmony_ci local_storage = container_of(rcu, struct bpf_local_storage, rcu); 13462306a36Sopenharmony_ci bpf_mem_cache_raw_free(local_storage); 13562306a36Sopenharmony_ci} 13662306a36Sopenharmony_ci 13762306a36Sopenharmony_cistatic void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu) 13862306a36Sopenharmony_ci{ 13962306a36Sopenharmony_ci if (rcu_trace_implies_rcu_gp()) 14062306a36Sopenharmony_ci bpf_local_storage_free_rcu(rcu); 14162306a36Sopenharmony_ci else 14262306a36Sopenharmony_ci call_rcu(rcu, bpf_local_storage_free_rcu); 14362306a36Sopenharmony_ci} 14462306a36Sopenharmony_ci 14562306a36Sopenharmony_ci/* Handle bpf_ma == false */ 14662306a36Sopenharmony_cistatic void __bpf_local_storage_free(struct bpf_local_storage *local_storage, 14762306a36Sopenharmony_ci bool vanilla_rcu) 14862306a36Sopenharmony_ci{ 14962306a36Sopenharmony_ci if (vanilla_rcu) 15062306a36Sopenharmony_ci kfree_rcu(local_storage, rcu); 15162306a36Sopenharmony_ci else 15262306a36Sopenharmony_ci call_rcu_tasks_trace(&local_storage->rcu, 15362306a36Sopenharmony_ci __bpf_local_storage_free_trace_rcu); 15462306a36Sopenharmony_ci} 15562306a36Sopenharmony_ci 15662306a36Sopenharmony_cistatic void bpf_local_storage_free(struct bpf_local_storage *local_storage, 15762306a36Sopenharmony_ci struct bpf_local_storage_map *smap, 15862306a36Sopenharmony_ci bool bpf_ma, bool reuse_now) 15962306a36Sopenharmony_ci{ 16062306a36Sopenharmony_ci if (!local_storage) 16162306a36Sopenharmony_ci return; 16262306a36Sopenharmony_ci 16362306a36Sopenharmony_ci if (!bpf_ma) { 16462306a36Sopenharmony_ci __bpf_local_storage_free(local_storage, reuse_now); 16562306a36Sopenharmony_ci return; 16662306a36Sopenharmony_ci } 16762306a36Sopenharmony_ci 16862306a36Sopenharmony_ci if (!reuse_now) { 16962306a36Sopenharmony_ci call_rcu_tasks_trace(&local_storage->rcu, 17062306a36Sopenharmony_ci bpf_local_storage_free_trace_rcu); 17162306a36Sopenharmony_ci return; 17262306a36Sopenharmony_ci } 17362306a36Sopenharmony_ci 17462306a36Sopenharmony_ci if (smap) { 17562306a36Sopenharmony_ci migrate_disable(); 17662306a36Sopenharmony_ci bpf_mem_cache_free(&smap->storage_ma, local_storage); 17762306a36Sopenharmony_ci migrate_enable(); 17862306a36Sopenharmony_ci } else { 17962306a36Sopenharmony_ci /* smap could be NULL if the selem that triggered 18062306a36Sopenharmony_ci * this 'local_storage' creation had been long gone. 18162306a36Sopenharmony_ci * In this case, directly do call_rcu(). 18262306a36Sopenharmony_ci */ 18362306a36Sopenharmony_ci call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu); 18462306a36Sopenharmony_ci } 18562306a36Sopenharmony_ci} 18662306a36Sopenharmony_ci 18762306a36Sopenharmony_ci/* rcu tasks trace callback for bpf_ma == false */ 18862306a36Sopenharmony_cistatic void __bpf_selem_free_trace_rcu(struct rcu_head *rcu) 18962306a36Sopenharmony_ci{ 19062306a36Sopenharmony_ci struct bpf_local_storage_elem *selem; 19162306a36Sopenharmony_ci 19262306a36Sopenharmony_ci selem = container_of(rcu, struct bpf_local_storage_elem, rcu); 19362306a36Sopenharmony_ci if (rcu_trace_implies_rcu_gp()) 19462306a36Sopenharmony_ci kfree(selem); 19562306a36Sopenharmony_ci else 19662306a36Sopenharmony_ci kfree_rcu(selem, rcu); 19762306a36Sopenharmony_ci} 19862306a36Sopenharmony_ci 19962306a36Sopenharmony_ci/* Handle bpf_ma == false */ 20062306a36Sopenharmony_cistatic void __bpf_selem_free(struct bpf_local_storage_elem *selem, 20162306a36Sopenharmony_ci bool vanilla_rcu) 20262306a36Sopenharmony_ci{ 20362306a36Sopenharmony_ci if (vanilla_rcu) 20462306a36Sopenharmony_ci kfree_rcu(selem, rcu); 20562306a36Sopenharmony_ci else 20662306a36Sopenharmony_ci call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu); 20762306a36Sopenharmony_ci} 20862306a36Sopenharmony_ci 20962306a36Sopenharmony_cistatic void bpf_selem_free_rcu(struct rcu_head *rcu) 21062306a36Sopenharmony_ci{ 21162306a36Sopenharmony_ci struct bpf_local_storage_elem *selem; 21262306a36Sopenharmony_ci 21362306a36Sopenharmony_ci selem = container_of(rcu, struct bpf_local_storage_elem, rcu); 21462306a36Sopenharmony_ci bpf_mem_cache_raw_free(selem); 21562306a36Sopenharmony_ci} 21662306a36Sopenharmony_ci 21762306a36Sopenharmony_cistatic void bpf_selem_free_trace_rcu(struct rcu_head *rcu) 21862306a36Sopenharmony_ci{ 21962306a36Sopenharmony_ci if (rcu_trace_implies_rcu_gp()) 22062306a36Sopenharmony_ci bpf_selem_free_rcu(rcu); 22162306a36Sopenharmony_ci else 22262306a36Sopenharmony_ci call_rcu(rcu, bpf_selem_free_rcu); 22362306a36Sopenharmony_ci} 22462306a36Sopenharmony_ci 22562306a36Sopenharmony_civoid bpf_selem_free(struct bpf_local_storage_elem *selem, 22662306a36Sopenharmony_ci struct bpf_local_storage_map *smap, 22762306a36Sopenharmony_ci bool reuse_now) 22862306a36Sopenharmony_ci{ 22962306a36Sopenharmony_ci bpf_obj_free_fields(smap->map.record, SDATA(selem)->data); 23062306a36Sopenharmony_ci 23162306a36Sopenharmony_ci if (!smap->bpf_ma) { 23262306a36Sopenharmony_ci __bpf_selem_free(selem, reuse_now); 23362306a36Sopenharmony_ci return; 23462306a36Sopenharmony_ci } 23562306a36Sopenharmony_ci 23662306a36Sopenharmony_ci if (!reuse_now) { 23762306a36Sopenharmony_ci call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu); 23862306a36Sopenharmony_ci } else { 23962306a36Sopenharmony_ci /* Instead of using the vanilla call_rcu(), 24062306a36Sopenharmony_ci * bpf_mem_cache_free will be able to reuse selem 24162306a36Sopenharmony_ci * immediately. 24262306a36Sopenharmony_ci */ 24362306a36Sopenharmony_ci migrate_disable(); 24462306a36Sopenharmony_ci bpf_mem_cache_free(&smap->selem_ma, selem); 24562306a36Sopenharmony_ci migrate_enable(); 24662306a36Sopenharmony_ci } 24762306a36Sopenharmony_ci} 24862306a36Sopenharmony_ci 24962306a36Sopenharmony_ci/* local_storage->lock must be held and selem->local_storage == local_storage. 25062306a36Sopenharmony_ci * The caller must ensure selem->smap is still valid to be 25162306a36Sopenharmony_ci * dereferenced for its smap->elem_size and smap->cache_idx. 25262306a36Sopenharmony_ci */ 25362306a36Sopenharmony_cistatic bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage, 25462306a36Sopenharmony_ci struct bpf_local_storage_elem *selem, 25562306a36Sopenharmony_ci bool uncharge_mem, bool reuse_now) 25662306a36Sopenharmony_ci{ 25762306a36Sopenharmony_ci struct bpf_local_storage_map *smap; 25862306a36Sopenharmony_ci bool free_local_storage; 25962306a36Sopenharmony_ci void *owner; 26062306a36Sopenharmony_ci 26162306a36Sopenharmony_ci smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); 26262306a36Sopenharmony_ci owner = local_storage->owner; 26362306a36Sopenharmony_ci 26462306a36Sopenharmony_ci /* All uncharging on the owner must be done first. 26562306a36Sopenharmony_ci * The owner may be freed once the last selem is unlinked 26662306a36Sopenharmony_ci * from local_storage. 26762306a36Sopenharmony_ci */ 26862306a36Sopenharmony_ci if (uncharge_mem) 26962306a36Sopenharmony_ci mem_uncharge(smap, owner, smap->elem_size); 27062306a36Sopenharmony_ci 27162306a36Sopenharmony_ci free_local_storage = hlist_is_singular_node(&selem->snode, 27262306a36Sopenharmony_ci &local_storage->list); 27362306a36Sopenharmony_ci if (free_local_storage) { 27462306a36Sopenharmony_ci mem_uncharge(smap, owner, sizeof(struct bpf_local_storage)); 27562306a36Sopenharmony_ci local_storage->owner = NULL; 27662306a36Sopenharmony_ci 27762306a36Sopenharmony_ci /* After this RCU_INIT, owner may be freed and cannot be used */ 27862306a36Sopenharmony_ci RCU_INIT_POINTER(*owner_storage(smap, owner), NULL); 27962306a36Sopenharmony_ci 28062306a36Sopenharmony_ci /* local_storage is not freed now. local_storage->lock is 28162306a36Sopenharmony_ci * still held and raw_spin_unlock_bh(&local_storage->lock) 28262306a36Sopenharmony_ci * will be done by the caller. 28362306a36Sopenharmony_ci * 28462306a36Sopenharmony_ci * Although the unlock will be done under 28562306a36Sopenharmony_ci * rcu_read_lock(), it is more intuitive to 28662306a36Sopenharmony_ci * read if the freeing of the storage is done 28762306a36Sopenharmony_ci * after the raw_spin_unlock_bh(&local_storage->lock). 28862306a36Sopenharmony_ci * 28962306a36Sopenharmony_ci * Hence, a "bool free_local_storage" is returned 29062306a36Sopenharmony_ci * to the caller which then calls then frees the storage after 29162306a36Sopenharmony_ci * all the RCU grace periods have expired. 29262306a36Sopenharmony_ci */ 29362306a36Sopenharmony_ci } 29462306a36Sopenharmony_ci hlist_del_init_rcu(&selem->snode); 29562306a36Sopenharmony_ci if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) == 29662306a36Sopenharmony_ci SDATA(selem)) 29762306a36Sopenharmony_ci RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL); 29862306a36Sopenharmony_ci 29962306a36Sopenharmony_ci bpf_selem_free(selem, smap, reuse_now); 30062306a36Sopenharmony_ci 30162306a36Sopenharmony_ci if (rcu_access_pointer(local_storage->smap) == smap) 30262306a36Sopenharmony_ci RCU_INIT_POINTER(local_storage->smap, NULL); 30362306a36Sopenharmony_ci 30462306a36Sopenharmony_ci return free_local_storage; 30562306a36Sopenharmony_ci} 30662306a36Sopenharmony_ci 30762306a36Sopenharmony_cistatic bool check_storage_bpf_ma(struct bpf_local_storage *local_storage, 30862306a36Sopenharmony_ci struct bpf_local_storage_map *storage_smap, 30962306a36Sopenharmony_ci struct bpf_local_storage_elem *selem) 31062306a36Sopenharmony_ci{ 31162306a36Sopenharmony_ci 31262306a36Sopenharmony_ci struct bpf_local_storage_map *selem_smap; 31362306a36Sopenharmony_ci 31462306a36Sopenharmony_ci /* local_storage->smap may be NULL. If it is, get the bpf_ma 31562306a36Sopenharmony_ci * from any selem in the local_storage->list. The bpf_ma of all 31662306a36Sopenharmony_ci * local_storage and selem should have the same value 31762306a36Sopenharmony_ci * for the same map type. 31862306a36Sopenharmony_ci * 31962306a36Sopenharmony_ci * If the local_storage->list is already empty, the caller will not 32062306a36Sopenharmony_ci * care about the bpf_ma value also because the caller is not 32162306a36Sopenharmony_ci * responsibile to free the local_storage. 32262306a36Sopenharmony_ci */ 32362306a36Sopenharmony_ci 32462306a36Sopenharmony_ci if (storage_smap) 32562306a36Sopenharmony_ci return storage_smap->bpf_ma; 32662306a36Sopenharmony_ci 32762306a36Sopenharmony_ci if (!selem) { 32862306a36Sopenharmony_ci struct hlist_node *n; 32962306a36Sopenharmony_ci 33062306a36Sopenharmony_ci n = rcu_dereference_check(hlist_first_rcu(&local_storage->list), 33162306a36Sopenharmony_ci bpf_rcu_lock_held()); 33262306a36Sopenharmony_ci if (!n) 33362306a36Sopenharmony_ci return false; 33462306a36Sopenharmony_ci 33562306a36Sopenharmony_ci selem = hlist_entry(n, struct bpf_local_storage_elem, snode); 33662306a36Sopenharmony_ci } 33762306a36Sopenharmony_ci selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); 33862306a36Sopenharmony_ci 33962306a36Sopenharmony_ci return selem_smap->bpf_ma; 34062306a36Sopenharmony_ci} 34162306a36Sopenharmony_ci 34262306a36Sopenharmony_cistatic void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem, 34362306a36Sopenharmony_ci bool reuse_now) 34462306a36Sopenharmony_ci{ 34562306a36Sopenharmony_ci struct bpf_local_storage_map *storage_smap; 34662306a36Sopenharmony_ci struct bpf_local_storage *local_storage; 34762306a36Sopenharmony_ci bool bpf_ma, free_local_storage = false; 34862306a36Sopenharmony_ci unsigned long flags; 34962306a36Sopenharmony_ci 35062306a36Sopenharmony_ci if (unlikely(!selem_linked_to_storage_lockless(selem))) 35162306a36Sopenharmony_ci /* selem has already been unlinked from sk */ 35262306a36Sopenharmony_ci return; 35362306a36Sopenharmony_ci 35462306a36Sopenharmony_ci local_storage = rcu_dereference_check(selem->local_storage, 35562306a36Sopenharmony_ci bpf_rcu_lock_held()); 35662306a36Sopenharmony_ci storage_smap = rcu_dereference_check(local_storage->smap, 35762306a36Sopenharmony_ci bpf_rcu_lock_held()); 35862306a36Sopenharmony_ci bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem); 35962306a36Sopenharmony_ci 36062306a36Sopenharmony_ci raw_spin_lock_irqsave(&local_storage->lock, flags); 36162306a36Sopenharmony_ci if (likely(selem_linked_to_storage(selem))) 36262306a36Sopenharmony_ci free_local_storage = bpf_selem_unlink_storage_nolock( 36362306a36Sopenharmony_ci local_storage, selem, true, reuse_now); 36462306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&local_storage->lock, flags); 36562306a36Sopenharmony_ci 36662306a36Sopenharmony_ci if (free_local_storage) 36762306a36Sopenharmony_ci bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now); 36862306a36Sopenharmony_ci} 36962306a36Sopenharmony_ci 37062306a36Sopenharmony_civoid bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage, 37162306a36Sopenharmony_ci struct bpf_local_storage_elem *selem) 37262306a36Sopenharmony_ci{ 37362306a36Sopenharmony_ci RCU_INIT_POINTER(selem->local_storage, local_storage); 37462306a36Sopenharmony_ci hlist_add_head_rcu(&selem->snode, &local_storage->list); 37562306a36Sopenharmony_ci} 37662306a36Sopenharmony_ci 37762306a36Sopenharmony_cistatic void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem) 37862306a36Sopenharmony_ci{ 37962306a36Sopenharmony_ci struct bpf_local_storage_map *smap; 38062306a36Sopenharmony_ci struct bpf_local_storage_map_bucket *b; 38162306a36Sopenharmony_ci unsigned long flags; 38262306a36Sopenharmony_ci 38362306a36Sopenharmony_ci if (unlikely(!selem_linked_to_map_lockless(selem))) 38462306a36Sopenharmony_ci /* selem has already be unlinked from smap */ 38562306a36Sopenharmony_ci return; 38662306a36Sopenharmony_ci 38762306a36Sopenharmony_ci smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held()); 38862306a36Sopenharmony_ci b = select_bucket(smap, selem); 38962306a36Sopenharmony_ci raw_spin_lock_irqsave(&b->lock, flags); 39062306a36Sopenharmony_ci if (likely(selem_linked_to_map(selem))) 39162306a36Sopenharmony_ci hlist_del_init_rcu(&selem->map_node); 39262306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&b->lock, flags); 39362306a36Sopenharmony_ci} 39462306a36Sopenharmony_ci 39562306a36Sopenharmony_civoid bpf_selem_link_map(struct bpf_local_storage_map *smap, 39662306a36Sopenharmony_ci struct bpf_local_storage_elem *selem) 39762306a36Sopenharmony_ci{ 39862306a36Sopenharmony_ci struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem); 39962306a36Sopenharmony_ci unsigned long flags; 40062306a36Sopenharmony_ci 40162306a36Sopenharmony_ci raw_spin_lock_irqsave(&b->lock, flags); 40262306a36Sopenharmony_ci RCU_INIT_POINTER(SDATA(selem)->smap, smap); 40362306a36Sopenharmony_ci hlist_add_head_rcu(&selem->map_node, &b->list); 40462306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&b->lock, flags); 40562306a36Sopenharmony_ci} 40662306a36Sopenharmony_ci 40762306a36Sopenharmony_civoid bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now) 40862306a36Sopenharmony_ci{ 40962306a36Sopenharmony_ci /* Always unlink from map before unlinking from local_storage 41062306a36Sopenharmony_ci * because selem will be freed after successfully unlinked from 41162306a36Sopenharmony_ci * the local_storage. 41262306a36Sopenharmony_ci */ 41362306a36Sopenharmony_ci bpf_selem_unlink_map(selem); 41462306a36Sopenharmony_ci bpf_selem_unlink_storage(selem, reuse_now); 41562306a36Sopenharmony_ci} 41662306a36Sopenharmony_ci 41762306a36Sopenharmony_ci/* If cacheit_lockit is false, this lookup function is lockless */ 41862306a36Sopenharmony_cistruct bpf_local_storage_data * 41962306a36Sopenharmony_cibpf_local_storage_lookup(struct bpf_local_storage *local_storage, 42062306a36Sopenharmony_ci struct bpf_local_storage_map *smap, 42162306a36Sopenharmony_ci bool cacheit_lockit) 42262306a36Sopenharmony_ci{ 42362306a36Sopenharmony_ci struct bpf_local_storage_data *sdata; 42462306a36Sopenharmony_ci struct bpf_local_storage_elem *selem; 42562306a36Sopenharmony_ci 42662306a36Sopenharmony_ci /* Fast path (cache hit) */ 42762306a36Sopenharmony_ci sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx], 42862306a36Sopenharmony_ci bpf_rcu_lock_held()); 42962306a36Sopenharmony_ci if (sdata && rcu_access_pointer(sdata->smap) == smap) 43062306a36Sopenharmony_ci return sdata; 43162306a36Sopenharmony_ci 43262306a36Sopenharmony_ci /* Slow path (cache miss) */ 43362306a36Sopenharmony_ci hlist_for_each_entry_rcu(selem, &local_storage->list, snode, 43462306a36Sopenharmony_ci rcu_read_lock_trace_held()) 43562306a36Sopenharmony_ci if (rcu_access_pointer(SDATA(selem)->smap) == smap) 43662306a36Sopenharmony_ci break; 43762306a36Sopenharmony_ci 43862306a36Sopenharmony_ci if (!selem) 43962306a36Sopenharmony_ci return NULL; 44062306a36Sopenharmony_ci 44162306a36Sopenharmony_ci sdata = SDATA(selem); 44262306a36Sopenharmony_ci if (cacheit_lockit) { 44362306a36Sopenharmony_ci unsigned long flags; 44462306a36Sopenharmony_ci 44562306a36Sopenharmony_ci /* spinlock is needed to avoid racing with the 44662306a36Sopenharmony_ci * parallel delete. Otherwise, publishing an already 44762306a36Sopenharmony_ci * deleted sdata to the cache will become a use-after-free 44862306a36Sopenharmony_ci * problem in the next bpf_local_storage_lookup(). 44962306a36Sopenharmony_ci */ 45062306a36Sopenharmony_ci raw_spin_lock_irqsave(&local_storage->lock, flags); 45162306a36Sopenharmony_ci if (selem_linked_to_storage(selem)) 45262306a36Sopenharmony_ci rcu_assign_pointer(local_storage->cache[smap->cache_idx], 45362306a36Sopenharmony_ci sdata); 45462306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&local_storage->lock, flags); 45562306a36Sopenharmony_ci } 45662306a36Sopenharmony_ci 45762306a36Sopenharmony_ci return sdata; 45862306a36Sopenharmony_ci} 45962306a36Sopenharmony_ci 46062306a36Sopenharmony_cistatic int check_flags(const struct bpf_local_storage_data *old_sdata, 46162306a36Sopenharmony_ci u64 map_flags) 46262306a36Sopenharmony_ci{ 46362306a36Sopenharmony_ci if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST) 46462306a36Sopenharmony_ci /* elem already exists */ 46562306a36Sopenharmony_ci return -EEXIST; 46662306a36Sopenharmony_ci 46762306a36Sopenharmony_ci if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST) 46862306a36Sopenharmony_ci /* elem doesn't exist, cannot update it */ 46962306a36Sopenharmony_ci return -ENOENT; 47062306a36Sopenharmony_ci 47162306a36Sopenharmony_ci return 0; 47262306a36Sopenharmony_ci} 47362306a36Sopenharmony_ci 47462306a36Sopenharmony_ciint bpf_local_storage_alloc(void *owner, 47562306a36Sopenharmony_ci struct bpf_local_storage_map *smap, 47662306a36Sopenharmony_ci struct bpf_local_storage_elem *first_selem, 47762306a36Sopenharmony_ci gfp_t gfp_flags) 47862306a36Sopenharmony_ci{ 47962306a36Sopenharmony_ci struct bpf_local_storage *prev_storage, *storage; 48062306a36Sopenharmony_ci struct bpf_local_storage **owner_storage_ptr; 48162306a36Sopenharmony_ci int err; 48262306a36Sopenharmony_ci 48362306a36Sopenharmony_ci err = mem_charge(smap, owner, sizeof(*storage)); 48462306a36Sopenharmony_ci if (err) 48562306a36Sopenharmony_ci return err; 48662306a36Sopenharmony_ci 48762306a36Sopenharmony_ci if (smap->bpf_ma) { 48862306a36Sopenharmony_ci migrate_disable(); 48962306a36Sopenharmony_ci storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags); 49062306a36Sopenharmony_ci migrate_enable(); 49162306a36Sopenharmony_ci } else { 49262306a36Sopenharmony_ci storage = bpf_map_kzalloc(&smap->map, sizeof(*storage), 49362306a36Sopenharmony_ci gfp_flags | __GFP_NOWARN); 49462306a36Sopenharmony_ci } 49562306a36Sopenharmony_ci 49662306a36Sopenharmony_ci if (!storage) { 49762306a36Sopenharmony_ci err = -ENOMEM; 49862306a36Sopenharmony_ci goto uncharge; 49962306a36Sopenharmony_ci } 50062306a36Sopenharmony_ci 50162306a36Sopenharmony_ci RCU_INIT_POINTER(storage->smap, smap); 50262306a36Sopenharmony_ci INIT_HLIST_HEAD(&storage->list); 50362306a36Sopenharmony_ci raw_spin_lock_init(&storage->lock); 50462306a36Sopenharmony_ci storage->owner = owner; 50562306a36Sopenharmony_ci 50662306a36Sopenharmony_ci bpf_selem_link_storage_nolock(storage, first_selem); 50762306a36Sopenharmony_ci bpf_selem_link_map(smap, first_selem); 50862306a36Sopenharmony_ci 50962306a36Sopenharmony_ci owner_storage_ptr = 51062306a36Sopenharmony_ci (struct bpf_local_storage **)owner_storage(smap, owner); 51162306a36Sopenharmony_ci /* Publish storage to the owner. 51262306a36Sopenharmony_ci * Instead of using any lock of the kernel object (i.e. owner), 51362306a36Sopenharmony_ci * cmpxchg will work with any kernel object regardless what 51462306a36Sopenharmony_ci * the running context is, bh, irq...etc. 51562306a36Sopenharmony_ci * 51662306a36Sopenharmony_ci * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage) 51762306a36Sopenharmony_ci * is protected by the storage->lock. Hence, when freeing 51862306a36Sopenharmony_ci * the owner->storage, the storage->lock must be held before 51962306a36Sopenharmony_ci * setting owner->storage ptr to NULL. 52062306a36Sopenharmony_ci */ 52162306a36Sopenharmony_ci prev_storage = cmpxchg(owner_storage_ptr, NULL, storage); 52262306a36Sopenharmony_ci if (unlikely(prev_storage)) { 52362306a36Sopenharmony_ci bpf_selem_unlink_map(first_selem); 52462306a36Sopenharmony_ci err = -EAGAIN; 52562306a36Sopenharmony_ci goto uncharge; 52662306a36Sopenharmony_ci 52762306a36Sopenharmony_ci /* Note that even first_selem was linked to smap's 52862306a36Sopenharmony_ci * bucket->list, first_selem can be freed immediately 52962306a36Sopenharmony_ci * (instead of kfree_rcu) because 53062306a36Sopenharmony_ci * bpf_local_storage_map_free() does a 53162306a36Sopenharmony_ci * synchronize_rcu_mult (waiting for both sleepable and 53262306a36Sopenharmony_ci * normal programs) before walking the bucket->list. 53362306a36Sopenharmony_ci * Hence, no one is accessing selem from the 53462306a36Sopenharmony_ci * bucket->list under rcu_read_lock(). 53562306a36Sopenharmony_ci */ 53662306a36Sopenharmony_ci } 53762306a36Sopenharmony_ci 53862306a36Sopenharmony_ci return 0; 53962306a36Sopenharmony_ci 54062306a36Sopenharmony_ciuncharge: 54162306a36Sopenharmony_ci bpf_local_storage_free(storage, smap, smap->bpf_ma, true); 54262306a36Sopenharmony_ci mem_uncharge(smap, owner, sizeof(*storage)); 54362306a36Sopenharmony_ci return err; 54462306a36Sopenharmony_ci} 54562306a36Sopenharmony_ci 54662306a36Sopenharmony_ci/* sk cannot be going away because it is linking new elem 54762306a36Sopenharmony_ci * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0). 54862306a36Sopenharmony_ci * Otherwise, it will become a leak (and other memory issues 54962306a36Sopenharmony_ci * during map destruction). 55062306a36Sopenharmony_ci */ 55162306a36Sopenharmony_cistruct bpf_local_storage_data * 55262306a36Sopenharmony_cibpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap, 55362306a36Sopenharmony_ci void *value, u64 map_flags, gfp_t gfp_flags) 55462306a36Sopenharmony_ci{ 55562306a36Sopenharmony_ci struct bpf_local_storage_data *old_sdata = NULL; 55662306a36Sopenharmony_ci struct bpf_local_storage_elem *alloc_selem, *selem = NULL; 55762306a36Sopenharmony_ci struct bpf_local_storage *local_storage; 55862306a36Sopenharmony_ci unsigned long flags; 55962306a36Sopenharmony_ci int err; 56062306a36Sopenharmony_ci 56162306a36Sopenharmony_ci /* BPF_EXIST and BPF_NOEXIST cannot be both set */ 56262306a36Sopenharmony_ci if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) || 56362306a36Sopenharmony_ci /* BPF_F_LOCK can only be used in a value with spin_lock */ 56462306a36Sopenharmony_ci unlikely((map_flags & BPF_F_LOCK) && 56562306a36Sopenharmony_ci !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK))) 56662306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 56762306a36Sopenharmony_ci 56862306a36Sopenharmony_ci if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST) 56962306a36Sopenharmony_ci return ERR_PTR(-EINVAL); 57062306a36Sopenharmony_ci 57162306a36Sopenharmony_ci local_storage = rcu_dereference_check(*owner_storage(smap, owner), 57262306a36Sopenharmony_ci bpf_rcu_lock_held()); 57362306a36Sopenharmony_ci if (!local_storage || hlist_empty(&local_storage->list)) { 57462306a36Sopenharmony_ci /* Very first elem for the owner */ 57562306a36Sopenharmony_ci err = check_flags(NULL, map_flags); 57662306a36Sopenharmony_ci if (err) 57762306a36Sopenharmony_ci return ERR_PTR(err); 57862306a36Sopenharmony_ci 57962306a36Sopenharmony_ci selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags); 58062306a36Sopenharmony_ci if (!selem) 58162306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 58262306a36Sopenharmony_ci 58362306a36Sopenharmony_ci err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags); 58462306a36Sopenharmony_ci if (err) { 58562306a36Sopenharmony_ci bpf_selem_free(selem, smap, true); 58662306a36Sopenharmony_ci mem_uncharge(smap, owner, smap->elem_size); 58762306a36Sopenharmony_ci return ERR_PTR(err); 58862306a36Sopenharmony_ci } 58962306a36Sopenharmony_ci 59062306a36Sopenharmony_ci return SDATA(selem); 59162306a36Sopenharmony_ci } 59262306a36Sopenharmony_ci 59362306a36Sopenharmony_ci if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) { 59462306a36Sopenharmony_ci /* Hoping to find an old_sdata to do inline update 59562306a36Sopenharmony_ci * such that it can avoid taking the local_storage->lock 59662306a36Sopenharmony_ci * and changing the lists. 59762306a36Sopenharmony_ci */ 59862306a36Sopenharmony_ci old_sdata = 59962306a36Sopenharmony_ci bpf_local_storage_lookup(local_storage, smap, false); 60062306a36Sopenharmony_ci err = check_flags(old_sdata, map_flags); 60162306a36Sopenharmony_ci if (err) 60262306a36Sopenharmony_ci return ERR_PTR(err); 60362306a36Sopenharmony_ci if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) { 60462306a36Sopenharmony_ci copy_map_value_locked(&smap->map, old_sdata->data, 60562306a36Sopenharmony_ci value, false); 60662306a36Sopenharmony_ci return old_sdata; 60762306a36Sopenharmony_ci } 60862306a36Sopenharmony_ci } 60962306a36Sopenharmony_ci 61062306a36Sopenharmony_ci /* A lookup has just been done before and concluded a new selem is 61162306a36Sopenharmony_ci * needed. The chance of an unnecessary alloc is unlikely. 61262306a36Sopenharmony_ci */ 61362306a36Sopenharmony_ci alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags); 61462306a36Sopenharmony_ci if (!alloc_selem) 61562306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 61662306a36Sopenharmony_ci 61762306a36Sopenharmony_ci raw_spin_lock_irqsave(&local_storage->lock, flags); 61862306a36Sopenharmony_ci 61962306a36Sopenharmony_ci /* Recheck local_storage->list under local_storage->lock */ 62062306a36Sopenharmony_ci if (unlikely(hlist_empty(&local_storage->list))) { 62162306a36Sopenharmony_ci /* A parallel del is happening and local_storage is going 62262306a36Sopenharmony_ci * away. It has just been checked before, so very 62362306a36Sopenharmony_ci * unlikely. Return instead of retry to keep things 62462306a36Sopenharmony_ci * simple. 62562306a36Sopenharmony_ci */ 62662306a36Sopenharmony_ci err = -EAGAIN; 62762306a36Sopenharmony_ci goto unlock; 62862306a36Sopenharmony_ci } 62962306a36Sopenharmony_ci 63062306a36Sopenharmony_ci old_sdata = bpf_local_storage_lookup(local_storage, smap, false); 63162306a36Sopenharmony_ci err = check_flags(old_sdata, map_flags); 63262306a36Sopenharmony_ci if (err) 63362306a36Sopenharmony_ci goto unlock; 63462306a36Sopenharmony_ci 63562306a36Sopenharmony_ci if (old_sdata && (map_flags & BPF_F_LOCK)) { 63662306a36Sopenharmony_ci copy_map_value_locked(&smap->map, old_sdata->data, value, 63762306a36Sopenharmony_ci false); 63862306a36Sopenharmony_ci selem = SELEM(old_sdata); 63962306a36Sopenharmony_ci goto unlock; 64062306a36Sopenharmony_ci } 64162306a36Sopenharmony_ci 64262306a36Sopenharmony_ci alloc_selem = NULL; 64362306a36Sopenharmony_ci /* First, link the new selem to the map */ 64462306a36Sopenharmony_ci bpf_selem_link_map(smap, selem); 64562306a36Sopenharmony_ci 64662306a36Sopenharmony_ci /* Second, link (and publish) the new selem to local_storage */ 64762306a36Sopenharmony_ci bpf_selem_link_storage_nolock(local_storage, selem); 64862306a36Sopenharmony_ci 64962306a36Sopenharmony_ci /* Third, remove old selem, SELEM(old_sdata) */ 65062306a36Sopenharmony_ci if (old_sdata) { 65162306a36Sopenharmony_ci bpf_selem_unlink_map(SELEM(old_sdata)); 65262306a36Sopenharmony_ci bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata), 65362306a36Sopenharmony_ci true, false); 65462306a36Sopenharmony_ci } 65562306a36Sopenharmony_ci 65662306a36Sopenharmony_ciunlock: 65762306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&local_storage->lock, flags); 65862306a36Sopenharmony_ci if (alloc_selem) { 65962306a36Sopenharmony_ci mem_uncharge(smap, owner, smap->elem_size); 66062306a36Sopenharmony_ci bpf_selem_free(alloc_selem, smap, true); 66162306a36Sopenharmony_ci } 66262306a36Sopenharmony_ci return err ? ERR_PTR(err) : SDATA(selem); 66362306a36Sopenharmony_ci} 66462306a36Sopenharmony_ci 66562306a36Sopenharmony_cistatic u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache) 66662306a36Sopenharmony_ci{ 66762306a36Sopenharmony_ci u64 min_usage = U64_MAX; 66862306a36Sopenharmony_ci u16 i, res = 0; 66962306a36Sopenharmony_ci 67062306a36Sopenharmony_ci spin_lock(&cache->idx_lock); 67162306a36Sopenharmony_ci 67262306a36Sopenharmony_ci for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) { 67362306a36Sopenharmony_ci if (cache->idx_usage_counts[i] < min_usage) { 67462306a36Sopenharmony_ci min_usage = cache->idx_usage_counts[i]; 67562306a36Sopenharmony_ci res = i; 67662306a36Sopenharmony_ci 67762306a36Sopenharmony_ci /* Found a free cache_idx */ 67862306a36Sopenharmony_ci if (!min_usage) 67962306a36Sopenharmony_ci break; 68062306a36Sopenharmony_ci } 68162306a36Sopenharmony_ci } 68262306a36Sopenharmony_ci cache->idx_usage_counts[res]++; 68362306a36Sopenharmony_ci 68462306a36Sopenharmony_ci spin_unlock(&cache->idx_lock); 68562306a36Sopenharmony_ci 68662306a36Sopenharmony_ci return res; 68762306a36Sopenharmony_ci} 68862306a36Sopenharmony_ci 68962306a36Sopenharmony_cistatic void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache, 69062306a36Sopenharmony_ci u16 idx) 69162306a36Sopenharmony_ci{ 69262306a36Sopenharmony_ci spin_lock(&cache->idx_lock); 69362306a36Sopenharmony_ci cache->idx_usage_counts[idx]--; 69462306a36Sopenharmony_ci spin_unlock(&cache->idx_lock); 69562306a36Sopenharmony_ci} 69662306a36Sopenharmony_ci 69762306a36Sopenharmony_ciint bpf_local_storage_map_alloc_check(union bpf_attr *attr) 69862306a36Sopenharmony_ci{ 69962306a36Sopenharmony_ci if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK || 70062306a36Sopenharmony_ci !(attr->map_flags & BPF_F_NO_PREALLOC) || 70162306a36Sopenharmony_ci attr->max_entries || 70262306a36Sopenharmony_ci attr->key_size != sizeof(int) || !attr->value_size || 70362306a36Sopenharmony_ci /* Enforce BTF for userspace sk dumping */ 70462306a36Sopenharmony_ci !attr->btf_key_type_id || !attr->btf_value_type_id) 70562306a36Sopenharmony_ci return -EINVAL; 70662306a36Sopenharmony_ci 70762306a36Sopenharmony_ci if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE) 70862306a36Sopenharmony_ci return -E2BIG; 70962306a36Sopenharmony_ci 71062306a36Sopenharmony_ci return 0; 71162306a36Sopenharmony_ci} 71262306a36Sopenharmony_ci 71362306a36Sopenharmony_ciint bpf_local_storage_map_check_btf(const struct bpf_map *map, 71462306a36Sopenharmony_ci const struct btf *btf, 71562306a36Sopenharmony_ci const struct btf_type *key_type, 71662306a36Sopenharmony_ci const struct btf_type *value_type) 71762306a36Sopenharmony_ci{ 71862306a36Sopenharmony_ci u32 int_data; 71962306a36Sopenharmony_ci 72062306a36Sopenharmony_ci if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) 72162306a36Sopenharmony_ci return -EINVAL; 72262306a36Sopenharmony_ci 72362306a36Sopenharmony_ci int_data = *(u32 *)(key_type + 1); 72462306a36Sopenharmony_ci if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data)) 72562306a36Sopenharmony_ci return -EINVAL; 72662306a36Sopenharmony_ci 72762306a36Sopenharmony_ci return 0; 72862306a36Sopenharmony_ci} 72962306a36Sopenharmony_ci 73062306a36Sopenharmony_civoid bpf_local_storage_destroy(struct bpf_local_storage *local_storage) 73162306a36Sopenharmony_ci{ 73262306a36Sopenharmony_ci struct bpf_local_storage_map *storage_smap; 73362306a36Sopenharmony_ci struct bpf_local_storage_elem *selem; 73462306a36Sopenharmony_ci bool bpf_ma, free_storage = false; 73562306a36Sopenharmony_ci struct hlist_node *n; 73662306a36Sopenharmony_ci unsigned long flags; 73762306a36Sopenharmony_ci 73862306a36Sopenharmony_ci storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held()); 73962306a36Sopenharmony_ci bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL); 74062306a36Sopenharmony_ci 74162306a36Sopenharmony_ci /* Neither the bpf_prog nor the bpf_map's syscall 74262306a36Sopenharmony_ci * could be modifying the local_storage->list now. 74362306a36Sopenharmony_ci * Thus, no elem can be added to or deleted from the 74462306a36Sopenharmony_ci * local_storage->list by the bpf_prog or by the bpf_map's syscall. 74562306a36Sopenharmony_ci * 74662306a36Sopenharmony_ci * It is racing with bpf_local_storage_map_free() alone 74762306a36Sopenharmony_ci * when unlinking elem from the local_storage->list and 74862306a36Sopenharmony_ci * the map's bucket->list. 74962306a36Sopenharmony_ci */ 75062306a36Sopenharmony_ci raw_spin_lock_irqsave(&local_storage->lock, flags); 75162306a36Sopenharmony_ci hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) { 75262306a36Sopenharmony_ci /* Always unlink from map before unlinking from 75362306a36Sopenharmony_ci * local_storage. 75462306a36Sopenharmony_ci */ 75562306a36Sopenharmony_ci bpf_selem_unlink_map(selem); 75662306a36Sopenharmony_ci /* If local_storage list has only one element, the 75762306a36Sopenharmony_ci * bpf_selem_unlink_storage_nolock() will return true. 75862306a36Sopenharmony_ci * Otherwise, it will return false. The current loop iteration 75962306a36Sopenharmony_ci * intends to remove all local storage. So the last iteration 76062306a36Sopenharmony_ci * of the loop will set the free_cgroup_storage to true. 76162306a36Sopenharmony_ci */ 76262306a36Sopenharmony_ci free_storage = bpf_selem_unlink_storage_nolock( 76362306a36Sopenharmony_ci local_storage, selem, true, true); 76462306a36Sopenharmony_ci } 76562306a36Sopenharmony_ci raw_spin_unlock_irqrestore(&local_storage->lock, flags); 76662306a36Sopenharmony_ci 76762306a36Sopenharmony_ci if (free_storage) 76862306a36Sopenharmony_ci bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true); 76962306a36Sopenharmony_ci} 77062306a36Sopenharmony_ci 77162306a36Sopenharmony_ciu64 bpf_local_storage_map_mem_usage(const struct bpf_map *map) 77262306a36Sopenharmony_ci{ 77362306a36Sopenharmony_ci struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map; 77462306a36Sopenharmony_ci u64 usage = sizeof(*smap); 77562306a36Sopenharmony_ci 77662306a36Sopenharmony_ci /* The dynamically callocated selems are not counted currently. */ 77762306a36Sopenharmony_ci usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log); 77862306a36Sopenharmony_ci return usage; 77962306a36Sopenharmony_ci} 78062306a36Sopenharmony_ci 78162306a36Sopenharmony_ci/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory. 78262306a36Sopenharmony_ci * A deadlock free allocator is useful for storage that the bpf prog can easily 78362306a36Sopenharmony_ci * get a hold of the owner PTR_TO_BTF_ID in any context. eg. bpf_get_current_task_btf. 78462306a36Sopenharmony_ci * The task and cgroup storage fall into this case. The bpf_mem_alloc reuses 78562306a36Sopenharmony_ci * memory immediately. To be reuse-immediate safe, the owner destruction 78662306a36Sopenharmony_ci * code path needs to go through a rcu grace period before calling 78762306a36Sopenharmony_ci * bpf_local_storage_destroy(). 78862306a36Sopenharmony_ci * 78962306a36Sopenharmony_ci * When bpf_ma == false, the kmalloc and kfree are used. 79062306a36Sopenharmony_ci */ 79162306a36Sopenharmony_cistruct bpf_map * 79262306a36Sopenharmony_cibpf_local_storage_map_alloc(union bpf_attr *attr, 79362306a36Sopenharmony_ci struct bpf_local_storage_cache *cache, 79462306a36Sopenharmony_ci bool bpf_ma) 79562306a36Sopenharmony_ci{ 79662306a36Sopenharmony_ci struct bpf_local_storage_map *smap; 79762306a36Sopenharmony_ci unsigned int i; 79862306a36Sopenharmony_ci u32 nbuckets; 79962306a36Sopenharmony_ci int err; 80062306a36Sopenharmony_ci 80162306a36Sopenharmony_ci smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE); 80262306a36Sopenharmony_ci if (!smap) 80362306a36Sopenharmony_ci return ERR_PTR(-ENOMEM); 80462306a36Sopenharmony_ci bpf_map_init_from_attr(&smap->map, attr); 80562306a36Sopenharmony_ci 80662306a36Sopenharmony_ci nbuckets = roundup_pow_of_two(num_possible_cpus()); 80762306a36Sopenharmony_ci /* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */ 80862306a36Sopenharmony_ci nbuckets = max_t(u32, 2, nbuckets); 80962306a36Sopenharmony_ci smap->bucket_log = ilog2(nbuckets); 81062306a36Sopenharmony_ci 81162306a36Sopenharmony_ci smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets), 81262306a36Sopenharmony_ci nbuckets, GFP_USER | __GFP_NOWARN); 81362306a36Sopenharmony_ci if (!smap->buckets) { 81462306a36Sopenharmony_ci err = -ENOMEM; 81562306a36Sopenharmony_ci goto free_smap; 81662306a36Sopenharmony_ci } 81762306a36Sopenharmony_ci 81862306a36Sopenharmony_ci for (i = 0; i < nbuckets; i++) { 81962306a36Sopenharmony_ci INIT_HLIST_HEAD(&smap->buckets[i].list); 82062306a36Sopenharmony_ci raw_spin_lock_init(&smap->buckets[i].lock); 82162306a36Sopenharmony_ci } 82262306a36Sopenharmony_ci 82362306a36Sopenharmony_ci smap->elem_size = offsetof(struct bpf_local_storage_elem, 82462306a36Sopenharmony_ci sdata.data[attr->value_size]); 82562306a36Sopenharmony_ci 82662306a36Sopenharmony_ci smap->bpf_ma = bpf_ma; 82762306a36Sopenharmony_ci if (bpf_ma) { 82862306a36Sopenharmony_ci err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false); 82962306a36Sopenharmony_ci if (err) 83062306a36Sopenharmony_ci goto free_smap; 83162306a36Sopenharmony_ci 83262306a36Sopenharmony_ci err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false); 83362306a36Sopenharmony_ci if (err) { 83462306a36Sopenharmony_ci bpf_mem_alloc_destroy(&smap->selem_ma); 83562306a36Sopenharmony_ci goto free_smap; 83662306a36Sopenharmony_ci } 83762306a36Sopenharmony_ci } 83862306a36Sopenharmony_ci 83962306a36Sopenharmony_ci smap->cache_idx = bpf_local_storage_cache_idx_get(cache); 84062306a36Sopenharmony_ci return &smap->map; 84162306a36Sopenharmony_ci 84262306a36Sopenharmony_cifree_smap: 84362306a36Sopenharmony_ci kvfree(smap->buckets); 84462306a36Sopenharmony_ci bpf_map_area_free(smap); 84562306a36Sopenharmony_ci return ERR_PTR(err); 84662306a36Sopenharmony_ci} 84762306a36Sopenharmony_ci 84862306a36Sopenharmony_civoid bpf_local_storage_map_free(struct bpf_map *map, 84962306a36Sopenharmony_ci struct bpf_local_storage_cache *cache, 85062306a36Sopenharmony_ci int __percpu *busy_counter) 85162306a36Sopenharmony_ci{ 85262306a36Sopenharmony_ci struct bpf_local_storage_map_bucket *b; 85362306a36Sopenharmony_ci struct bpf_local_storage_elem *selem; 85462306a36Sopenharmony_ci struct bpf_local_storage_map *smap; 85562306a36Sopenharmony_ci unsigned int i; 85662306a36Sopenharmony_ci 85762306a36Sopenharmony_ci smap = (struct bpf_local_storage_map *)map; 85862306a36Sopenharmony_ci bpf_local_storage_cache_idx_free(cache, smap->cache_idx); 85962306a36Sopenharmony_ci 86062306a36Sopenharmony_ci /* Note that this map might be concurrently cloned from 86162306a36Sopenharmony_ci * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone 86262306a36Sopenharmony_ci * RCU read section to finish before proceeding. New RCU 86362306a36Sopenharmony_ci * read sections should be prevented via bpf_map_inc_not_zero. 86462306a36Sopenharmony_ci */ 86562306a36Sopenharmony_ci synchronize_rcu(); 86662306a36Sopenharmony_ci 86762306a36Sopenharmony_ci /* bpf prog and the userspace can no longer access this map 86862306a36Sopenharmony_ci * now. No new selem (of this map) can be added 86962306a36Sopenharmony_ci * to the owner->storage or to the map bucket's list. 87062306a36Sopenharmony_ci * 87162306a36Sopenharmony_ci * The elem of this map can be cleaned up here 87262306a36Sopenharmony_ci * or when the storage is freed e.g. 87362306a36Sopenharmony_ci * by bpf_sk_storage_free() during __sk_destruct(). 87462306a36Sopenharmony_ci */ 87562306a36Sopenharmony_ci for (i = 0; i < (1U << smap->bucket_log); i++) { 87662306a36Sopenharmony_ci b = &smap->buckets[i]; 87762306a36Sopenharmony_ci 87862306a36Sopenharmony_ci rcu_read_lock(); 87962306a36Sopenharmony_ci /* No one is adding to b->list now */ 88062306a36Sopenharmony_ci while ((selem = hlist_entry_safe( 88162306a36Sopenharmony_ci rcu_dereference_raw(hlist_first_rcu(&b->list)), 88262306a36Sopenharmony_ci struct bpf_local_storage_elem, map_node))) { 88362306a36Sopenharmony_ci if (busy_counter) { 88462306a36Sopenharmony_ci migrate_disable(); 88562306a36Sopenharmony_ci this_cpu_inc(*busy_counter); 88662306a36Sopenharmony_ci } 88762306a36Sopenharmony_ci bpf_selem_unlink(selem, true); 88862306a36Sopenharmony_ci if (busy_counter) { 88962306a36Sopenharmony_ci this_cpu_dec(*busy_counter); 89062306a36Sopenharmony_ci migrate_enable(); 89162306a36Sopenharmony_ci } 89262306a36Sopenharmony_ci cond_resched_rcu(); 89362306a36Sopenharmony_ci } 89462306a36Sopenharmony_ci rcu_read_unlock(); 89562306a36Sopenharmony_ci } 89662306a36Sopenharmony_ci 89762306a36Sopenharmony_ci /* While freeing the storage we may still need to access the map. 89862306a36Sopenharmony_ci * 89962306a36Sopenharmony_ci * e.g. when bpf_sk_storage_free() has unlinked selem from the map 90062306a36Sopenharmony_ci * which then made the above while((selem = ...)) loop 90162306a36Sopenharmony_ci * exit immediately. 90262306a36Sopenharmony_ci * 90362306a36Sopenharmony_ci * However, while freeing the storage one still needs to access the 90462306a36Sopenharmony_ci * smap->elem_size to do the uncharging in 90562306a36Sopenharmony_ci * bpf_selem_unlink_storage_nolock(). 90662306a36Sopenharmony_ci * 90762306a36Sopenharmony_ci * Hence, wait another rcu grace period for the storage to be freed. 90862306a36Sopenharmony_ci */ 90962306a36Sopenharmony_ci synchronize_rcu(); 91062306a36Sopenharmony_ci 91162306a36Sopenharmony_ci if (smap->bpf_ma) { 91262306a36Sopenharmony_ci bpf_mem_alloc_destroy(&smap->selem_ma); 91362306a36Sopenharmony_ci bpf_mem_alloc_destroy(&smap->storage_ma); 91462306a36Sopenharmony_ci } 91562306a36Sopenharmony_ci kvfree(smap->buckets); 91662306a36Sopenharmony_ci bpf_map_area_free(smap); 91762306a36Sopenharmony_ci} 918