162306a36Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
262306a36Sopenharmony_ci/* Copyright (c) 2019 Facebook  */
362306a36Sopenharmony_ci#include <linux/rculist.h>
462306a36Sopenharmony_ci#include <linux/list.h>
562306a36Sopenharmony_ci#include <linux/hash.h>
662306a36Sopenharmony_ci#include <linux/types.h>
762306a36Sopenharmony_ci#include <linux/spinlock.h>
862306a36Sopenharmony_ci#include <linux/bpf.h>
962306a36Sopenharmony_ci#include <linux/btf_ids.h>
1062306a36Sopenharmony_ci#include <linux/bpf_local_storage.h>
1162306a36Sopenharmony_ci#include <net/sock.h>
1262306a36Sopenharmony_ci#include <uapi/linux/sock_diag.h>
1362306a36Sopenharmony_ci#include <uapi/linux/btf.h>
1462306a36Sopenharmony_ci#include <linux/rcupdate.h>
1562306a36Sopenharmony_ci#include <linux/rcupdate_trace.h>
1662306a36Sopenharmony_ci#include <linux/rcupdate_wait.h>
1762306a36Sopenharmony_ci
1862306a36Sopenharmony_ci#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
1962306a36Sopenharmony_ci
2062306a36Sopenharmony_cistatic struct bpf_local_storage_map_bucket *
2162306a36Sopenharmony_ciselect_bucket(struct bpf_local_storage_map *smap,
2262306a36Sopenharmony_ci	      struct bpf_local_storage_elem *selem)
2362306a36Sopenharmony_ci{
2462306a36Sopenharmony_ci	return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
2562306a36Sopenharmony_ci}
2662306a36Sopenharmony_ci
2762306a36Sopenharmony_cistatic int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
2862306a36Sopenharmony_ci{
2962306a36Sopenharmony_ci	struct bpf_map *map = &smap->map;
3062306a36Sopenharmony_ci
3162306a36Sopenharmony_ci	if (!map->ops->map_local_storage_charge)
3262306a36Sopenharmony_ci		return 0;
3362306a36Sopenharmony_ci
3462306a36Sopenharmony_ci	return map->ops->map_local_storage_charge(smap, owner, size);
3562306a36Sopenharmony_ci}
3662306a36Sopenharmony_ci
3762306a36Sopenharmony_cistatic void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
3862306a36Sopenharmony_ci			 u32 size)
3962306a36Sopenharmony_ci{
4062306a36Sopenharmony_ci	struct bpf_map *map = &smap->map;
4162306a36Sopenharmony_ci
4262306a36Sopenharmony_ci	if (map->ops->map_local_storage_uncharge)
4362306a36Sopenharmony_ci		map->ops->map_local_storage_uncharge(smap, owner, size);
4462306a36Sopenharmony_ci}
4562306a36Sopenharmony_ci
4662306a36Sopenharmony_cistatic struct bpf_local_storage __rcu **
4762306a36Sopenharmony_ciowner_storage(struct bpf_local_storage_map *smap, void *owner)
4862306a36Sopenharmony_ci{
4962306a36Sopenharmony_ci	struct bpf_map *map = &smap->map;
5062306a36Sopenharmony_ci
5162306a36Sopenharmony_ci	return map->ops->map_owner_storage_ptr(owner);
5262306a36Sopenharmony_ci}
5362306a36Sopenharmony_ci
5462306a36Sopenharmony_cistatic bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
5562306a36Sopenharmony_ci{
5662306a36Sopenharmony_ci	return !hlist_unhashed_lockless(&selem->snode);
5762306a36Sopenharmony_ci}
5862306a36Sopenharmony_ci
5962306a36Sopenharmony_cistatic bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
6062306a36Sopenharmony_ci{
6162306a36Sopenharmony_ci	return !hlist_unhashed(&selem->snode);
6262306a36Sopenharmony_ci}
6362306a36Sopenharmony_ci
6462306a36Sopenharmony_cistatic bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
6562306a36Sopenharmony_ci{
6662306a36Sopenharmony_ci	return !hlist_unhashed_lockless(&selem->map_node);
6762306a36Sopenharmony_ci}
6862306a36Sopenharmony_ci
6962306a36Sopenharmony_cistatic bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
7062306a36Sopenharmony_ci{
7162306a36Sopenharmony_ci	return !hlist_unhashed(&selem->map_node);
7262306a36Sopenharmony_ci}
7362306a36Sopenharmony_ci
7462306a36Sopenharmony_cistruct bpf_local_storage_elem *
7562306a36Sopenharmony_cibpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
7662306a36Sopenharmony_ci		void *value, bool charge_mem, gfp_t gfp_flags)
7762306a36Sopenharmony_ci{
7862306a36Sopenharmony_ci	struct bpf_local_storage_elem *selem;
7962306a36Sopenharmony_ci
8062306a36Sopenharmony_ci	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
8162306a36Sopenharmony_ci		return NULL;
8262306a36Sopenharmony_ci
8362306a36Sopenharmony_ci	if (smap->bpf_ma) {
8462306a36Sopenharmony_ci		migrate_disable();
8562306a36Sopenharmony_ci		selem = bpf_mem_cache_alloc_flags(&smap->selem_ma, gfp_flags);
8662306a36Sopenharmony_ci		migrate_enable();
8762306a36Sopenharmony_ci		if (selem)
8862306a36Sopenharmony_ci			/* Keep the original bpf_map_kzalloc behavior
8962306a36Sopenharmony_ci			 * before started using the bpf_mem_cache_alloc.
9062306a36Sopenharmony_ci			 *
9162306a36Sopenharmony_ci			 * No need to use zero_map_value. The bpf_selem_free()
9262306a36Sopenharmony_ci			 * only does bpf_mem_cache_free when there is
9362306a36Sopenharmony_ci			 * no other bpf prog is using the selem.
9462306a36Sopenharmony_ci			 */
9562306a36Sopenharmony_ci			memset(SDATA(selem)->data, 0, smap->map.value_size);
9662306a36Sopenharmony_ci	} else {
9762306a36Sopenharmony_ci		selem = bpf_map_kzalloc(&smap->map, smap->elem_size,
9862306a36Sopenharmony_ci					gfp_flags | __GFP_NOWARN);
9962306a36Sopenharmony_ci	}
10062306a36Sopenharmony_ci
10162306a36Sopenharmony_ci	if (selem) {
10262306a36Sopenharmony_ci		if (value)
10362306a36Sopenharmony_ci			copy_map_value(&smap->map, SDATA(selem)->data, value);
10462306a36Sopenharmony_ci		/* No need to call check_and_init_map_value as memory is zero init */
10562306a36Sopenharmony_ci		return selem;
10662306a36Sopenharmony_ci	}
10762306a36Sopenharmony_ci
10862306a36Sopenharmony_ci	if (charge_mem)
10962306a36Sopenharmony_ci		mem_uncharge(smap, owner, smap->elem_size);
11062306a36Sopenharmony_ci
11162306a36Sopenharmony_ci	return NULL;
11262306a36Sopenharmony_ci}
11362306a36Sopenharmony_ci
11462306a36Sopenharmony_ci/* rcu tasks trace callback for bpf_ma == false */
11562306a36Sopenharmony_cistatic void __bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
11662306a36Sopenharmony_ci{
11762306a36Sopenharmony_ci	struct bpf_local_storage *local_storage;
11862306a36Sopenharmony_ci
11962306a36Sopenharmony_ci	/* If RCU Tasks Trace grace period implies RCU grace period, do
12062306a36Sopenharmony_ci	 * kfree(), else do kfree_rcu().
12162306a36Sopenharmony_ci	 */
12262306a36Sopenharmony_ci	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
12362306a36Sopenharmony_ci	if (rcu_trace_implies_rcu_gp())
12462306a36Sopenharmony_ci		kfree(local_storage);
12562306a36Sopenharmony_ci	else
12662306a36Sopenharmony_ci		kfree_rcu(local_storage, rcu);
12762306a36Sopenharmony_ci}
12862306a36Sopenharmony_ci
12962306a36Sopenharmony_cistatic void bpf_local_storage_free_rcu(struct rcu_head *rcu)
13062306a36Sopenharmony_ci{
13162306a36Sopenharmony_ci	struct bpf_local_storage *local_storage;
13262306a36Sopenharmony_ci
13362306a36Sopenharmony_ci	local_storage = container_of(rcu, struct bpf_local_storage, rcu);
13462306a36Sopenharmony_ci	bpf_mem_cache_raw_free(local_storage);
13562306a36Sopenharmony_ci}
13662306a36Sopenharmony_ci
13762306a36Sopenharmony_cistatic void bpf_local_storage_free_trace_rcu(struct rcu_head *rcu)
13862306a36Sopenharmony_ci{
13962306a36Sopenharmony_ci	if (rcu_trace_implies_rcu_gp())
14062306a36Sopenharmony_ci		bpf_local_storage_free_rcu(rcu);
14162306a36Sopenharmony_ci	else
14262306a36Sopenharmony_ci		call_rcu(rcu, bpf_local_storage_free_rcu);
14362306a36Sopenharmony_ci}
14462306a36Sopenharmony_ci
14562306a36Sopenharmony_ci/* Handle bpf_ma == false */
14662306a36Sopenharmony_cistatic void __bpf_local_storage_free(struct bpf_local_storage *local_storage,
14762306a36Sopenharmony_ci				     bool vanilla_rcu)
14862306a36Sopenharmony_ci{
14962306a36Sopenharmony_ci	if (vanilla_rcu)
15062306a36Sopenharmony_ci		kfree_rcu(local_storage, rcu);
15162306a36Sopenharmony_ci	else
15262306a36Sopenharmony_ci		call_rcu_tasks_trace(&local_storage->rcu,
15362306a36Sopenharmony_ci				     __bpf_local_storage_free_trace_rcu);
15462306a36Sopenharmony_ci}
15562306a36Sopenharmony_ci
15662306a36Sopenharmony_cistatic void bpf_local_storage_free(struct bpf_local_storage *local_storage,
15762306a36Sopenharmony_ci				   struct bpf_local_storage_map *smap,
15862306a36Sopenharmony_ci				   bool bpf_ma, bool reuse_now)
15962306a36Sopenharmony_ci{
16062306a36Sopenharmony_ci	if (!local_storage)
16162306a36Sopenharmony_ci		return;
16262306a36Sopenharmony_ci
16362306a36Sopenharmony_ci	if (!bpf_ma) {
16462306a36Sopenharmony_ci		__bpf_local_storage_free(local_storage, reuse_now);
16562306a36Sopenharmony_ci		return;
16662306a36Sopenharmony_ci	}
16762306a36Sopenharmony_ci
16862306a36Sopenharmony_ci	if (!reuse_now) {
16962306a36Sopenharmony_ci		call_rcu_tasks_trace(&local_storage->rcu,
17062306a36Sopenharmony_ci				     bpf_local_storage_free_trace_rcu);
17162306a36Sopenharmony_ci		return;
17262306a36Sopenharmony_ci	}
17362306a36Sopenharmony_ci
17462306a36Sopenharmony_ci	if (smap) {
17562306a36Sopenharmony_ci		migrate_disable();
17662306a36Sopenharmony_ci		bpf_mem_cache_free(&smap->storage_ma, local_storage);
17762306a36Sopenharmony_ci		migrate_enable();
17862306a36Sopenharmony_ci	} else {
17962306a36Sopenharmony_ci		/* smap could be NULL if the selem that triggered
18062306a36Sopenharmony_ci		 * this 'local_storage' creation had been long gone.
18162306a36Sopenharmony_ci		 * In this case, directly do call_rcu().
18262306a36Sopenharmony_ci		 */
18362306a36Sopenharmony_ci		call_rcu(&local_storage->rcu, bpf_local_storage_free_rcu);
18462306a36Sopenharmony_ci	}
18562306a36Sopenharmony_ci}
18662306a36Sopenharmony_ci
18762306a36Sopenharmony_ci/* rcu tasks trace callback for bpf_ma == false */
18862306a36Sopenharmony_cistatic void __bpf_selem_free_trace_rcu(struct rcu_head *rcu)
18962306a36Sopenharmony_ci{
19062306a36Sopenharmony_ci	struct bpf_local_storage_elem *selem;
19162306a36Sopenharmony_ci
19262306a36Sopenharmony_ci	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
19362306a36Sopenharmony_ci	if (rcu_trace_implies_rcu_gp())
19462306a36Sopenharmony_ci		kfree(selem);
19562306a36Sopenharmony_ci	else
19662306a36Sopenharmony_ci		kfree_rcu(selem, rcu);
19762306a36Sopenharmony_ci}
19862306a36Sopenharmony_ci
19962306a36Sopenharmony_ci/* Handle bpf_ma == false */
20062306a36Sopenharmony_cistatic void __bpf_selem_free(struct bpf_local_storage_elem *selem,
20162306a36Sopenharmony_ci			     bool vanilla_rcu)
20262306a36Sopenharmony_ci{
20362306a36Sopenharmony_ci	if (vanilla_rcu)
20462306a36Sopenharmony_ci		kfree_rcu(selem, rcu);
20562306a36Sopenharmony_ci	else
20662306a36Sopenharmony_ci		call_rcu_tasks_trace(&selem->rcu, __bpf_selem_free_trace_rcu);
20762306a36Sopenharmony_ci}
20862306a36Sopenharmony_ci
20962306a36Sopenharmony_cistatic void bpf_selem_free_rcu(struct rcu_head *rcu)
21062306a36Sopenharmony_ci{
21162306a36Sopenharmony_ci	struct bpf_local_storage_elem *selem;
21262306a36Sopenharmony_ci
21362306a36Sopenharmony_ci	selem = container_of(rcu, struct bpf_local_storage_elem, rcu);
21462306a36Sopenharmony_ci	bpf_mem_cache_raw_free(selem);
21562306a36Sopenharmony_ci}
21662306a36Sopenharmony_ci
21762306a36Sopenharmony_cistatic void bpf_selem_free_trace_rcu(struct rcu_head *rcu)
21862306a36Sopenharmony_ci{
21962306a36Sopenharmony_ci	if (rcu_trace_implies_rcu_gp())
22062306a36Sopenharmony_ci		bpf_selem_free_rcu(rcu);
22162306a36Sopenharmony_ci	else
22262306a36Sopenharmony_ci		call_rcu(rcu, bpf_selem_free_rcu);
22362306a36Sopenharmony_ci}
22462306a36Sopenharmony_ci
22562306a36Sopenharmony_civoid bpf_selem_free(struct bpf_local_storage_elem *selem,
22662306a36Sopenharmony_ci		    struct bpf_local_storage_map *smap,
22762306a36Sopenharmony_ci		    bool reuse_now)
22862306a36Sopenharmony_ci{
22962306a36Sopenharmony_ci	bpf_obj_free_fields(smap->map.record, SDATA(selem)->data);
23062306a36Sopenharmony_ci
23162306a36Sopenharmony_ci	if (!smap->bpf_ma) {
23262306a36Sopenharmony_ci		__bpf_selem_free(selem, reuse_now);
23362306a36Sopenharmony_ci		return;
23462306a36Sopenharmony_ci	}
23562306a36Sopenharmony_ci
23662306a36Sopenharmony_ci	if (!reuse_now) {
23762306a36Sopenharmony_ci		call_rcu_tasks_trace(&selem->rcu, bpf_selem_free_trace_rcu);
23862306a36Sopenharmony_ci	} else {
23962306a36Sopenharmony_ci		/* Instead of using the vanilla call_rcu(),
24062306a36Sopenharmony_ci		 * bpf_mem_cache_free will be able to reuse selem
24162306a36Sopenharmony_ci		 * immediately.
24262306a36Sopenharmony_ci		 */
24362306a36Sopenharmony_ci		migrate_disable();
24462306a36Sopenharmony_ci		bpf_mem_cache_free(&smap->selem_ma, selem);
24562306a36Sopenharmony_ci		migrate_enable();
24662306a36Sopenharmony_ci	}
24762306a36Sopenharmony_ci}
24862306a36Sopenharmony_ci
24962306a36Sopenharmony_ci/* local_storage->lock must be held and selem->local_storage == local_storage.
25062306a36Sopenharmony_ci * The caller must ensure selem->smap is still valid to be
25162306a36Sopenharmony_ci * dereferenced for its smap->elem_size and smap->cache_idx.
25262306a36Sopenharmony_ci */
25362306a36Sopenharmony_cistatic bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
25462306a36Sopenharmony_ci					    struct bpf_local_storage_elem *selem,
25562306a36Sopenharmony_ci					    bool uncharge_mem, bool reuse_now)
25662306a36Sopenharmony_ci{
25762306a36Sopenharmony_ci	struct bpf_local_storage_map *smap;
25862306a36Sopenharmony_ci	bool free_local_storage;
25962306a36Sopenharmony_ci	void *owner;
26062306a36Sopenharmony_ci
26162306a36Sopenharmony_ci	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
26262306a36Sopenharmony_ci	owner = local_storage->owner;
26362306a36Sopenharmony_ci
26462306a36Sopenharmony_ci	/* All uncharging on the owner must be done first.
26562306a36Sopenharmony_ci	 * The owner may be freed once the last selem is unlinked
26662306a36Sopenharmony_ci	 * from local_storage.
26762306a36Sopenharmony_ci	 */
26862306a36Sopenharmony_ci	if (uncharge_mem)
26962306a36Sopenharmony_ci		mem_uncharge(smap, owner, smap->elem_size);
27062306a36Sopenharmony_ci
27162306a36Sopenharmony_ci	free_local_storage = hlist_is_singular_node(&selem->snode,
27262306a36Sopenharmony_ci						    &local_storage->list);
27362306a36Sopenharmony_ci	if (free_local_storage) {
27462306a36Sopenharmony_ci		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
27562306a36Sopenharmony_ci		local_storage->owner = NULL;
27662306a36Sopenharmony_ci
27762306a36Sopenharmony_ci		/* After this RCU_INIT, owner may be freed and cannot be used */
27862306a36Sopenharmony_ci		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);
27962306a36Sopenharmony_ci
28062306a36Sopenharmony_ci		/* local_storage is not freed now.  local_storage->lock is
28162306a36Sopenharmony_ci		 * still held and raw_spin_unlock_bh(&local_storage->lock)
28262306a36Sopenharmony_ci		 * will be done by the caller.
28362306a36Sopenharmony_ci		 *
28462306a36Sopenharmony_ci		 * Although the unlock will be done under
28562306a36Sopenharmony_ci		 * rcu_read_lock(),  it is more intuitive to
28662306a36Sopenharmony_ci		 * read if the freeing of the storage is done
28762306a36Sopenharmony_ci		 * after the raw_spin_unlock_bh(&local_storage->lock).
28862306a36Sopenharmony_ci		 *
28962306a36Sopenharmony_ci		 * Hence, a "bool free_local_storage" is returned
29062306a36Sopenharmony_ci		 * to the caller which then calls then frees the storage after
29162306a36Sopenharmony_ci		 * all the RCU grace periods have expired.
29262306a36Sopenharmony_ci		 */
29362306a36Sopenharmony_ci	}
29462306a36Sopenharmony_ci	hlist_del_init_rcu(&selem->snode);
29562306a36Sopenharmony_ci	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
29662306a36Sopenharmony_ci	    SDATA(selem))
29762306a36Sopenharmony_ci		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);
29862306a36Sopenharmony_ci
29962306a36Sopenharmony_ci	bpf_selem_free(selem, smap, reuse_now);
30062306a36Sopenharmony_ci
30162306a36Sopenharmony_ci	if (rcu_access_pointer(local_storage->smap) == smap)
30262306a36Sopenharmony_ci		RCU_INIT_POINTER(local_storage->smap, NULL);
30362306a36Sopenharmony_ci
30462306a36Sopenharmony_ci	return free_local_storage;
30562306a36Sopenharmony_ci}
30662306a36Sopenharmony_ci
30762306a36Sopenharmony_cistatic bool check_storage_bpf_ma(struct bpf_local_storage *local_storage,
30862306a36Sopenharmony_ci				 struct bpf_local_storage_map *storage_smap,
30962306a36Sopenharmony_ci				 struct bpf_local_storage_elem *selem)
31062306a36Sopenharmony_ci{
31162306a36Sopenharmony_ci
31262306a36Sopenharmony_ci	struct bpf_local_storage_map *selem_smap;
31362306a36Sopenharmony_ci
31462306a36Sopenharmony_ci	/* local_storage->smap may be NULL. If it is, get the bpf_ma
31562306a36Sopenharmony_ci	 * from any selem in the local_storage->list. The bpf_ma of all
31662306a36Sopenharmony_ci	 * local_storage and selem should have the same value
31762306a36Sopenharmony_ci	 * for the same map type.
31862306a36Sopenharmony_ci	 *
31962306a36Sopenharmony_ci	 * If the local_storage->list is already empty, the caller will not
32062306a36Sopenharmony_ci	 * care about the bpf_ma value also because the caller is not
32162306a36Sopenharmony_ci	 * responsibile to free the local_storage.
32262306a36Sopenharmony_ci	 */
32362306a36Sopenharmony_ci
32462306a36Sopenharmony_ci	if (storage_smap)
32562306a36Sopenharmony_ci		return storage_smap->bpf_ma;
32662306a36Sopenharmony_ci
32762306a36Sopenharmony_ci	if (!selem) {
32862306a36Sopenharmony_ci		struct hlist_node *n;
32962306a36Sopenharmony_ci
33062306a36Sopenharmony_ci		n = rcu_dereference_check(hlist_first_rcu(&local_storage->list),
33162306a36Sopenharmony_ci					  bpf_rcu_lock_held());
33262306a36Sopenharmony_ci		if (!n)
33362306a36Sopenharmony_ci			return false;
33462306a36Sopenharmony_ci
33562306a36Sopenharmony_ci		selem = hlist_entry(n, struct bpf_local_storage_elem, snode);
33662306a36Sopenharmony_ci	}
33762306a36Sopenharmony_ci	selem_smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
33862306a36Sopenharmony_ci
33962306a36Sopenharmony_ci	return selem_smap->bpf_ma;
34062306a36Sopenharmony_ci}
34162306a36Sopenharmony_ci
34262306a36Sopenharmony_cistatic void bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem,
34362306a36Sopenharmony_ci				     bool reuse_now)
34462306a36Sopenharmony_ci{
34562306a36Sopenharmony_ci	struct bpf_local_storage_map *storage_smap;
34662306a36Sopenharmony_ci	struct bpf_local_storage *local_storage;
34762306a36Sopenharmony_ci	bool bpf_ma, free_local_storage = false;
34862306a36Sopenharmony_ci	unsigned long flags;
34962306a36Sopenharmony_ci
35062306a36Sopenharmony_ci	if (unlikely(!selem_linked_to_storage_lockless(selem)))
35162306a36Sopenharmony_ci		/* selem has already been unlinked from sk */
35262306a36Sopenharmony_ci		return;
35362306a36Sopenharmony_ci
35462306a36Sopenharmony_ci	local_storage = rcu_dereference_check(selem->local_storage,
35562306a36Sopenharmony_ci					      bpf_rcu_lock_held());
35662306a36Sopenharmony_ci	storage_smap = rcu_dereference_check(local_storage->smap,
35762306a36Sopenharmony_ci					     bpf_rcu_lock_held());
35862306a36Sopenharmony_ci	bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, selem);
35962306a36Sopenharmony_ci
36062306a36Sopenharmony_ci	raw_spin_lock_irqsave(&local_storage->lock, flags);
36162306a36Sopenharmony_ci	if (likely(selem_linked_to_storage(selem)))
36262306a36Sopenharmony_ci		free_local_storage = bpf_selem_unlink_storage_nolock(
36362306a36Sopenharmony_ci			local_storage, selem, true, reuse_now);
36462306a36Sopenharmony_ci	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
36562306a36Sopenharmony_ci
36662306a36Sopenharmony_ci	if (free_local_storage)
36762306a36Sopenharmony_ci		bpf_local_storage_free(local_storage, storage_smap, bpf_ma, reuse_now);
36862306a36Sopenharmony_ci}
36962306a36Sopenharmony_ci
37062306a36Sopenharmony_civoid bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
37162306a36Sopenharmony_ci				   struct bpf_local_storage_elem *selem)
37262306a36Sopenharmony_ci{
37362306a36Sopenharmony_ci	RCU_INIT_POINTER(selem->local_storage, local_storage);
37462306a36Sopenharmony_ci	hlist_add_head_rcu(&selem->snode, &local_storage->list);
37562306a36Sopenharmony_ci}
37662306a36Sopenharmony_ci
37762306a36Sopenharmony_cistatic void bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
37862306a36Sopenharmony_ci{
37962306a36Sopenharmony_ci	struct bpf_local_storage_map *smap;
38062306a36Sopenharmony_ci	struct bpf_local_storage_map_bucket *b;
38162306a36Sopenharmony_ci	unsigned long flags;
38262306a36Sopenharmony_ci
38362306a36Sopenharmony_ci	if (unlikely(!selem_linked_to_map_lockless(selem)))
38462306a36Sopenharmony_ci		/* selem has already be unlinked from smap */
38562306a36Sopenharmony_ci		return;
38662306a36Sopenharmony_ci
38762306a36Sopenharmony_ci	smap = rcu_dereference_check(SDATA(selem)->smap, bpf_rcu_lock_held());
38862306a36Sopenharmony_ci	b = select_bucket(smap, selem);
38962306a36Sopenharmony_ci	raw_spin_lock_irqsave(&b->lock, flags);
39062306a36Sopenharmony_ci	if (likely(selem_linked_to_map(selem)))
39162306a36Sopenharmony_ci		hlist_del_init_rcu(&selem->map_node);
39262306a36Sopenharmony_ci	raw_spin_unlock_irqrestore(&b->lock, flags);
39362306a36Sopenharmony_ci}
39462306a36Sopenharmony_ci
39562306a36Sopenharmony_civoid bpf_selem_link_map(struct bpf_local_storage_map *smap,
39662306a36Sopenharmony_ci			struct bpf_local_storage_elem *selem)
39762306a36Sopenharmony_ci{
39862306a36Sopenharmony_ci	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
39962306a36Sopenharmony_ci	unsigned long flags;
40062306a36Sopenharmony_ci
40162306a36Sopenharmony_ci	raw_spin_lock_irqsave(&b->lock, flags);
40262306a36Sopenharmony_ci	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
40362306a36Sopenharmony_ci	hlist_add_head_rcu(&selem->map_node, &b->list);
40462306a36Sopenharmony_ci	raw_spin_unlock_irqrestore(&b->lock, flags);
40562306a36Sopenharmony_ci}
40662306a36Sopenharmony_ci
40762306a36Sopenharmony_civoid bpf_selem_unlink(struct bpf_local_storage_elem *selem, bool reuse_now)
40862306a36Sopenharmony_ci{
40962306a36Sopenharmony_ci	/* Always unlink from map before unlinking from local_storage
41062306a36Sopenharmony_ci	 * because selem will be freed after successfully unlinked from
41162306a36Sopenharmony_ci	 * the local_storage.
41262306a36Sopenharmony_ci	 */
41362306a36Sopenharmony_ci	bpf_selem_unlink_map(selem);
41462306a36Sopenharmony_ci	bpf_selem_unlink_storage(selem, reuse_now);
41562306a36Sopenharmony_ci}
41662306a36Sopenharmony_ci
41762306a36Sopenharmony_ci/* If cacheit_lockit is false, this lookup function is lockless */
41862306a36Sopenharmony_cistruct bpf_local_storage_data *
41962306a36Sopenharmony_cibpf_local_storage_lookup(struct bpf_local_storage *local_storage,
42062306a36Sopenharmony_ci			 struct bpf_local_storage_map *smap,
42162306a36Sopenharmony_ci			 bool cacheit_lockit)
42262306a36Sopenharmony_ci{
42362306a36Sopenharmony_ci	struct bpf_local_storage_data *sdata;
42462306a36Sopenharmony_ci	struct bpf_local_storage_elem *selem;
42562306a36Sopenharmony_ci
42662306a36Sopenharmony_ci	/* Fast path (cache hit) */
42762306a36Sopenharmony_ci	sdata = rcu_dereference_check(local_storage->cache[smap->cache_idx],
42862306a36Sopenharmony_ci				      bpf_rcu_lock_held());
42962306a36Sopenharmony_ci	if (sdata && rcu_access_pointer(sdata->smap) == smap)
43062306a36Sopenharmony_ci		return sdata;
43162306a36Sopenharmony_ci
43262306a36Sopenharmony_ci	/* Slow path (cache miss) */
43362306a36Sopenharmony_ci	hlist_for_each_entry_rcu(selem, &local_storage->list, snode,
43462306a36Sopenharmony_ci				  rcu_read_lock_trace_held())
43562306a36Sopenharmony_ci		if (rcu_access_pointer(SDATA(selem)->smap) == smap)
43662306a36Sopenharmony_ci			break;
43762306a36Sopenharmony_ci
43862306a36Sopenharmony_ci	if (!selem)
43962306a36Sopenharmony_ci		return NULL;
44062306a36Sopenharmony_ci
44162306a36Sopenharmony_ci	sdata = SDATA(selem);
44262306a36Sopenharmony_ci	if (cacheit_lockit) {
44362306a36Sopenharmony_ci		unsigned long flags;
44462306a36Sopenharmony_ci
44562306a36Sopenharmony_ci		/* spinlock is needed to avoid racing with the
44662306a36Sopenharmony_ci		 * parallel delete.  Otherwise, publishing an already
44762306a36Sopenharmony_ci		 * deleted sdata to the cache will become a use-after-free
44862306a36Sopenharmony_ci		 * problem in the next bpf_local_storage_lookup().
44962306a36Sopenharmony_ci		 */
45062306a36Sopenharmony_ci		raw_spin_lock_irqsave(&local_storage->lock, flags);
45162306a36Sopenharmony_ci		if (selem_linked_to_storage(selem))
45262306a36Sopenharmony_ci			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
45362306a36Sopenharmony_ci					   sdata);
45462306a36Sopenharmony_ci		raw_spin_unlock_irqrestore(&local_storage->lock, flags);
45562306a36Sopenharmony_ci	}
45662306a36Sopenharmony_ci
45762306a36Sopenharmony_ci	return sdata;
45862306a36Sopenharmony_ci}
45962306a36Sopenharmony_ci
46062306a36Sopenharmony_cistatic int check_flags(const struct bpf_local_storage_data *old_sdata,
46162306a36Sopenharmony_ci		       u64 map_flags)
46262306a36Sopenharmony_ci{
46362306a36Sopenharmony_ci	if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
46462306a36Sopenharmony_ci		/* elem already exists */
46562306a36Sopenharmony_ci		return -EEXIST;
46662306a36Sopenharmony_ci
46762306a36Sopenharmony_ci	if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
46862306a36Sopenharmony_ci		/* elem doesn't exist, cannot update it */
46962306a36Sopenharmony_ci		return -ENOENT;
47062306a36Sopenharmony_ci
47162306a36Sopenharmony_ci	return 0;
47262306a36Sopenharmony_ci}
47362306a36Sopenharmony_ci
47462306a36Sopenharmony_ciint bpf_local_storage_alloc(void *owner,
47562306a36Sopenharmony_ci			    struct bpf_local_storage_map *smap,
47662306a36Sopenharmony_ci			    struct bpf_local_storage_elem *first_selem,
47762306a36Sopenharmony_ci			    gfp_t gfp_flags)
47862306a36Sopenharmony_ci{
47962306a36Sopenharmony_ci	struct bpf_local_storage *prev_storage, *storage;
48062306a36Sopenharmony_ci	struct bpf_local_storage **owner_storage_ptr;
48162306a36Sopenharmony_ci	int err;
48262306a36Sopenharmony_ci
48362306a36Sopenharmony_ci	err = mem_charge(smap, owner, sizeof(*storage));
48462306a36Sopenharmony_ci	if (err)
48562306a36Sopenharmony_ci		return err;
48662306a36Sopenharmony_ci
48762306a36Sopenharmony_ci	if (smap->bpf_ma) {
48862306a36Sopenharmony_ci		migrate_disable();
48962306a36Sopenharmony_ci		storage = bpf_mem_cache_alloc_flags(&smap->storage_ma, gfp_flags);
49062306a36Sopenharmony_ci		migrate_enable();
49162306a36Sopenharmony_ci	} else {
49262306a36Sopenharmony_ci		storage = bpf_map_kzalloc(&smap->map, sizeof(*storage),
49362306a36Sopenharmony_ci					  gfp_flags | __GFP_NOWARN);
49462306a36Sopenharmony_ci	}
49562306a36Sopenharmony_ci
49662306a36Sopenharmony_ci	if (!storage) {
49762306a36Sopenharmony_ci		err = -ENOMEM;
49862306a36Sopenharmony_ci		goto uncharge;
49962306a36Sopenharmony_ci	}
50062306a36Sopenharmony_ci
50162306a36Sopenharmony_ci	RCU_INIT_POINTER(storage->smap, smap);
50262306a36Sopenharmony_ci	INIT_HLIST_HEAD(&storage->list);
50362306a36Sopenharmony_ci	raw_spin_lock_init(&storage->lock);
50462306a36Sopenharmony_ci	storage->owner = owner;
50562306a36Sopenharmony_ci
50662306a36Sopenharmony_ci	bpf_selem_link_storage_nolock(storage, first_selem);
50762306a36Sopenharmony_ci	bpf_selem_link_map(smap, first_selem);
50862306a36Sopenharmony_ci
50962306a36Sopenharmony_ci	owner_storage_ptr =
51062306a36Sopenharmony_ci		(struct bpf_local_storage **)owner_storage(smap, owner);
51162306a36Sopenharmony_ci	/* Publish storage to the owner.
51262306a36Sopenharmony_ci	 * Instead of using any lock of the kernel object (i.e. owner),
51362306a36Sopenharmony_ci	 * cmpxchg will work with any kernel object regardless what
51462306a36Sopenharmony_ci	 * the running context is, bh, irq...etc.
51562306a36Sopenharmony_ci	 *
51662306a36Sopenharmony_ci	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
51762306a36Sopenharmony_ci	 * is protected by the storage->lock.  Hence, when freeing
51862306a36Sopenharmony_ci	 * the owner->storage, the storage->lock must be held before
51962306a36Sopenharmony_ci	 * setting owner->storage ptr to NULL.
52062306a36Sopenharmony_ci	 */
52162306a36Sopenharmony_ci	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
52262306a36Sopenharmony_ci	if (unlikely(prev_storage)) {
52362306a36Sopenharmony_ci		bpf_selem_unlink_map(first_selem);
52462306a36Sopenharmony_ci		err = -EAGAIN;
52562306a36Sopenharmony_ci		goto uncharge;
52662306a36Sopenharmony_ci
52762306a36Sopenharmony_ci		/* Note that even first_selem was linked to smap's
52862306a36Sopenharmony_ci		 * bucket->list, first_selem can be freed immediately
52962306a36Sopenharmony_ci		 * (instead of kfree_rcu) because
53062306a36Sopenharmony_ci		 * bpf_local_storage_map_free() does a
53162306a36Sopenharmony_ci		 * synchronize_rcu_mult (waiting for both sleepable and
53262306a36Sopenharmony_ci		 * normal programs) before walking the bucket->list.
53362306a36Sopenharmony_ci		 * Hence, no one is accessing selem from the
53462306a36Sopenharmony_ci		 * bucket->list under rcu_read_lock().
53562306a36Sopenharmony_ci		 */
53662306a36Sopenharmony_ci	}
53762306a36Sopenharmony_ci
53862306a36Sopenharmony_ci	return 0;
53962306a36Sopenharmony_ci
54062306a36Sopenharmony_ciuncharge:
54162306a36Sopenharmony_ci	bpf_local_storage_free(storage, smap, smap->bpf_ma, true);
54262306a36Sopenharmony_ci	mem_uncharge(smap, owner, sizeof(*storage));
54362306a36Sopenharmony_ci	return err;
54462306a36Sopenharmony_ci}
54562306a36Sopenharmony_ci
54662306a36Sopenharmony_ci/* sk cannot be going away because it is linking new elem
54762306a36Sopenharmony_ci * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
54862306a36Sopenharmony_ci * Otherwise, it will become a leak (and other memory issues
54962306a36Sopenharmony_ci * during map destruction).
55062306a36Sopenharmony_ci */
55162306a36Sopenharmony_cistruct bpf_local_storage_data *
55262306a36Sopenharmony_cibpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
55362306a36Sopenharmony_ci			 void *value, u64 map_flags, gfp_t gfp_flags)
55462306a36Sopenharmony_ci{
55562306a36Sopenharmony_ci	struct bpf_local_storage_data *old_sdata = NULL;
55662306a36Sopenharmony_ci	struct bpf_local_storage_elem *alloc_selem, *selem = NULL;
55762306a36Sopenharmony_ci	struct bpf_local_storage *local_storage;
55862306a36Sopenharmony_ci	unsigned long flags;
55962306a36Sopenharmony_ci	int err;
56062306a36Sopenharmony_ci
56162306a36Sopenharmony_ci	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
56262306a36Sopenharmony_ci	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
56362306a36Sopenharmony_ci	    /* BPF_F_LOCK can only be used in a value with spin_lock */
56462306a36Sopenharmony_ci	    unlikely((map_flags & BPF_F_LOCK) &&
56562306a36Sopenharmony_ci		     !btf_record_has_field(smap->map.record, BPF_SPIN_LOCK)))
56662306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
56762306a36Sopenharmony_ci
56862306a36Sopenharmony_ci	if (gfp_flags == GFP_KERNEL && (map_flags & ~BPF_F_LOCK) != BPF_NOEXIST)
56962306a36Sopenharmony_ci		return ERR_PTR(-EINVAL);
57062306a36Sopenharmony_ci
57162306a36Sopenharmony_ci	local_storage = rcu_dereference_check(*owner_storage(smap, owner),
57262306a36Sopenharmony_ci					      bpf_rcu_lock_held());
57362306a36Sopenharmony_ci	if (!local_storage || hlist_empty(&local_storage->list)) {
57462306a36Sopenharmony_ci		/* Very first elem for the owner */
57562306a36Sopenharmony_ci		err = check_flags(NULL, map_flags);
57662306a36Sopenharmony_ci		if (err)
57762306a36Sopenharmony_ci			return ERR_PTR(err);
57862306a36Sopenharmony_ci
57962306a36Sopenharmony_ci		selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
58062306a36Sopenharmony_ci		if (!selem)
58162306a36Sopenharmony_ci			return ERR_PTR(-ENOMEM);
58262306a36Sopenharmony_ci
58362306a36Sopenharmony_ci		err = bpf_local_storage_alloc(owner, smap, selem, gfp_flags);
58462306a36Sopenharmony_ci		if (err) {
58562306a36Sopenharmony_ci			bpf_selem_free(selem, smap, true);
58662306a36Sopenharmony_ci			mem_uncharge(smap, owner, smap->elem_size);
58762306a36Sopenharmony_ci			return ERR_PTR(err);
58862306a36Sopenharmony_ci		}
58962306a36Sopenharmony_ci
59062306a36Sopenharmony_ci		return SDATA(selem);
59162306a36Sopenharmony_ci	}
59262306a36Sopenharmony_ci
59362306a36Sopenharmony_ci	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
59462306a36Sopenharmony_ci		/* Hoping to find an old_sdata to do inline update
59562306a36Sopenharmony_ci		 * such that it can avoid taking the local_storage->lock
59662306a36Sopenharmony_ci		 * and changing the lists.
59762306a36Sopenharmony_ci		 */
59862306a36Sopenharmony_ci		old_sdata =
59962306a36Sopenharmony_ci			bpf_local_storage_lookup(local_storage, smap, false);
60062306a36Sopenharmony_ci		err = check_flags(old_sdata, map_flags);
60162306a36Sopenharmony_ci		if (err)
60262306a36Sopenharmony_ci			return ERR_PTR(err);
60362306a36Sopenharmony_ci		if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
60462306a36Sopenharmony_ci			copy_map_value_locked(&smap->map, old_sdata->data,
60562306a36Sopenharmony_ci					      value, false);
60662306a36Sopenharmony_ci			return old_sdata;
60762306a36Sopenharmony_ci		}
60862306a36Sopenharmony_ci	}
60962306a36Sopenharmony_ci
61062306a36Sopenharmony_ci	/* A lookup has just been done before and concluded a new selem is
61162306a36Sopenharmony_ci	 * needed. The chance of an unnecessary alloc is unlikely.
61262306a36Sopenharmony_ci	 */
61362306a36Sopenharmony_ci	alloc_selem = selem = bpf_selem_alloc(smap, owner, value, true, gfp_flags);
61462306a36Sopenharmony_ci	if (!alloc_selem)
61562306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
61662306a36Sopenharmony_ci
61762306a36Sopenharmony_ci	raw_spin_lock_irqsave(&local_storage->lock, flags);
61862306a36Sopenharmony_ci
61962306a36Sopenharmony_ci	/* Recheck local_storage->list under local_storage->lock */
62062306a36Sopenharmony_ci	if (unlikely(hlist_empty(&local_storage->list))) {
62162306a36Sopenharmony_ci		/* A parallel del is happening and local_storage is going
62262306a36Sopenharmony_ci		 * away.  It has just been checked before, so very
62362306a36Sopenharmony_ci		 * unlikely.  Return instead of retry to keep things
62462306a36Sopenharmony_ci		 * simple.
62562306a36Sopenharmony_ci		 */
62662306a36Sopenharmony_ci		err = -EAGAIN;
62762306a36Sopenharmony_ci		goto unlock;
62862306a36Sopenharmony_ci	}
62962306a36Sopenharmony_ci
63062306a36Sopenharmony_ci	old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
63162306a36Sopenharmony_ci	err = check_flags(old_sdata, map_flags);
63262306a36Sopenharmony_ci	if (err)
63362306a36Sopenharmony_ci		goto unlock;
63462306a36Sopenharmony_ci
63562306a36Sopenharmony_ci	if (old_sdata && (map_flags & BPF_F_LOCK)) {
63662306a36Sopenharmony_ci		copy_map_value_locked(&smap->map, old_sdata->data, value,
63762306a36Sopenharmony_ci				      false);
63862306a36Sopenharmony_ci		selem = SELEM(old_sdata);
63962306a36Sopenharmony_ci		goto unlock;
64062306a36Sopenharmony_ci	}
64162306a36Sopenharmony_ci
64262306a36Sopenharmony_ci	alloc_selem = NULL;
64362306a36Sopenharmony_ci	/* First, link the new selem to the map */
64462306a36Sopenharmony_ci	bpf_selem_link_map(smap, selem);
64562306a36Sopenharmony_ci
64662306a36Sopenharmony_ci	/* Second, link (and publish) the new selem to local_storage */
64762306a36Sopenharmony_ci	bpf_selem_link_storage_nolock(local_storage, selem);
64862306a36Sopenharmony_ci
64962306a36Sopenharmony_ci	/* Third, remove old selem, SELEM(old_sdata) */
65062306a36Sopenharmony_ci	if (old_sdata) {
65162306a36Sopenharmony_ci		bpf_selem_unlink_map(SELEM(old_sdata));
65262306a36Sopenharmony_ci		bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
65362306a36Sopenharmony_ci						true, false);
65462306a36Sopenharmony_ci	}
65562306a36Sopenharmony_ci
65662306a36Sopenharmony_ciunlock:
65762306a36Sopenharmony_ci	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
65862306a36Sopenharmony_ci	if (alloc_selem) {
65962306a36Sopenharmony_ci		mem_uncharge(smap, owner, smap->elem_size);
66062306a36Sopenharmony_ci		bpf_selem_free(alloc_selem, smap, true);
66162306a36Sopenharmony_ci	}
66262306a36Sopenharmony_ci	return err ? ERR_PTR(err) : SDATA(selem);
66362306a36Sopenharmony_ci}
66462306a36Sopenharmony_ci
66562306a36Sopenharmony_cistatic u16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
66662306a36Sopenharmony_ci{
66762306a36Sopenharmony_ci	u64 min_usage = U64_MAX;
66862306a36Sopenharmony_ci	u16 i, res = 0;
66962306a36Sopenharmony_ci
67062306a36Sopenharmony_ci	spin_lock(&cache->idx_lock);
67162306a36Sopenharmony_ci
67262306a36Sopenharmony_ci	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
67362306a36Sopenharmony_ci		if (cache->idx_usage_counts[i] < min_usage) {
67462306a36Sopenharmony_ci			min_usage = cache->idx_usage_counts[i];
67562306a36Sopenharmony_ci			res = i;
67662306a36Sopenharmony_ci
67762306a36Sopenharmony_ci			/* Found a free cache_idx */
67862306a36Sopenharmony_ci			if (!min_usage)
67962306a36Sopenharmony_ci				break;
68062306a36Sopenharmony_ci		}
68162306a36Sopenharmony_ci	}
68262306a36Sopenharmony_ci	cache->idx_usage_counts[res]++;
68362306a36Sopenharmony_ci
68462306a36Sopenharmony_ci	spin_unlock(&cache->idx_lock);
68562306a36Sopenharmony_ci
68662306a36Sopenharmony_ci	return res;
68762306a36Sopenharmony_ci}
68862306a36Sopenharmony_ci
68962306a36Sopenharmony_cistatic void bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
69062306a36Sopenharmony_ci					     u16 idx)
69162306a36Sopenharmony_ci{
69262306a36Sopenharmony_ci	spin_lock(&cache->idx_lock);
69362306a36Sopenharmony_ci	cache->idx_usage_counts[idx]--;
69462306a36Sopenharmony_ci	spin_unlock(&cache->idx_lock);
69562306a36Sopenharmony_ci}
69662306a36Sopenharmony_ci
69762306a36Sopenharmony_ciint bpf_local_storage_map_alloc_check(union bpf_attr *attr)
69862306a36Sopenharmony_ci{
69962306a36Sopenharmony_ci	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
70062306a36Sopenharmony_ci	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
70162306a36Sopenharmony_ci	    attr->max_entries ||
70262306a36Sopenharmony_ci	    attr->key_size != sizeof(int) || !attr->value_size ||
70362306a36Sopenharmony_ci	    /* Enforce BTF for userspace sk dumping */
70462306a36Sopenharmony_ci	    !attr->btf_key_type_id || !attr->btf_value_type_id)
70562306a36Sopenharmony_ci		return -EINVAL;
70662306a36Sopenharmony_ci
70762306a36Sopenharmony_ci	if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
70862306a36Sopenharmony_ci		return -E2BIG;
70962306a36Sopenharmony_ci
71062306a36Sopenharmony_ci	return 0;
71162306a36Sopenharmony_ci}
71262306a36Sopenharmony_ci
71362306a36Sopenharmony_ciint bpf_local_storage_map_check_btf(const struct bpf_map *map,
71462306a36Sopenharmony_ci				    const struct btf *btf,
71562306a36Sopenharmony_ci				    const struct btf_type *key_type,
71662306a36Sopenharmony_ci				    const struct btf_type *value_type)
71762306a36Sopenharmony_ci{
71862306a36Sopenharmony_ci	u32 int_data;
71962306a36Sopenharmony_ci
72062306a36Sopenharmony_ci	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
72162306a36Sopenharmony_ci		return -EINVAL;
72262306a36Sopenharmony_ci
72362306a36Sopenharmony_ci	int_data = *(u32 *)(key_type + 1);
72462306a36Sopenharmony_ci	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
72562306a36Sopenharmony_ci		return -EINVAL;
72662306a36Sopenharmony_ci
72762306a36Sopenharmony_ci	return 0;
72862306a36Sopenharmony_ci}
72962306a36Sopenharmony_ci
73062306a36Sopenharmony_civoid bpf_local_storage_destroy(struct bpf_local_storage *local_storage)
73162306a36Sopenharmony_ci{
73262306a36Sopenharmony_ci	struct bpf_local_storage_map *storage_smap;
73362306a36Sopenharmony_ci	struct bpf_local_storage_elem *selem;
73462306a36Sopenharmony_ci	bool bpf_ma, free_storage = false;
73562306a36Sopenharmony_ci	struct hlist_node *n;
73662306a36Sopenharmony_ci	unsigned long flags;
73762306a36Sopenharmony_ci
73862306a36Sopenharmony_ci	storage_smap = rcu_dereference_check(local_storage->smap, bpf_rcu_lock_held());
73962306a36Sopenharmony_ci	bpf_ma = check_storage_bpf_ma(local_storage, storage_smap, NULL);
74062306a36Sopenharmony_ci
74162306a36Sopenharmony_ci	/* Neither the bpf_prog nor the bpf_map's syscall
74262306a36Sopenharmony_ci	 * could be modifying the local_storage->list now.
74362306a36Sopenharmony_ci	 * Thus, no elem can be added to or deleted from the
74462306a36Sopenharmony_ci	 * local_storage->list by the bpf_prog or by the bpf_map's syscall.
74562306a36Sopenharmony_ci	 *
74662306a36Sopenharmony_ci	 * It is racing with bpf_local_storage_map_free() alone
74762306a36Sopenharmony_ci	 * when unlinking elem from the local_storage->list and
74862306a36Sopenharmony_ci	 * the map's bucket->list.
74962306a36Sopenharmony_ci	 */
75062306a36Sopenharmony_ci	raw_spin_lock_irqsave(&local_storage->lock, flags);
75162306a36Sopenharmony_ci	hlist_for_each_entry_safe(selem, n, &local_storage->list, snode) {
75262306a36Sopenharmony_ci		/* Always unlink from map before unlinking from
75362306a36Sopenharmony_ci		 * local_storage.
75462306a36Sopenharmony_ci		 */
75562306a36Sopenharmony_ci		bpf_selem_unlink_map(selem);
75662306a36Sopenharmony_ci		/* If local_storage list has only one element, the
75762306a36Sopenharmony_ci		 * bpf_selem_unlink_storage_nolock() will return true.
75862306a36Sopenharmony_ci		 * Otherwise, it will return false. The current loop iteration
75962306a36Sopenharmony_ci		 * intends to remove all local storage. So the last iteration
76062306a36Sopenharmony_ci		 * of the loop will set the free_cgroup_storage to true.
76162306a36Sopenharmony_ci		 */
76262306a36Sopenharmony_ci		free_storage = bpf_selem_unlink_storage_nolock(
76362306a36Sopenharmony_ci			local_storage, selem, true, true);
76462306a36Sopenharmony_ci	}
76562306a36Sopenharmony_ci	raw_spin_unlock_irqrestore(&local_storage->lock, flags);
76662306a36Sopenharmony_ci
76762306a36Sopenharmony_ci	if (free_storage)
76862306a36Sopenharmony_ci		bpf_local_storage_free(local_storage, storage_smap, bpf_ma, true);
76962306a36Sopenharmony_ci}
77062306a36Sopenharmony_ci
77162306a36Sopenharmony_ciu64 bpf_local_storage_map_mem_usage(const struct bpf_map *map)
77262306a36Sopenharmony_ci{
77362306a36Sopenharmony_ci	struct bpf_local_storage_map *smap = (struct bpf_local_storage_map *)map;
77462306a36Sopenharmony_ci	u64 usage = sizeof(*smap);
77562306a36Sopenharmony_ci
77662306a36Sopenharmony_ci	/* The dynamically callocated selems are not counted currently. */
77762306a36Sopenharmony_ci	usage += sizeof(*smap->buckets) * (1ULL << smap->bucket_log);
77862306a36Sopenharmony_ci	return usage;
77962306a36Sopenharmony_ci}
78062306a36Sopenharmony_ci
78162306a36Sopenharmony_ci/* When bpf_ma == true, the bpf_mem_alloc is used to allocate and free memory.
78262306a36Sopenharmony_ci * A deadlock free allocator is useful for storage that the bpf prog can easily
78362306a36Sopenharmony_ci * get a hold of the owner PTR_TO_BTF_ID in any context. eg. bpf_get_current_task_btf.
78462306a36Sopenharmony_ci * The task and cgroup storage fall into this case. The bpf_mem_alloc reuses
78562306a36Sopenharmony_ci * memory immediately. To be reuse-immediate safe, the owner destruction
78662306a36Sopenharmony_ci * code path needs to go through a rcu grace period before calling
78762306a36Sopenharmony_ci * bpf_local_storage_destroy().
78862306a36Sopenharmony_ci *
78962306a36Sopenharmony_ci * When bpf_ma == false, the kmalloc and kfree are used.
79062306a36Sopenharmony_ci */
79162306a36Sopenharmony_cistruct bpf_map *
79262306a36Sopenharmony_cibpf_local_storage_map_alloc(union bpf_attr *attr,
79362306a36Sopenharmony_ci			    struct bpf_local_storage_cache *cache,
79462306a36Sopenharmony_ci			    bool bpf_ma)
79562306a36Sopenharmony_ci{
79662306a36Sopenharmony_ci	struct bpf_local_storage_map *smap;
79762306a36Sopenharmony_ci	unsigned int i;
79862306a36Sopenharmony_ci	u32 nbuckets;
79962306a36Sopenharmony_ci	int err;
80062306a36Sopenharmony_ci
80162306a36Sopenharmony_ci	smap = bpf_map_area_alloc(sizeof(*smap), NUMA_NO_NODE);
80262306a36Sopenharmony_ci	if (!smap)
80362306a36Sopenharmony_ci		return ERR_PTR(-ENOMEM);
80462306a36Sopenharmony_ci	bpf_map_init_from_attr(&smap->map, attr);
80562306a36Sopenharmony_ci
80662306a36Sopenharmony_ci	nbuckets = roundup_pow_of_two(num_possible_cpus());
80762306a36Sopenharmony_ci	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
80862306a36Sopenharmony_ci	nbuckets = max_t(u32, 2, nbuckets);
80962306a36Sopenharmony_ci	smap->bucket_log = ilog2(nbuckets);
81062306a36Sopenharmony_ci
81162306a36Sopenharmony_ci	smap->buckets = bpf_map_kvcalloc(&smap->map, sizeof(*smap->buckets),
81262306a36Sopenharmony_ci					 nbuckets, GFP_USER | __GFP_NOWARN);
81362306a36Sopenharmony_ci	if (!smap->buckets) {
81462306a36Sopenharmony_ci		err = -ENOMEM;
81562306a36Sopenharmony_ci		goto free_smap;
81662306a36Sopenharmony_ci	}
81762306a36Sopenharmony_ci
81862306a36Sopenharmony_ci	for (i = 0; i < nbuckets; i++) {
81962306a36Sopenharmony_ci		INIT_HLIST_HEAD(&smap->buckets[i].list);
82062306a36Sopenharmony_ci		raw_spin_lock_init(&smap->buckets[i].lock);
82162306a36Sopenharmony_ci	}
82262306a36Sopenharmony_ci
82362306a36Sopenharmony_ci	smap->elem_size = offsetof(struct bpf_local_storage_elem,
82462306a36Sopenharmony_ci				   sdata.data[attr->value_size]);
82562306a36Sopenharmony_ci
82662306a36Sopenharmony_ci	smap->bpf_ma = bpf_ma;
82762306a36Sopenharmony_ci	if (bpf_ma) {
82862306a36Sopenharmony_ci		err = bpf_mem_alloc_init(&smap->selem_ma, smap->elem_size, false);
82962306a36Sopenharmony_ci		if (err)
83062306a36Sopenharmony_ci			goto free_smap;
83162306a36Sopenharmony_ci
83262306a36Sopenharmony_ci		err = bpf_mem_alloc_init(&smap->storage_ma, sizeof(struct bpf_local_storage), false);
83362306a36Sopenharmony_ci		if (err) {
83462306a36Sopenharmony_ci			bpf_mem_alloc_destroy(&smap->selem_ma);
83562306a36Sopenharmony_ci			goto free_smap;
83662306a36Sopenharmony_ci		}
83762306a36Sopenharmony_ci	}
83862306a36Sopenharmony_ci
83962306a36Sopenharmony_ci	smap->cache_idx = bpf_local_storage_cache_idx_get(cache);
84062306a36Sopenharmony_ci	return &smap->map;
84162306a36Sopenharmony_ci
84262306a36Sopenharmony_cifree_smap:
84362306a36Sopenharmony_ci	kvfree(smap->buckets);
84462306a36Sopenharmony_ci	bpf_map_area_free(smap);
84562306a36Sopenharmony_ci	return ERR_PTR(err);
84662306a36Sopenharmony_ci}
84762306a36Sopenharmony_ci
84862306a36Sopenharmony_civoid bpf_local_storage_map_free(struct bpf_map *map,
84962306a36Sopenharmony_ci				struct bpf_local_storage_cache *cache,
85062306a36Sopenharmony_ci				int __percpu *busy_counter)
85162306a36Sopenharmony_ci{
85262306a36Sopenharmony_ci	struct bpf_local_storage_map_bucket *b;
85362306a36Sopenharmony_ci	struct bpf_local_storage_elem *selem;
85462306a36Sopenharmony_ci	struct bpf_local_storage_map *smap;
85562306a36Sopenharmony_ci	unsigned int i;
85662306a36Sopenharmony_ci
85762306a36Sopenharmony_ci	smap = (struct bpf_local_storage_map *)map;
85862306a36Sopenharmony_ci	bpf_local_storage_cache_idx_free(cache, smap->cache_idx);
85962306a36Sopenharmony_ci
86062306a36Sopenharmony_ci	/* Note that this map might be concurrently cloned from
86162306a36Sopenharmony_ci	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
86262306a36Sopenharmony_ci	 * RCU read section to finish before proceeding. New RCU
86362306a36Sopenharmony_ci	 * read sections should be prevented via bpf_map_inc_not_zero.
86462306a36Sopenharmony_ci	 */
86562306a36Sopenharmony_ci	synchronize_rcu();
86662306a36Sopenharmony_ci
86762306a36Sopenharmony_ci	/* bpf prog and the userspace can no longer access this map
86862306a36Sopenharmony_ci	 * now.  No new selem (of this map) can be added
86962306a36Sopenharmony_ci	 * to the owner->storage or to the map bucket's list.
87062306a36Sopenharmony_ci	 *
87162306a36Sopenharmony_ci	 * The elem of this map can be cleaned up here
87262306a36Sopenharmony_ci	 * or when the storage is freed e.g.
87362306a36Sopenharmony_ci	 * by bpf_sk_storage_free() during __sk_destruct().
87462306a36Sopenharmony_ci	 */
87562306a36Sopenharmony_ci	for (i = 0; i < (1U << smap->bucket_log); i++) {
87662306a36Sopenharmony_ci		b = &smap->buckets[i];
87762306a36Sopenharmony_ci
87862306a36Sopenharmony_ci		rcu_read_lock();
87962306a36Sopenharmony_ci		/* No one is adding to b->list now */
88062306a36Sopenharmony_ci		while ((selem = hlist_entry_safe(
88162306a36Sopenharmony_ci				rcu_dereference_raw(hlist_first_rcu(&b->list)),
88262306a36Sopenharmony_ci				struct bpf_local_storage_elem, map_node))) {
88362306a36Sopenharmony_ci			if (busy_counter) {
88462306a36Sopenharmony_ci				migrate_disable();
88562306a36Sopenharmony_ci				this_cpu_inc(*busy_counter);
88662306a36Sopenharmony_ci			}
88762306a36Sopenharmony_ci			bpf_selem_unlink(selem, true);
88862306a36Sopenharmony_ci			if (busy_counter) {
88962306a36Sopenharmony_ci				this_cpu_dec(*busy_counter);
89062306a36Sopenharmony_ci				migrate_enable();
89162306a36Sopenharmony_ci			}
89262306a36Sopenharmony_ci			cond_resched_rcu();
89362306a36Sopenharmony_ci		}
89462306a36Sopenharmony_ci		rcu_read_unlock();
89562306a36Sopenharmony_ci	}
89662306a36Sopenharmony_ci
89762306a36Sopenharmony_ci	/* While freeing the storage we may still need to access the map.
89862306a36Sopenharmony_ci	 *
89962306a36Sopenharmony_ci	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
90062306a36Sopenharmony_ci	 * which then made the above while((selem = ...)) loop
90162306a36Sopenharmony_ci	 * exit immediately.
90262306a36Sopenharmony_ci	 *
90362306a36Sopenharmony_ci	 * However, while freeing the storage one still needs to access the
90462306a36Sopenharmony_ci	 * smap->elem_size to do the uncharging in
90562306a36Sopenharmony_ci	 * bpf_selem_unlink_storage_nolock().
90662306a36Sopenharmony_ci	 *
90762306a36Sopenharmony_ci	 * Hence, wait another rcu grace period for the storage to be freed.
90862306a36Sopenharmony_ci	 */
90962306a36Sopenharmony_ci	synchronize_rcu();
91062306a36Sopenharmony_ci
91162306a36Sopenharmony_ci	if (smap->bpf_ma) {
91262306a36Sopenharmony_ci		bpf_mem_alloc_destroy(&smap->selem_ma);
91362306a36Sopenharmony_ci		bpf_mem_alloc_destroy(&smap->storage_ma);
91462306a36Sopenharmony_ci	}
91562306a36Sopenharmony_ci	kvfree(smap->buckets);
91662306a36Sopenharmony_ci	bpf_map_area_free(smap);
91762306a36Sopenharmony_ci}
918