18c2ecf20Sopenharmony_ci// SPDX-License-Identifier: GPL-2.0
28c2ecf20Sopenharmony_ci/* Copyright (c) 2019 Facebook  */
38c2ecf20Sopenharmony_ci#include <linux/rculist.h>
48c2ecf20Sopenharmony_ci#include <linux/list.h>
58c2ecf20Sopenharmony_ci#include <linux/hash.h>
68c2ecf20Sopenharmony_ci#include <linux/types.h>
78c2ecf20Sopenharmony_ci#include <linux/spinlock.h>
88c2ecf20Sopenharmony_ci#include <linux/bpf.h>
98c2ecf20Sopenharmony_ci#include <linux/btf_ids.h>
108c2ecf20Sopenharmony_ci#include <linux/bpf_local_storage.h>
118c2ecf20Sopenharmony_ci#include <net/sock.h>
128c2ecf20Sopenharmony_ci#include <uapi/linux/sock_diag.h>
138c2ecf20Sopenharmony_ci#include <uapi/linux/btf.h>
148c2ecf20Sopenharmony_ci
/* Flags accepted in attr->map_flags when a local storage map is created;
 * bpf_local_storage_map_alloc_check() rejects any bit outside this mask.
 */
#define BPF_LOCAL_STORAGE_CREATE_FLAG_MASK (BPF_F_NO_PREALLOC | BPF_F_CLONE)
168c2ecf20Sopenharmony_ci
178c2ecf20Sopenharmony_cistatic struct bpf_local_storage_map_bucket *
188c2ecf20Sopenharmony_ciselect_bucket(struct bpf_local_storage_map *smap,
198c2ecf20Sopenharmony_ci	      struct bpf_local_storage_elem *selem)
208c2ecf20Sopenharmony_ci{
218c2ecf20Sopenharmony_ci	return &smap->buckets[hash_ptr(selem, smap->bucket_log)];
228c2ecf20Sopenharmony_ci}
238c2ecf20Sopenharmony_ci
248c2ecf20Sopenharmony_cistatic int mem_charge(struct bpf_local_storage_map *smap, void *owner, u32 size)
258c2ecf20Sopenharmony_ci{
268c2ecf20Sopenharmony_ci	struct bpf_map *map = &smap->map;
278c2ecf20Sopenharmony_ci
288c2ecf20Sopenharmony_ci	if (!map->ops->map_local_storage_charge)
298c2ecf20Sopenharmony_ci		return 0;
308c2ecf20Sopenharmony_ci
318c2ecf20Sopenharmony_ci	return map->ops->map_local_storage_charge(smap, owner, size);
328c2ecf20Sopenharmony_ci}
338c2ecf20Sopenharmony_ci
348c2ecf20Sopenharmony_cistatic void mem_uncharge(struct bpf_local_storage_map *smap, void *owner,
358c2ecf20Sopenharmony_ci			 u32 size)
368c2ecf20Sopenharmony_ci{
378c2ecf20Sopenharmony_ci	struct bpf_map *map = &smap->map;
388c2ecf20Sopenharmony_ci
398c2ecf20Sopenharmony_ci	if (map->ops->map_local_storage_uncharge)
408c2ecf20Sopenharmony_ci		map->ops->map_local_storage_uncharge(smap, owner, size);
418c2ecf20Sopenharmony_ci}
428c2ecf20Sopenharmony_ci
438c2ecf20Sopenharmony_cistatic struct bpf_local_storage __rcu **
448c2ecf20Sopenharmony_ciowner_storage(struct bpf_local_storage_map *smap, void *owner)
458c2ecf20Sopenharmony_ci{
468c2ecf20Sopenharmony_ci	struct bpf_map *map = &smap->map;
478c2ecf20Sopenharmony_ci
488c2ecf20Sopenharmony_ci	return map->ops->map_owner_storage_ptr(owner);
498c2ecf20Sopenharmony_ci}
508c2ecf20Sopenharmony_ci
518c2ecf20Sopenharmony_cistatic bool selem_linked_to_storage_lockless(const struct bpf_local_storage_elem *selem)
528c2ecf20Sopenharmony_ci{
538c2ecf20Sopenharmony_ci	return !hlist_unhashed_lockless(&selem->snode);
548c2ecf20Sopenharmony_ci}
558c2ecf20Sopenharmony_ci
568c2ecf20Sopenharmony_cistatic bool selem_linked_to_storage(const struct bpf_local_storage_elem *selem)
578c2ecf20Sopenharmony_ci{
588c2ecf20Sopenharmony_ci	return !hlist_unhashed(&selem->snode);
598c2ecf20Sopenharmony_ci}
608c2ecf20Sopenharmony_ci
618c2ecf20Sopenharmony_cistatic bool selem_linked_to_map_lockless(const struct bpf_local_storage_elem *selem)
628c2ecf20Sopenharmony_ci{
638c2ecf20Sopenharmony_ci	return !hlist_unhashed_lockless(&selem->map_node);
648c2ecf20Sopenharmony_ci}
658c2ecf20Sopenharmony_ci
668c2ecf20Sopenharmony_cistatic bool selem_linked_to_map(const struct bpf_local_storage_elem *selem)
678c2ecf20Sopenharmony_ci{
688c2ecf20Sopenharmony_ci	return !hlist_unhashed(&selem->map_node);
698c2ecf20Sopenharmony_ci}
708c2ecf20Sopenharmony_ci
718c2ecf20Sopenharmony_cistruct bpf_local_storage_elem *
728c2ecf20Sopenharmony_cibpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner,
738c2ecf20Sopenharmony_ci		void *value, bool charge_mem)
748c2ecf20Sopenharmony_ci{
758c2ecf20Sopenharmony_ci	struct bpf_local_storage_elem *selem;
768c2ecf20Sopenharmony_ci
778c2ecf20Sopenharmony_ci	if (charge_mem && mem_charge(smap, owner, smap->elem_size))
788c2ecf20Sopenharmony_ci		return NULL;
798c2ecf20Sopenharmony_ci
808c2ecf20Sopenharmony_ci	selem = kzalloc(smap->elem_size, GFP_ATOMIC | __GFP_NOWARN);
818c2ecf20Sopenharmony_ci	if (selem) {
828c2ecf20Sopenharmony_ci		if (value)
838c2ecf20Sopenharmony_ci			copy_map_value(&smap->map, SDATA(selem)->data, value);
848c2ecf20Sopenharmony_ci		return selem;
858c2ecf20Sopenharmony_ci	}
868c2ecf20Sopenharmony_ci
878c2ecf20Sopenharmony_ci	if (charge_mem)
888c2ecf20Sopenharmony_ci		mem_uncharge(smap, owner, smap->elem_size);
898c2ecf20Sopenharmony_ci
908c2ecf20Sopenharmony_ci	return NULL;
918c2ecf20Sopenharmony_ci}
928c2ecf20Sopenharmony_ci
/* Unlink selem from local_storage->list and free selem via kfree_rcu().
 *
 * local_storage->lock must be held and selem->local_storage == local_storage.
 * The caller must ensure selem->smap is still valid to be
 * dereferenced for its smap->elem_size and smap->cache_idx.
 *
 * When uncharge_mem is true, smap->elem_size is uncharged from the owner.
 *
 * Returns true when selem was the only node on local_storage->list.  In
 * that case the owner's storage pointer has been cleared here and the
 * caller is expected to free local_storage after dropping the lock
 * (see the comment in the function body).
 */
bool bpf_selem_unlink_storage_nolock(struct bpf_local_storage *local_storage,
				     struct bpf_local_storage_elem *selem,
				     bool uncharge_mem)
{
	struct bpf_local_storage_map *smap;
	bool free_local_storage;
	void *owner;

	smap = rcu_dereference(SDATA(selem)->smap);
	owner = local_storage->owner;

	/* All uncharging on the owner must be done first.
	 * The owner may be freed once the last selem is unlinked
	 * from local_storage.
	 */
	if (uncharge_mem)
		mem_uncharge(smap, owner, smap->elem_size);

	/* True when selem is the one and only node left on the list */
	free_local_storage = hlist_is_singular_node(&selem->snode,
						    &local_storage->list);
	if (free_local_storage) {
		mem_uncharge(smap, owner, sizeof(struct bpf_local_storage));
		local_storage->owner = NULL;

		/* After this RCU_INIT, owner may be freed and cannot be used */
		RCU_INIT_POINTER(*owner_storage(smap, owner), NULL);

		/* local_storage is not freed now.  local_storage->lock is
		 * still held and raw_spin_unlock_bh(&local_storage->lock)
		 * will be done by the caller.
		 *
		 * Although the unlock will be done under
		 * rcu_read_lock(), it is more intuitive to
		 * read if kfree_rcu(local_storage, rcu) is done
		 * after the raw_spin_unlock_bh(&local_storage->lock).
		 *
		 * Hence, a "bool free_local_storage" is returned
		 * to the caller which then calls the kfree_rcu()
		 * after unlock.
		 */
	}
	hlist_del_init_rcu(&selem->snode);
	/* Drop the cache slot if it still points at the selem being removed */
	if (rcu_access_pointer(local_storage->cache[smap->cache_idx]) ==
	    SDATA(selem))
		RCU_INIT_POINTER(local_storage->cache[smap->cache_idx], NULL);

	kfree_rcu(selem, rcu);

	return free_local_storage;
}
1478c2ecf20Sopenharmony_ci
1488c2ecf20Sopenharmony_cistatic void __bpf_selem_unlink_storage(struct bpf_local_storage_elem *selem)
1498c2ecf20Sopenharmony_ci{
1508c2ecf20Sopenharmony_ci	struct bpf_local_storage *local_storage;
1518c2ecf20Sopenharmony_ci	bool free_local_storage = false;
1528c2ecf20Sopenharmony_ci
1538c2ecf20Sopenharmony_ci	if (unlikely(!selem_linked_to_storage_lockless(selem)))
1548c2ecf20Sopenharmony_ci		/* selem has already been unlinked from sk */
1558c2ecf20Sopenharmony_ci		return;
1568c2ecf20Sopenharmony_ci
1578c2ecf20Sopenharmony_ci	local_storage = rcu_dereference(selem->local_storage);
1588c2ecf20Sopenharmony_ci	raw_spin_lock_bh(&local_storage->lock);
1598c2ecf20Sopenharmony_ci	if (likely(selem_linked_to_storage(selem)))
1608c2ecf20Sopenharmony_ci		free_local_storage = bpf_selem_unlink_storage_nolock(
1618c2ecf20Sopenharmony_ci			local_storage, selem, true);
1628c2ecf20Sopenharmony_ci	raw_spin_unlock_bh(&local_storage->lock);
1638c2ecf20Sopenharmony_ci
1648c2ecf20Sopenharmony_ci	if (free_local_storage)
1658c2ecf20Sopenharmony_ci		kfree_rcu(local_storage, rcu);
1668c2ecf20Sopenharmony_ci}
1678c2ecf20Sopenharmony_ci
/* Link selem onto local_storage->list without taking any lock.
 *
 * In this file it is called either on a local_storage that has not been
 * published to its owner yet (bpf_local_storage_alloc()) or with
 * local_storage->lock held (bpf_local_storage_update()).
 */
void bpf_selem_link_storage_nolock(struct bpf_local_storage *local_storage,
				   struct bpf_local_storage_elem *selem)
{
	/* Set the back pointer before selem becomes reachable from the list */
	RCU_INIT_POINTER(selem->local_storage, local_storage);
	hlist_add_head_rcu(&selem->snode, &local_storage->list);
}
1748c2ecf20Sopenharmony_ci
1758c2ecf20Sopenharmony_civoid bpf_selem_unlink_map(struct bpf_local_storage_elem *selem)
1768c2ecf20Sopenharmony_ci{
1778c2ecf20Sopenharmony_ci	struct bpf_local_storage_map *smap;
1788c2ecf20Sopenharmony_ci	struct bpf_local_storage_map_bucket *b;
1798c2ecf20Sopenharmony_ci
1808c2ecf20Sopenharmony_ci	if (unlikely(!selem_linked_to_map_lockless(selem)))
1818c2ecf20Sopenharmony_ci		/* selem has already be unlinked from smap */
1828c2ecf20Sopenharmony_ci		return;
1838c2ecf20Sopenharmony_ci
1848c2ecf20Sopenharmony_ci	smap = rcu_dereference(SDATA(selem)->smap);
1858c2ecf20Sopenharmony_ci	b = select_bucket(smap, selem);
1868c2ecf20Sopenharmony_ci	raw_spin_lock_bh(&b->lock);
1878c2ecf20Sopenharmony_ci	if (likely(selem_linked_to_map(selem)))
1888c2ecf20Sopenharmony_ci		hlist_del_init_rcu(&selem->map_node);
1898c2ecf20Sopenharmony_ci	raw_spin_unlock_bh(&b->lock);
1908c2ecf20Sopenharmony_ci}
1918c2ecf20Sopenharmony_ci
1928c2ecf20Sopenharmony_civoid bpf_selem_link_map(struct bpf_local_storage_map *smap,
1938c2ecf20Sopenharmony_ci			struct bpf_local_storage_elem *selem)
1948c2ecf20Sopenharmony_ci{
1958c2ecf20Sopenharmony_ci	struct bpf_local_storage_map_bucket *b = select_bucket(smap, selem);
1968c2ecf20Sopenharmony_ci
1978c2ecf20Sopenharmony_ci	raw_spin_lock_bh(&b->lock);
1988c2ecf20Sopenharmony_ci	RCU_INIT_POINTER(SDATA(selem)->smap, smap);
1998c2ecf20Sopenharmony_ci	hlist_add_head_rcu(&selem->map_node, &b->list);
2008c2ecf20Sopenharmony_ci	raw_spin_unlock_bh(&b->lock);
2018c2ecf20Sopenharmony_ci}
2028c2ecf20Sopenharmony_ci
/* Fully detach selem: first from its map bucket, then from its
 * local_storage.
 */
void bpf_selem_unlink(struct bpf_local_storage_elem *selem)
{
	/* Always unlink from map before unlinking from local_storage
	 * because selem will be freed after successfully unlinked from
	 * the local_storage.
	 */
	bpf_selem_unlink_map(selem);
	__bpf_selem_unlink_storage(selem);
}
2128c2ecf20Sopenharmony_ci
2138c2ecf20Sopenharmony_cistruct bpf_local_storage_data *
2148c2ecf20Sopenharmony_cibpf_local_storage_lookup(struct bpf_local_storage *local_storage,
2158c2ecf20Sopenharmony_ci			 struct bpf_local_storage_map *smap,
2168c2ecf20Sopenharmony_ci			 bool cacheit_lockit)
2178c2ecf20Sopenharmony_ci{
2188c2ecf20Sopenharmony_ci	struct bpf_local_storage_data *sdata;
2198c2ecf20Sopenharmony_ci	struct bpf_local_storage_elem *selem;
2208c2ecf20Sopenharmony_ci
2218c2ecf20Sopenharmony_ci	/* Fast path (cache hit) */
2228c2ecf20Sopenharmony_ci	sdata = rcu_dereference(local_storage->cache[smap->cache_idx]);
2238c2ecf20Sopenharmony_ci	if (sdata && rcu_access_pointer(sdata->smap) == smap)
2248c2ecf20Sopenharmony_ci		return sdata;
2258c2ecf20Sopenharmony_ci
2268c2ecf20Sopenharmony_ci	/* Slow path (cache miss) */
2278c2ecf20Sopenharmony_ci	hlist_for_each_entry_rcu(selem, &local_storage->list, snode)
2288c2ecf20Sopenharmony_ci		if (rcu_access_pointer(SDATA(selem)->smap) == smap)
2298c2ecf20Sopenharmony_ci			break;
2308c2ecf20Sopenharmony_ci
2318c2ecf20Sopenharmony_ci	if (!selem)
2328c2ecf20Sopenharmony_ci		return NULL;
2338c2ecf20Sopenharmony_ci
2348c2ecf20Sopenharmony_ci	sdata = SDATA(selem);
2358c2ecf20Sopenharmony_ci	if (cacheit_lockit) {
2368c2ecf20Sopenharmony_ci		/* spinlock is needed to avoid racing with the
2378c2ecf20Sopenharmony_ci		 * parallel delete.  Otherwise, publishing an already
2388c2ecf20Sopenharmony_ci		 * deleted sdata to the cache will become a use-after-free
2398c2ecf20Sopenharmony_ci		 * problem in the next bpf_local_storage_lookup().
2408c2ecf20Sopenharmony_ci		 */
2418c2ecf20Sopenharmony_ci		raw_spin_lock_bh(&local_storage->lock);
2428c2ecf20Sopenharmony_ci		if (selem_linked_to_storage(selem))
2438c2ecf20Sopenharmony_ci			rcu_assign_pointer(local_storage->cache[smap->cache_idx],
2448c2ecf20Sopenharmony_ci					   sdata);
2458c2ecf20Sopenharmony_ci		raw_spin_unlock_bh(&local_storage->lock);
2468c2ecf20Sopenharmony_ci	}
2478c2ecf20Sopenharmony_ci
2488c2ecf20Sopenharmony_ci	return sdata;
2498c2ecf20Sopenharmony_ci}
2508c2ecf20Sopenharmony_ci
2518c2ecf20Sopenharmony_cistatic int check_flags(const struct bpf_local_storage_data *old_sdata,
2528c2ecf20Sopenharmony_ci		       u64 map_flags)
2538c2ecf20Sopenharmony_ci{
2548c2ecf20Sopenharmony_ci	if (old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_NOEXIST)
2558c2ecf20Sopenharmony_ci		/* elem already exists */
2568c2ecf20Sopenharmony_ci		return -EEXIST;
2578c2ecf20Sopenharmony_ci
2588c2ecf20Sopenharmony_ci	if (!old_sdata && (map_flags & ~BPF_F_LOCK) == BPF_EXIST)
2598c2ecf20Sopenharmony_ci		/* elem doesn't exist, cannot update it */
2608c2ecf20Sopenharmony_ci		return -ENOENT;
2618c2ecf20Sopenharmony_ci
2628c2ecf20Sopenharmony_ci	return 0;
2638c2ecf20Sopenharmony_ci}
2648c2ecf20Sopenharmony_ci
/* Allocate a bpf_local_storage for owner with first_selem as its first
 * element, and publish it to the owner's storage pointer.
 *
 * sizeof(*storage) is charged to the owner first and uncharged again on
 * every failure path.  first_selem itself is never freed here.
 *
 * Returns 0 on success, the mem_charge() error, -ENOMEM when the
 * allocation fails, or -EAGAIN when a storage had already been published
 * to the owner.
 */
int bpf_local_storage_alloc(void *owner,
			    struct bpf_local_storage_map *smap,
			    struct bpf_local_storage_elem *first_selem)
{
	struct bpf_local_storage *prev_storage, *storage;
	struct bpf_local_storage **owner_storage_ptr;
	int err;

	err = mem_charge(smap, owner, sizeof(*storage));
	if (err)
		return err;

	storage = kzalloc(sizeof(*storage), GFP_ATOMIC | __GFP_NOWARN);
	if (!storage) {
		err = -ENOMEM;
		goto uncharge;
	}

	INIT_HLIST_HEAD(&storage->list);
	raw_spin_lock_init(&storage->lock);
	storage->owner = owner;

	/* storage is not visible to anyone else yet, so no lock is needed */
	bpf_selem_link_storage_nolock(storage, first_selem);
	bpf_selem_link_map(smap, first_selem);

	owner_storage_ptr =
		(struct bpf_local_storage **)owner_storage(smap, owner);
	/* Publish storage to the owner.
	 * Instead of using any lock of the kernel object (i.e. owner),
	 * cmpxchg will work with any kernel object regardless what
	 * the running context is, bh, irq...etc.
	 *
	 * From now on, the owner->storage pointer (e.g. sk->sk_bpf_storage)
	 * is protected by the storage->lock.  Hence, when freeing
	 * the owner->storage, the storage->lock must be held before
	 * setting owner->storage ptr to NULL.
	 */
	prev_storage = cmpxchg(owner_storage_ptr, NULL, storage);
	if (unlikely(prev_storage)) {
		/* Note that even first_selem was linked to smap's
		 * bucket->list, first_selem can be freed immediately
		 * (instead of kfree_rcu) because
		 * bpf_local_storage_map_free() does a
		 * synchronize_rcu() before walking the bucket->list.
		 * Hence, no one is accessing selem from the
		 * bucket->list under rcu_read_lock().
		 */
		bpf_selem_unlink_map(first_selem);
		err = -EAGAIN;
		goto uncharge;
	}

	return 0;

uncharge:
	/* storage is NULL here when kzalloc() failed; kfree(NULL) is a no-op */
	kfree(storage);
	mem_uncharge(smap, owner, sizeof(*storage));
	return err;
}
3258c2ecf20Sopenharmony_ci
/* Create or update the element of smap stored on owner.
 *
 * sk cannot be going away because it is linking new elem
 * to sk->sk_bpf_storage. (i.e. sk->sk_refcnt cannot be 0).
 * Otherwise, it will become a leak (and other memory issues
 * during map destruction).
 *
 * map_flags may carry BPF_NOEXIST or BPF_EXIST (not both), optionally
 * combined with BPF_F_LOCK when the map value has a spin_lock.
 *
 * Returns the sdata holding the value on success, or an ERR_PTR():
 * -EINVAL for an invalid map_flags combination, -EEXIST / -ENOENT from
 * check_flags(), -ENOMEM when an allocation or charge fails, -EAGAIN when
 * the owner's storage is being torn down or replaced in parallel, or the
 * error returned by bpf_local_storage_alloc().
 */
struct bpf_local_storage_data *
bpf_local_storage_update(void *owner, struct bpf_local_storage_map *smap,
			 void *value, u64 map_flags)
{
	struct bpf_local_storage_data *old_sdata = NULL;
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage *local_storage;
	int err;

	/* BPF_EXIST and BPF_NOEXIST cannot be both set */
	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST) ||
	    /* BPF_F_LOCK can only be used in a value with spin_lock */
	    unlikely((map_flags & BPF_F_LOCK) &&
		     !map_value_has_spin_lock(&smap->map)))
		return ERR_PTR(-EINVAL);

	local_storage = rcu_dereference(*owner_storage(smap, owner));
	if (!local_storage || hlist_empty(&local_storage->list)) {
		/* Very first elem for the owner */
		err = check_flags(NULL, map_flags);
		if (err)
			return ERR_PTR(err);

		selem = bpf_selem_alloc(smap, owner, value, true);
		if (!selem)
			return ERR_PTR(-ENOMEM);

		err = bpf_local_storage_alloc(owner, smap, selem);
		if (err) {
			/* selem was never published; free it and give back
			 * the charge taken by bpf_selem_alloc().
			 */
			kfree(selem);
			mem_uncharge(smap, owner, smap->elem_size);
			return ERR_PTR(err);
		}

		return SDATA(selem);
	}

	if ((map_flags & BPF_F_LOCK) && !(map_flags & BPF_NOEXIST)) {
		/* Hoping to find an old_sdata to do inline update
		 * such that it can avoid taking the local_storage->lock
		 * and changing the lists.
		 */
		old_sdata =
			bpf_local_storage_lookup(local_storage, smap, false);
		err = check_flags(old_sdata, map_flags);
		if (err)
			return ERR_PTR(err);
		if (old_sdata && selem_linked_to_storage_lockless(SELEM(old_sdata))) {
			copy_map_value_locked(&smap->map, old_sdata->data,
					      value, false);
			return old_sdata;
		}
	}

	raw_spin_lock_bh(&local_storage->lock);

	/* Recheck local_storage->list under local_storage->lock */
	if (unlikely(hlist_empty(&local_storage->list))) {
		/* A parallel del is happening and local_storage is going
		 * away.  It has just been checked before, so very
		 * unlikely.  Return instead of retry to keep things
		 * simple.
		 */
		err = -EAGAIN;
		goto unlock_err;
	}

	/* Look up again, this time with local_storage->lock held */
	old_sdata = bpf_local_storage_lookup(local_storage, smap, false);
	err = check_flags(old_sdata, map_flags);
	if (err)
		goto unlock_err;

	if (old_sdata && (map_flags & BPF_F_LOCK)) {
		/* Update the existing value in place */
		copy_map_value_locked(&smap->map, old_sdata->data, value,
				      false);
		selem = SELEM(old_sdata);
		goto unlock;
	}

	/* local_storage->lock is held.  Hence, we are sure
	 * we can unlink and uncharge the old_sdata successfully
	 * later.  Hence, instead of charging the new selem now
	 * and then uncharge the old selem later (which may cause
	 * a potential but unnecessary charge failure),  avoid taking
	 * a charge at all here (the "!old_sdata" check) and the
	 * old_sdata will not be uncharged later during
	 * bpf_selem_unlink_storage_nolock().
	 */
	selem = bpf_selem_alloc(smap, owner, value, !old_sdata);
	if (!selem) {
		err = -ENOMEM;
		goto unlock_err;
	}

	/* First, link the new selem to the map */
	bpf_selem_link_map(smap, selem);

	/* Second, link (and publish) the new selem to local_storage */
	bpf_selem_link_storage_nolock(local_storage, selem);

	/* Third, remove old selem, SELEM(old_sdata) */
	if (old_sdata) {
		bpf_selem_unlink_map(SELEM(old_sdata));
		bpf_selem_unlink_storage_nolock(local_storage, SELEM(old_sdata),
						false);
	}

unlock:
	raw_spin_unlock_bh(&local_storage->lock);
	return SDATA(selem);

unlock_err:
	raw_spin_unlock_bh(&local_storage->lock);
	return ERR_PTR(err);
}
4468c2ecf20Sopenharmony_ci
4478c2ecf20Sopenharmony_ciu16 bpf_local_storage_cache_idx_get(struct bpf_local_storage_cache *cache)
4488c2ecf20Sopenharmony_ci{
4498c2ecf20Sopenharmony_ci	u64 min_usage = U64_MAX;
4508c2ecf20Sopenharmony_ci	u16 i, res = 0;
4518c2ecf20Sopenharmony_ci
4528c2ecf20Sopenharmony_ci	spin_lock(&cache->idx_lock);
4538c2ecf20Sopenharmony_ci
4548c2ecf20Sopenharmony_ci	for (i = 0; i < BPF_LOCAL_STORAGE_CACHE_SIZE; i++) {
4558c2ecf20Sopenharmony_ci		if (cache->idx_usage_counts[i] < min_usage) {
4568c2ecf20Sopenharmony_ci			min_usage = cache->idx_usage_counts[i];
4578c2ecf20Sopenharmony_ci			res = i;
4588c2ecf20Sopenharmony_ci
4598c2ecf20Sopenharmony_ci			/* Found a free cache_idx */
4608c2ecf20Sopenharmony_ci			if (!min_usage)
4618c2ecf20Sopenharmony_ci				break;
4628c2ecf20Sopenharmony_ci		}
4638c2ecf20Sopenharmony_ci	}
4648c2ecf20Sopenharmony_ci	cache->idx_usage_counts[res]++;
4658c2ecf20Sopenharmony_ci
4668c2ecf20Sopenharmony_ci	spin_unlock(&cache->idx_lock);
4678c2ecf20Sopenharmony_ci
4688c2ecf20Sopenharmony_ci	return res;
4698c2ecf20Sopenharmony_ci}
4708c2ecf20Sopenharmony_ci
4718c2ecf20Sopenharmony_civoid bpf_local_storage_cache_idx_free(struct bpf_local_storage_cache *cache,
4728c2ecf20Sopenharmony_ci				      u16 idx)
4738c2ecf20Sopenharmony_ci{
4748c2ecf20Sopenharmony_ci	spin_lock(&cache->idx_lock);
4758c2ecf20Sopenharmony_ci	cache->idx_usage_counts[idx]--;
4768c2ecf20Sopenharmony_ci	spin_unlock(&cache->idx_lock);
4778c2ecf20Sopenharmony_ci}
4788c2ecf20Sopenharmony_ci
/* Tear down smap: unlink every selem still on its bucket lists, then
 * free the bucket array and smap itself.  An RCU grace period is waited
 * for both before the buckets are walked and before the memory is
 * released.
 */
void bpf_local_storage_map_free(struct bpf_local_storage_map *smap)
{
	struct bpf_local_storage_elem *selem;
	struct bpf_local_storage_map_bucket *b;
	unsigned int i;

	/* Note that this map might be concurrently cloned from
	 * bpf_sk_storage_clone. Wait for any existing bpf_sk_storage_clone
	 * RCU read section to finish before proceeding. New RCU
	 * read sections should be prevented via bpf_map_inc_not_zero.
	 */
	synchronize_rcu();

	/* bpf prog and the userspace can no longer access this map
	 * now.  No new selem (of this map) can be added
	 * to the owner->storage or to the map bucket's list.
	 *
	 * The elem of this map can be cleaned up here
	 * or when the storage is freed e.g.
	 * by bpf_sk_storage_free() during __sk_destruct().
	 */
	for (i = 0; i < (1U << smap->bucket_log); i++) {
		b = &smap->buckets[i];

		rcu_read_lock();
		/* No one is adding to b->list now */
		/* Keep taking the first entry until the bucket is empty;
		 * bpf_selem_unlink() removes it from b->list.
		 */
		while ((selem = hlist_entry_safe(
				rcu_dereference_raw(hlist_first_rcu(&b->list)),
				struct bpf_local_storage_elem, map_node))) {
			bpf_selem_unlink(selem);
			cond_resched_rcu();
		}
		rcu_read_unlock();
	}

	/* While freeing the storage we may still need to access the map.
	 *
	 * e.g. when bpf_sk_storage_free() has unlinked selem from the map
	 * which then made the above while((selem = ...)) loop
	 * exit immediately.
	 *
	 * However, while freeing the storage one still needs to access the
	 * smap->elem_size to do the uncharging in
	 * bpf_selem_unlink_storage_nolock().
	 *
	 * Hence, wait another rcu grace period for the storage to be freed.
	 */
	synchronize_rcu();

	kvfree(smap->buckets);
	kfree(smap);
}
5318c2ecf20Sopenharmony_ci
5328c2ecf20Sopenharmony_ciint bpf_local_storage_map_alloc_check(union bpf_attr *attr)
5338c2ecf20Sopenharmony_ci{
5348c2ecf20Sopenharmony_ci	if (attr->map_flags & ~BPF_LOCAL_STORAGE_CREATE_FLAG_MASK ||
5358c2ecf20Sopenharmony_ci	    !(attr->map_flags & BPF_F_NO_PREALLOC) ||
5368c2ecf20Sopenharmony_ci	    attr->max_entries ||
5378c2ecf20Sopenharmony_ci	    attr->key_size != sizeof(int) || !attr->value_size ||
5388c2ecf20Sopenharmony_ci	    /* Enforce BTF for userspace sk dumping */
5398c2ecf20Sopenharmony_ci	    !attr->btf_key_type_id || !attr->btf_value_type_id)
5408c2ecf20Sopenharmony_ci		return -EINVAL;
5418c2ecf20Sopenharmony_ci
5428c2ecf20Sopenharmony_ci	if (!bpf_capable())
5438c2ecf20Sopenharmony_ci		return -EPERM;
5448c2ecf20Sopenharmony_ci
5458c2ecf20Sopenharmony_ci	if (attr->value_size > BPF_LOCAL_STORAGE_MAX_VALUE_SIZE)
5468c2ecf20Sopenharmony_ci		return -E2BIG;
5478c2ecf20Sopenharmony_ci
5488c2ecf20Sopenharmony_ci	return 0;
5498c2ecf20Sopenharmony_ci}
5508c2ecf20Sopenharmony_ci
5518c2ecf20Sopenharmony_cistruct bpf_local_storage_map *bpf_local_storage_map_alloc(union bpf_attr *attr)
5528c2ecf20Sopenharmony_ci{
5538c2ecf20Sopenharmony_ci	struct bpf_local_storage_map *smap;
5548c2ecf20Sopenharmony_ci	unsigned int i;
5558c2ecf20Sopenharmony_ci	u32 nbuckets;
5568c2ecf20Sopenharmony_ci	u64 cost;
5578c2ecf20Sopenharmony_ci	int ret;
5588c2ecf20Sopenharmony_ci
5598c2ecf20Sopenharmony_ci	smap = kzalloc(sizeof(*smap), GFP_USER | __GFP_NOWARN);
5608c2ecf20Sopenharmony_ci	if (!smap)
5618c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
5628c2ecf20Sopenharmony_ci	bpf_map_init_from_attr(&smap->map, attr);
5638c2ecf20Sopenharmony_ci
5648c2ecf20Sopenharmony_ci	nbuckets = roundup_pow_of_two(num_possible_cpus());
5658c2ecf20Sopenharmony_ci	/* Use at least 2 buckets, select_bucket() is undefined behavior with 1 bucket */
5668c2ecf20Sopenharmony_ci	nbuckets = max_t(u32, 2, nbuckets);
5678c2ecf20Sopenharmony_ci	smap->bucket_log = ilog2(nbuckets);
5688c2ecf20Sopenharmony_ci	cost = sizeof(*smap->buckets) * nbuckets + sizeof(*smap);
5698c2ecf20Sopenharmony_ci
5708c2ecf20Sopenharmony_ci	ret = bpf_map_charge_init(&smap->map.memory, cost);
5718c2ecf20Sopenharmony_ci	if (ret < 0) {
5728c2ecf20Sopenharmony_ci		kfree(smap);
5738c2ecf20Sopenharmony_ci		return ERR_PTR(ret);
5748c2ecf20Sopenharmony_ci	}
5758c2ecf20Sopenharmony_ci
5768c2ecf20Sopenharmony_ci	smap->buckets = kvcalloc(sizeof(*smap->buckets), nbuckets,
5778c2ecf20Sopenharmony_ci				 GFP_USER | __GFP_NOWARN);
5788c2ecf20Sopenharmony_ci	if (!smap->buckets) {
5798c2ecf20Sopenharmony_ci		bpf_map_charge_finish(&smap->map.memory);
5808c2ecf20Sopenharmony_ci		kfree(smap);
5818c2ecf20Sopenharmony_ci		return ERR_PTR(-ENOMEM);
5828c2ecf20Sopenharmony_ci	}
5838c2ecf20Sopenharmony_ci
5848c2ecf20Sopenharmony_ci	for (i = 0; i < nbuckets; i++) {
5858c2ecf20Sopenharmony_ci		INIT_HLIST_HEAD(&smap->buckets[i].list);
5868c2ecf20Sopenharmony_ci		raw_spin_lock_init(&smap->buckets[i].lock);
5878c2ecf20Sopenharmony_ci	}
5888c2ecf20Sopenharmony_ci
5898c2ecf20Sopenharmony_ci	smap->elem_size =
5908c2ecf20Sopenharmony_ci		sizeof(struct bpf_local_storage_elem) + attr->value_size;
5918c2ecf20Sopenharmony_ci
5928c2ecf20Sopenharmony_ci	return smap;
5938c2ecf20Sopenharmony_ci}
5948c2ecf20Sopenharmony_ci
5958c2ecf20Sopenharmony_ciint bpf_local_storage_map_check_btf(const struct bpf_map *map,
5968c2ecf20Sopenharmony_ci				    const struct btf *btf,
5978c2ecf20Sopenharmony_ci				    const struct btf_type *key_type,
5988c2ecf20Sopenharmony_ci				    const struct btf_type *value_type)
5998c2ecf20Sopenharmony_ci{
6008c2ecf20Sopenharmony_ci	u32 int_data;
6018c2ecf20Sopenharmony_ci
6028c2ecf20Sopenharmony_ci	if (BTF_INFO_KIND(key_type->info) != BTF_KIND_INT)
6038c2ecf20Sopenharmony_ci		return -EINVAL;
6048c2ecf20Sopenharmony_ci
6058c2ecf20Sopenharmony_ci	int_data = *(u32 *)(key_type + 1);
6068c2ecf20Sopenharmony_ci	if (BTF_INT_BITS(int_data) != 32 || BTF_INT_OFFSET(int_data))
6078c2ecf20Sopenharmony_ci		return -EINVAL;
6088c2ecf20Sopenharmony_ci
6098c2ecf20Sopenharmony_ci	return 0;
6108c2ecf20Sopenharmony_ci}
611