1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright © 2019 Intel Corporation
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20bf215546Sopenharmony_ci * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21bf215546Sopenharmony_ci * IN THE SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci#include "sparse_array.h"
25bf215546Sopenharmony_ci#include "os_memory.h"
26bf215546Sopenharmony_ci
27bf215546Sopenharmony_ci/* Aligning our allocations to 64 has two advantages:
28bf215546Sopenharmony_ci *
29bf215546Sopenharmony_ci *  1. On x86 platforms, it means that they are cache-line aligned so we
30bf215546Sopenharmony_ci *     reduce the likelihood that one of our allocations shares a cache line
31bf215546Sopenharmony_ci *     with some other allocation.
32bf215546Sopenharmony_ci *
33bf215546Sopenharmony_ci *  2. It lets us use the bottom 6 bits of the pointer to store the tree level
34bf215546Sopenharmony_ci *     of the node so we can avoid some pointer indirections.
35bf215546Sopenharmony_ci */
36bf215546Sopenharmony_ci#define NODE_ALLOC_ALIGN 64
37bf215546Sopenharmony_ci
38bf215546Sopenharmony_civoid
39bf215546Sopenharmony_ciutil_sparse_array_init(struct util_sparse_array *arr,
40bf215546Sopenharmony_ci                       size_t elem_size, size_t node_size)
41bf215546Sopenharmony_ci{
42bf215546Sopenharmony_ci   memset(arr, 0, sizeof(*arr));
43bf215546Sopenharmony_ci   arr->elem_size = elem_size;
44bf215546Sopenharmony_ci   arr->node_size_log2 = util_logbase2_64(node_size);
45bf215546Sopenharmony_ci   assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
46bf215546Sopenharmony_ci}
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci#define NODE_PTR_MASK (~((uintptr_t)NODE_ALLOC_ALIGN - 1))
49bf215546Sopenharmony_ci#define NODE_LEVEL_MASK ((uintptr_t)NODE_ALLOC_ALIGN - 1)
50bf215546Sopenharmony_ci#define NULL_NODE 0
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_cistatic inline uintptr_t
53bf215546Sopenharmony_ci_util_sparse_array_node(void *data, unsigned level)
54bf215546Sopenharmony_ci{
55bf215546Sopenharmony_ci   assert(data != NULL);
56bf215546Sopenharmony_ci   assert(((uintptr_t)data & NODE_LEVEL_MASK) == 0);
57bf215546Sopenharmony_ci   assert((level & NODE_PTR_MASK) == 0);
58bf215546Sopenharmony_ci   return (uintptr_t)data | level;
59bf215546Sopenharmony_ci}
60bf215546Sopenharmony_ci
61bf215546Sopenharmony_cistatic inline void *
62bf215546Sopenharmony_ci_util_sparse_array_node_data(uintptr_t handle)
63bf215546Sopenharmony_ci{
64bf215546Sopenharmony_ci   return (void *)(handle & NODE_PTR_MASK);
65bf215546Sopenharmony_ci}
66bf215546Sopenharmony_ci
67bf215546Sopenharmony_cistatic inline unsigned
68bf215546Sopenharmony_ci_util_sparse_array_node_level(uintptr_t handle)
69bf215546Sopenharmony_ci{
70bf215546Sopenharmony_ci   return handle & NODE_LEVEL_MASK;
71bf215546Sopenharmony_ci}
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_cistatic inline void
74bf215546Sopenharmony_ci_util_sparse_array_node_finish(struct util_sparse_array *arr,
75bf215546Sopenharmony_ci                               uintptr_t node)
76bf215546Sopenharmony_ci{
77bf215546Sopenharmony_ci   if (_util_sparse_array_node_level(node) > 0) {
78bf215546Sopenharmony_ci      uintptr_t *children = _util_sparse_array_node_data(node);
79bf215546Sopenharmony_ci      size_t node_size = 1ull << arr->node_size_log2;
80bf215546Sopenharmony_ci      for (size_t i = 0; i < node_size; i++) {
81bf215546Sopenharmony_ci         if (children[i])
82bf215546Sopenharmony_ci            _util_sparse_array_node_finish(arr, children[i]);
83bf215546Sopenharmony_ci      }
84bf215546Sopenharmony_ci   }
85bf215546Sopenharmony_ci
86bf215546Sopenharmony_ci   os_free_aligned(_util_sparse_array_node_data(node));
87bf215546Sopenharmony_ci}
88bf215546Sopenharmony_ci
89bf215546Sopenharmony_civoid
90bf215546Sopenharmony_ciutil_sparse_array_finish(struct util_sparse_array *arr)
91bf215546Sopenharmony_ci{
92bf215546Sopenharmony_ci   if (arr->root)
93bf215546Sopenharmony_ci      _util_sparse_array_node_finish(arr, arr->root);
94bf215546Sopenharmony_ci}
95bf215546Sopenharmony_ci
96bf215546Sopenharmony_cistatic inline uintptr_t
97bf215546Sopenharmony_ci_util_sparse_array_node_alloc(struct util_sparse_array *arr,
98bf215546Sopenharmony_ci                              unsigned level)
99bf215546Sopenharmony_ci{
100bf215546Sopenharmony_ci   size_t size;
101bf215546Sopenharmony_ci   if (level == 0) {
102bf215546Sopenharmony_ci      size = arr->elem_size << arr->node_size_log2;
103bf215546Sopenharmony_ci   } else {
104bf215546Sopenharmony_ci      size = sizeof(uintptr_t) << arr->node_size_log2;
105bf215546Sopenharmony_ci   }
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_ci   void *data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
108bf215546Sopenharmony_ci   memset(data, 0, size);
109bf215546Sopenharmony_ci
110bf215546Sopenharmony_ci   return _util_sparse_array_node(data, level);
111bf215546Sopenharmony_ci}
112bf215546Sopenharmony_ci
113bf215546Sopenharmony_cistatic inline uintptr_t
114bf215546Sopenharmony_ci_util_sparse_array_set_or_free_node(uintptr_t *node_ptr,
115bf215546Sopenharmony_ci                                    uintptr_t cmp_node,
116bf215546Sopenharmony_ci                                    uintptr_t node)
117bf215546Sopenharmony_ci{
118bf215546Sopenharmony_ci   uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);
119bf215546Sopenharmony_ci
120bf215546Sopenharmony_ci   if (prev_node != cmp_node) {
121bf215546Sopenharmony_ci      /* We lost the race.  Free this one and return the one that was already
122bf215546Sopenharmony_ci       * allocated.
123bf215546Sopenharmony_ci       */
124bf215546Sopenharmony_ci      os_free_aligned(_util_sparse_array_node_data(node));
125bf215546Sopenharmony_ci      return prev_node;
126bf215546Sopenharmony_ci   } else {
127bf215546Sopenharmony_ci      return node;
128bf215546Sopenharmony_ci   }
129bf215546Sopenharmony_ci}
130bf215546Sopenharmony_ci
131bf215546Sopenharmony_civoid *
132bf215546Sopenharmony_ciutil_sparse_array_get(struct util_sparse_array *arr, uint64_t idx)
133bf215546Sopenharmony_ci{
134bf215546Sopenharmony_ci   const unsigned node_size_log2 = arr->node_size_log2;
135bf215546Sopenharmony_ci   uintptr_t root = p_atomic_read(&arr->root);
136bf215546Sopenharmony_ci   if (unlikely(!root)) {
137bf215546Sopenharmony_ci      unsigned root_level = 0;
138bf215546Sopenharmony_ci      uint64_t idx_iter = idx >> node_size_log2;
139bf215546Sopenharmony_ci      while (idx_iter) {
140bf215546Sopenharmony_ci         idx_iter >>= node_size_log2;
141bf215546Sopenharmony_ci         root_level++;
142bf215546Sopenharmony_ci      }
143bf215546Sopenharmony_ci      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
144bf215546Sopenharmony_ci      root = _util_sparse_array_set_or_free_node(&arr->root,
145bf215546Sopenharmony_ci                                                 NULL_NODE, new_root);
146bf215546Sopenharmony_ci   }
147bf215546Sopenharmony_ci
148bf215546Sopenharmony_ci   while (1) {
149bf215546Sopenharmony_ci      unsigned root_level = _util_sparse_array_node_level(root);
150bf215546Sopenharmony_ci      uint64_t root_idx = idx >> (root_level * node_size_log2);
151bf215546Sopenharmony_ci      if (likely(root_idx < (1ull << node_size_log2)))
152bf215546Sopenharmony_ci         break;
153bf215546Sopenharmony_ci
154bf215546Sopenharmony_ci      /* In this case, we have a root but its level is low enough that the
155bf215546Sopenharmony_ci       * requested index is out-of-bounds.
156bf215546Sopenharmony_ci       */
157bf215546Sopenharmony_ci      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);
158bf215546Sopenharmony_ci
159bf215546Sopenharmony_ci      uintptr_t *new_root_children = _util_sparse_array_node_data(new_root);
160bf215546Sopenharmony_ci      new_root_children[0] = root;
161bf215546Sopenharmony_ci
162bf215546Sopenharmony_ci      /* We only add one at a time instead of the whole tree because it's
163bf215546Sopenharmony_ci       * easier to ensure correctness of both the tree building and the
164bf215546Sopenharmony_ci       * clean-up path.  Because we're only adding one node we never have to
165bf215546Sopenharmony_ci       * worry about trying to free multiple things without freeing the old
166bf215546Sopenharmony_ci       * things.
167bf215546Sopenharmony_ci       */
168bf215546Sopenharmony_ci      root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
169bf215546Sopenharmony_ci   }
170bf215546Sopenharmony_ci
171bf215546Sopenharmony_ci   void *node_data = _util_sparse_array_node_data(root);
172bf215546Sopenharmony_ci   unsigned node_level = _util_sparse_array_node_level(root);
173bf215546Sopenharmony_ci   while (node_level > 0) {
174bf215546Sopenharmony_ci      uint64_t child_idx = (idx >> (node_level * node_size_log2)) &
175bf215546Sopenharmony_ci                           ((1ull << node_size_log2) - 1);
176bf215546Sopenharmony_ci
177bf215546Sopenharmony_ci      uintptr_t *children = node_data;
178bf215546Sopenharmony_ci      uintptr_t child = p_atomic_read(&children[child_idx]);
179bf215546Sopenharmony_ci
180bf215546Sopenharmony_ci      if (unlikely(!child)) {
181bf215546Sopenharmony_ci         child = _util_sparse_array_node_alloc(arr, node_level - 1);
182bf215546Sopenharmony_ci         child = _util_sparse_array_set_or_free_node(&children[child_idx],
183bf215546Sopenharmony_ci                                                     NULL_NODE, child);
184bf215546Sopenharmony_ci      }
185bf215546Sopenharmony_ci
186bf215546Sopenharmony_ci      node_data = _util_sparse_array_node_data(child);
187bf215546Sopenharmony_ci      node_level = _util_sparse_array_node_level(child);
188bf215546Sopenharmony_ci   }
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci   uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
191bf215546Sopenharmony_ci   return (void *)((char *)node_data + (elem_idx * arr->elem_size));
192bf215546Sopenharmony_ci}
193bf215546Sopenharmony_ci
194bf215546Sopenharmony_cistatic void
195bf215546Sopenharmony_civalidate_node_level(struct util_sparse_array *arr,
196bf215546Sopenharmony_ci                    uintptr_t node, unsigned level)
197bf215546Sopenharmony_ci{
198bf215546Sopenharmony_ci   assert(_util_sparse_array_node_level(node) == level);
199bf215546Sopenharmony_ci
200bf215546Sopenharmony_ci   if (_util_sparse_array_node_level(node) > 0) {
201bf215546Sopenharmony_ci      uintptr_t *children = _util_sparse_array_node_data(node);
202bf215546Sopenharmony_ci      size_t node_size = 1ull << arr->node_size_log2;
203bf215546Sopenharmony_ci      for (size_t i = 0; i < node_size; i++) {
204bf215546Sopenharmony_ci         if (children[i])
205bf215546Sopenharmony_ci            validate_node_level(arr, children[i], level - 1);
206bf215546Sopenharmony_ci      }
207bf215546Sopenharmony_ci   }
208bf215546Sopenharmony_ci}
209bf215546Sopenharmony_ci
210bf215546Sopenharmony_civoid
211bf215546Sopenharmony_ciutil_sparse_array_validate(struct util_sparse_array *arr)
212bf215546Sopenharmony_ci{
213bf215546Sopenharmony_ci   uintptr_t root = p_atomic_read(&arr->root);
214bf215546Sopenharmony_ci   validate_node_level(arr, root, _util_sparse_array_node_level(root));
215bf215546Sopenharmony_ci}
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_civoid
218bf215546Sopenharmony_ciutil_sparse_array_free_list_init(struct util_sparse_array_free_list *fl,
219bf215546Sopenharmony_ci                                 struct util_sparse_array *arr,
220bf215546Sopenharmony_ci                                 uint32_t sentinel,
221bf215546Sopenharmony_ci                                 uint32_t next_offset)
222bf215546Sopenharmony_ci{
223bf215546Sopenharmony_ci   fl->head = sentinel;
224bf215546Sopenharmony_ci   fl->arr = arr;
225bf215546Sopenharmony_ci   fl->sentinel = sentinel;
226bf215546Sopenharmony_ci   fl->next_offset = next_offset;
227bf215546Sopenharmony_ci}
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_cistatic uint64_t
230bf215546Sopenharmony_cifree_list_head(uint64_t old, uint32_t next)
231bf215546Sopenharmony_ci{
232bf215546Sopenharmony_ci   return ((old & 0xffffffff00000000ull) + 0x100000000ull) | next;
233bf215546Sopenharmony_ci}
234bf215546Sopenharmony_ci
235bf215546Sopenharmony_civoid
236bf215546Sopenharmony_ciutil_sparse_array_free_list_push(struct util_sparse_array_free_list *fl,
237bf215546Sopenharmony_ci                                 uint32_t *items, unsigned num_items)
238bf215546Sopenharmony_ci{
239bf215546Sopenharmony_ci   assert(num_items > 0);
240bf215546Sopenharmony_ci   assert(items[0] != fl->sentinel);
241bf215546Sopenharmony_ci   void *last_elem = util_sparse_array_get(fl->arr, items[0]);
242bf215546Sopenharmony_ci   uint32_t *last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
243bf215546Sopenharmony_ci   for (unsigned i = 1; i < num_items; i++) {
244bf215546Sopenharmony_ci      p_atomic_set(last_next, items[i]);
245bf215546Sopenharmony_ci      assert(items[i] != fl->sentinel);
246bf215546Sopenharmony_ci      last_elem = util_sparse_array_get(fl->arr, items[i]);
247bf215546Sopenharmony_ci      last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
248bf215546Sopenharmony_ci   }
249bf215546Sopenharmony_ci
250bf215546Sopenharmony_ci   uint64_t current_head, old_head;
251bf215546Sopenharmony_ci   old_head = p_atomic_read(&fl->head);
252bf215546Sopenharmony_ci   do {
253bf215546Sopenharmony_ci      current_head = old_head;
254bf215546Sopenharmony_ci      p_atomic_set(last_next, (uint32_t)current_head); /* Index is the bottom 32 bits */
255bf215546Sopenharmony_ci      uint64_t new_head = free_list_head(current_head, items[0]);
256bf215546Sopenharmony_ci      old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
257bf215546Sopenharmony_ci   } while (old_head != current_head);
258bf215546Sopenharmony_ci}
259bf215546Sopenharmony_ci
260bf215546Sopenharmony_ciuint32_t
261bf215546Sopenharmony_ciutil_sparse_array_free_list_pop_idx(struct util_sparse_array_free_list *fl)
262bf215546Sopenharmony_ci{
263bf215546Sopenharmony_ci   uint64_t current_head;
264bf215546Sopenharmony_ci
265bf215546Sopenharmony_ci   current_head = p_atomic_read(&fl->head);
266bf215546Sopenharmony_ci   while (1) {
267bf215546Sopenharmony_ci      if ((uint32_t)current_head == fl->sentinel)
268bf215546Sopenharmony_ci         return fl->sentinel;
269bf215546Sopenharmony_ci
270bf215546Sopenharmony_ci      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
271bf215546Sopenharmony_ci      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
272bf215546Sopenharmony_ci      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
273bf215546Sopenharmony_ci      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
274bf215546Sopenharmony_ci      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
275bf215546Sopenharmony_ci      if (old_head == current_head)
276bf215546Sopenharmony_ci         return head_idx;
277bf215546Sopenharmony_ci      current_head = old_head;
278bf215546Sopenharmony_ci   }
279bf215546Sopenharmony_ci}
280bf215546Sopenharmony_ci
281bf215546Sopenharmony_civoid *
282bf215546Sopenharmony_ciutil_sparse_array_free_list_pop_elem(struct util_sparse_array_free_list *fl)
283bf215546Sopenharmony_ci{
284bf215546Sopenharmony_ci   uint64_t current_head;
285bf215546Sopenharmony_ci
286bf215546Sopenharmony_ci   current_head = p_atomic_read(&fl->head);
287bf215546Sopenharmony_ci   while (1) {
288bf215546Sopenharmony_ci      if ((uint32_t)current_head == fl->sentinel)
289bf215546Sopenharmony_ci         return NULL;
290bf215546Sopenharmony_ci
291bf215546Sopenharmony_ci      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
292bf215546Sopenharmony_ci      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
293bf215546Sopenharmony_ci      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
294bf215546Sopenharmony_ci      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
295bf215546Sopenharmony_ci      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
296bf215546Sopenharmony_ci      if (old_head == current_head)
297bf215546Sopenharmony_ci         return head_elem;
298bf215546Sopenharmony_ci      current_head = old_head;
299bf215546Sopenharmony_ci   }
300bf215546Sopenharmony_ci}
301