/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "sparse_array.h"
#include "os_memory.h"

/* Aligning our allocations to 64 has two advantages:
 *
 *  1. On x86 platforms, it means that they are cache-line aligned so we
 *     reduce the likelihood that one of our allocations shares a cache line
 *     with some other allocation.
 *
 *  2. It lets us use the bottom 6 bits of the pointer to store the tree level
 *     of the node so we can avoid some pointer indirections.
 */
#define NODE_ALLOC_ALIGN 64

void
util_sparse_array_init(struct util_sparse_array *arr,
                       size_t elem_size, size_t node_size)
{
   memset(arr, 0, sizeof(*arr));
   arr->elem_size = elem_size;
   arr->node_size_log2 = util_logbase2_64(node_size);
   assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
}
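
/* Example usage (illustrative only; "struct my_elem" is a hypothetical
 * element type and 64 is an arbitrary power-of-two node size):
 *
 *    struct util_sparse_array arr;
 *    util_sparse_array_init(&arr, sizeof(struct my_elem), 64);
 *
 *    struct my_elem *elem = util_sparse_array_get(&arr, 1234);
 *    ...
 *    util_sparse_array_finish(&arr);
 */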

#define NODE_PTR_MASK (~((uintptr_t)NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK ((uintptr_t)NODE_ALLOC_ALIGN - 1)
#define NULL_NODE 0

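/* A node handle packs the node's 64-byte-aligned data pointer together with
 * its level in the tree: the level lives in the low bits freed up by
 * NODE_ALLOC_ALIGN and the pointer in the rest.  A handle of 0 (NULL_NODE)
 * means "no node", which is why zero-filled child arrays read as empty.
 * The helpers below pack and unpack these handles.
 */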
static inline uintptr_t
_util_sparse_array_node(void *data, unsigned level)
{
   assert(data != NULL);
   assert(((uintptr_t)data & NODE_LEVEL_MASK) == 0);
   assert((level & NODE_PTR_MASK) == 0);
   return (uintptr_t)data | level;
}

static inline void *
_util_sparse_array_node_data(uintptr_t handle)
{
   return (void *)(handle & NODE_PTR_MASK);
}

static inline unsigned
_util_sparse_array_node_level(uintptr_t handle)
{
   return handle & NODE_LEVEL_MASK;
}

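/* Recursively frees a node: for interior nodes (level > 0) this first frees
 * every non-NULL child, then the node's own storage.
 */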
static inline void
_util_sparse_array_node_finish(struct util_sparse_array *arr,
                               uintptr_t node)
{
   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            _util_sparse_array_node_finish(arr, children[i]);
      }
   }

   os_free_aligned(_util_sparse_array_node_data(node));
}

void
util_sparse_array_finish(struct util_sparse_array *arr)
{
   if (arr->root)
      _util_sparse_array_node_finish(arr, arr->root);
}

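/* Allocates and zero-fills a node for the given level.  A leaf node
 * (level 0) holds node_size elements; an interior node holds node_size
 * child handles.
 */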
static inline uintptr_t
_util_sparse_array_node_alloc(struct util_sparse_array *arr,
                              unsigned level)
{
   size_t size;
   if (level == 0) {
      size = arr->elem_size << arr->node_size_log2;
   } else {
      size = sizeof(uintptr_t) << arr->node_size_log2;
   }

   void *data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
   memset(data, 0, size);

   return _util_sparse_array_node(data, level);
}

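/* Atomically installs "node" at *node_ptr if it still holds "cmp_node".
 * If another thread won the race, the freshly allocated node is freed and
 * the winner's node is returned instead.
 */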
static inline uintptr_t
_util_sparse_array_set_or_free_node(uintptr_t *node_ptr,
                                    uintptr_t cmp_node,
                                    uintptr_t node)
{
   uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);

   if (prev_node != cmp_node) {
      /* We lost the race.  Free this one and return the one that was already
       * allocated.
       */
      os_free_aligned(_util_sparse_array_node_data(node));
      return prev_node;
   } else {
      return node;
   }
}

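/* Returns a pointer to the element at index "idx", allocating any missing
 * parts of the tree along the way: the root is deepened until its level
 * covers "idx", and absent interior and leaf nodes on the path are created
 * on demand.  Every node-pointer update goes through a compare-and-swap
 * (see _util_sparse_array_set_or_free_node), so racing callers all end up
 * walking the same winning node.
 */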
void *
util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx)
{
   const unsigned node_size_log2 = arr->node_size_log2;
   uintptr_t root = p_atomic_read(&arr->root);
   if (unlikely(!root)) {
      unsigned root_level = 0;
      uint64_t idx_iter = idx >> node_size_log2;
      while (idx_iter) {
         idx_iter >>= node_size_log2;
         root_level++;
      }
      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
      root = _util_sparse_array_set_or_free_node(&arr->root,
                                                 NULL_NODE, new_root);
   }

   while (1) {
      unsigned root_level = _util_sparse_array_node_level(root);
      uint64_t root_idx = idx >> (root_level * node_size_log2);
      if (likely(root_idx < (1ull << node_size_log2)))
         break;

      /* In this case, we have a root but its level is low enough that the
       * requested index is out-of-bounds.
       */
      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);

      uintptr_t *new_root_children = _util_sparse_array_node_data(new_root);
      new_root_children[0] = root;

      /* We only add one at a time instead of the whole tree because it's
       * easier to ensure correctness of both the tree building and the
       * clean-up path.  Because we're only adding one node we never have to
       * worry about trying to free multiple things without freeing the old
       * things.
       */
      root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
   }

   void *node_data = _util_sparse_array_node_data(root);
   unsigned node_level = _util_sparse_array_node_level(root);
   while (node_level > 0) {
      uint64_t child_idx = (idx >> (node_level * node_size_log2)) &
                           ((1ull << node_size_log2) - 1);

      uintptr_t *children = node_data;
      uintptr_t child = p_atomic_read(&children[child_idx]);

      if (unlikely(!child)) {
         child = _util_sparse_array_node_alloc(arr, node_level - 1);
         child = _util_sparse_array_set_or_free_node(&children[child_idx],
                                                     NULL_NODE, child);
      }

      node_data = _util_sparse_array_node_data(child);
      node_level = _util_sparse_array_node_level(child);
   }

   uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
   return (void *)((char *)node_data + (elem_idx * arr->elem_size));
}

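/* Debug helper: checks that every node's stored level matches its actual
 * depth in the tree.
 */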
static void
validate_node_level(struct util_sparse_array *arr,
                    uintptr_t node, unsigned level)
{
   assert(_util_sparse_array_node_level(node) == level);

   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            validate_node_level(arr, children[i], level - 1);
      }
   }
}

void
util_sparse_array_validate(struct util_sparse_array *arr)
{
   uintptr_t root = p_atomic_read(&arr->root);
   validate_node_level(arr, root, _util_sparse_array_node_level(root));
}

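/* Example usage of the free list (illustrative only; "struct my_elem" and
 * its "next" field are hypothetical, and 0 is an arbitrary sentinel index):
 *
 *    struct util_sparse_array_free_list fl;
 *    util_sparse_array_free_list_init(&fl, &arr, 0,
 *                                     offsetof(struct my_elem, next));
 *
 *    uint32_t idx = 7;
 *    util_sparse_array_free_list_push(&fl, &idx, 1);
 *    struct my_elem *elem = util_sparse_array_free_list_pop_elem(&fl);
 */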
void
util_sparse_array_free_list_init(struct util_sparse_array_free_list *fl,
                                 struct util_sparse_array *arr,
                                 uint32_t sentinel,
                                 uint32_t next_offset)
{
   fl->head = sentinel;
   fl->arr = arr;
   fl->sentinel = sentinel;
   fl->next_offset = next_offset;
}

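/* Builds a new 64-bit head value for the free list: the low 32 bits hold the
 * index of the new first element and the high 32 bits hold a counter that is
 * incremented on every update, so a reused index still yields a distinct head
 * value for the compare-and-swap (guarding against ABA-style confusion).
 */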
static uint64_t
free_list_head(uint64_t old, uint32_t next)
{
   return ((old & 0xffffffff00000000ull) + 0x100000000ull) | next;
}

void
util_sparse_array_free_list_push(struct util_sparse_array_free_list *fl,
                                 uint32_t *items, unsigned num_items)
{
   assert(num_items > 0);
   assert(items[0] != fl->sentinel);
   void *last_elem = util_sparse_array_get(fl->arr, items[0]);
   uint32_t *last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   for (unsigned i = 1; i < num_items; i++) {
      p_atomic_set(last_next, items[i]);
      assert(items[i] != fl->sentinel);
      last_elem = util_sparse_array_get(fl->arr, items[i]);
      last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   }

   uint64_t current_head, old_head;
   old_head = p_atomic_read(&fl->head);
   do {
      current_head = old_head;
      p_atomic_set(last_next, (uint32_t)current_head); /* Index is the bottom 32 bits */
      uint64_t new_head = free_list_head(current_head, items[0]);
      old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
   } while (old_head != current_head);
}

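/* Pops the first element off the free list and returns its index, or the
 * sentinel if the list is empty.
 */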
uint32_t
util_sparse_array_free_list_pop_idx(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return fl->sentinel;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_idx;
      current_head = old_head;
   }
}

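/* Same as util_sparse_array_free_list_pop_idx() except that it returns a
 * pointer to the popped element, or NULL if the list is empty.
 */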
void *
util_sparse_array_free_list_pop_elem(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return NULL;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_elem;
      current_head = old_head;
   }
}