/*
 * Copyright © 2019 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "sparse_array.h"
#include "os_memory.h"

/* Aligning our allocations to 64 has two advantages:
 *
 * 1. On x86 platforms, it means that they are cache-line aligned, so we
 *    reduce the likelihood that one of our allocations shares a cache line
 *    with some other allocation.
 *
 * 2. It lets us use the bottom 6 bits of the pointer to store the tree level
 *    of the node so we can avoid some pointer indirections.
 */
#define NODE_ALLOC_ALIGN 64

void
util_sparse_array_init(struct util_sparse_array *arr,
                       size_t elem_size, size_t node_size)
{
   memset(arr, 0, sizeof(*arr));
   arr->elem_size = elem_size;
   arr->node_size_log2 = util_logbase2_64(node_size);
   assert(node_size >= 2 && node_size == (1ull << arr->node_size_log2));
}

#define NODE_PTR_MASK (~((uintptr_t)NODE_ALLOC_ALIGN - 1))
#define NODE_LEVEL_MASK ((uintptr_t)NODE_ALLOC_ALIGN - 1)
#define NULL_NODE 0

static inline uintptr_t
_util_sparse_array_node(void *data, unsigned level)
{
   assert(data != NULL);
   assert(((uintptr_t)data & NODE_LEVEL_MASK) == 0);
   assert((level & NODE_PTR_MASK) == 0);
   return (uintptr_t)data | level;
}

static inline void *
_util_sparse_array_node_data(uintptr_t handle)
{
   return (void *)(handle & NODE_PTR_MASK);
}

static inline unsigned
_util_sparse_array_node_level(uintptr_t handle)
{
   return handle & NODE_LEVEL_MASK;
}
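/* Illustration (a sketch, not used by the implementation): because every
 * node is allocated with NODE_ALLOC_ALIGN (64-byte) alignment, the low six
 * bits of a node pointer are always zero, so they can carry the node's tree
 * level.  Packing and unpacking round-trip:
 *
 *    void *data = os_malloc_aligned(64, NODE_ALLOC_ALIGN);
 *    uintptr_t handle = _util_sparse_array_node(data, 3);
 *    assert(_util_sparse_array_node_data(handle) == data);
 *    assert(_util_sparse_array_node_level(handle) == 3);
 *    os_free_aligned(data);
 */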
static inline void
_util_sparse_array_node_finish(struct util_sparse_array *arr,
                               uintptr_t node)
{
   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            _util_sparse_array_node_finish(arr, children[i]);
      }
   }

   os_free_aligned(_util_sparse_array_node_data(node));
}

void
util_sparse_array_finish(struct util_sparse_array *arr)
{
   if (arr->root)
      _util_sparse_array_node_finish(arr, arr->root);
}

static inline uintptr_t
_util_sparse_array_node_alloc(struct util_sparse_array *arr,
                              unsigned level)
{
   size_t size;
   if (level == 0) {
      size = arr->elem_size << arr->node_size_log2;
   } else {
      size = sizeof(uintptr_t) << arr->node_size_log2;
   }

   void *data = os_malloc_aligned(size, NODE_ALLOC_ALIGN);
   memset(data, 0, size);

   return _util_sparse_array_node(data, level);
}

static inline uintptr_t
_util_sparse_array_set_or_free_node(uintptr_t *node_ptr,
                                    uintptr_t cmp_node,
                                    uintptr_t node)
{
   uintptr_t prev_node = p_atomic_cmpxchg(node_ptr, cmp_node, node);

   if (prev_node != cmp_node) {
      /* We lost the race.  Free this one and return the one that was already
       * allocated.
       */
      os_free_aligned(_util_sparse_array_node_data(node));
      return prev_node;
   } else {
      return node;
   }
}

void *
util_sparse_array_get(struct util_sparse_array *arr, uint64_t idx)
{
   const unsigned node_size_log2 = arr->node_size_log2;
   uintptr_t root = p_atomic_read(&arr->root);
   if (unlikely(!root)) {
      unsigned root_level = 0;
      uint64_t idx_iter = idx >> node_size_log2;
      while (idx_iter) {
         idx_iter >>= node_size_log2;
         root_level++;
      }
      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level);
      root = _util_sparse_array_set_or_free_node(&arr->root,
                                                 NULL_NODE, new_root);
   }

   while (1) {
      unsigned root_level = _util_sparse_array_node_level(root);
      uint64_t root_idx = idx >> (root_level * node_size_log2);
      if (likely(root_idx < (1ull << node_size_log2)))
         break;

      /* In this case, we have a root but its level is low enough that the
       * requested index is out-of-bounds.
       */
      uintptr_t new_root = _util_sparse_array_node_alloc(arr, root_level + 1);

      uintptr_t *new_root_children = _util_sparse_array_node_data(new_root);
      new_root_children[0] = root;

      /* We only add one level at a time instead of the whole missing chunk
       * of tree because it's easier to ensure correctness of both the tree
       * building and the clean-up path.  Because we only ever add one node,
       * losing the race means freeing exactly one freshly allocated node; we
       * never have to unwind a partially built subtree.
       */
      root = _util_sparse_array_set_or_free_node(&arr->root, root, new_root);
   }

   void *node_data = _util_sparse_array_node_data(root);
   unsigned node_level = _util_sparse_array_node_level(root);
   while (node_level > 0) {
      uint64_t child_idx = (idx >> (node_level * node_size_log2)) &
                           ((1ull << node_size_log2) - 1);

      uintptr_t *children = node_data;
      uintptr_t child = p_atomic_read(&children[child_idx]);

      if (unlikely(!child)) {
         child = _util_sparse_array_node_alloc(arr, node_level - 1);
         child = _util_sparse_array_set_or_free_node(&children[child_idx],
                                                     NULL_NODE, child);
      }

      node_data = _util_sparse_array_node_data(child);
      node_level = _util_sparse_array_node_level(child);
   }

   uint64_t elem_idx = idx & ((1ull << node_size_log2) - 1);
   return (void *)((char *)node_data + (elem_idx * arr->elem_size));
}
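/* Usage sketch (hypothetical caller code; `struct foo` is illustrative):
 *
 *    struct util_sparse_array arr;
 *    util_sparse_array_init(&arr, sizeof(struct foo), 64);
 *
 *    struct foo *f = util_sparse_array_get(&arr, 1000000);
 *    ...                  // storage for index 1000000 now exists, zeroed
 *
 *    util_sparse_array_finish(&arr);
 *
 * With node_size = 64 (node_size_log2 = 6), the index is consumed six bits
 * per level from the bottom: for idx = 1000000, the leaf slot is
 * 1000000 & 63 = 0 and the level-1 slot is (1000000 >> 6) & 63 = 9.  Since
 * 1000000 >> 6 = 15625 takes three more 6-bit shifts to reach zero, the
 * first get() allocates a level-3 root.
 */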
static void
validate_node_level(struct util_sparse_array *arr,
                    uintptr_t node, unsigned level)
{
   assert(_util_sparse_array_node_level(node) == level);

   if (_util_sparse_array_node_level(node) > 0) {
      uintptr_t *children = _util_sparse_array_node_data(node);
      size_t node_size = 1ull << arr->node_size_log2;
      for (size_t i = 0; i < node_size; i++) {
         if (children[i])
            validate_node_level(arr, children[i], level - 1);
      }
   }
}

void
util_sparse_array_validate(struct util_sparse_array *arr)
{
   uintptr_t root = p_atomic_read(&arr->root);
   validate_node_level(arr, root, _util_sparse_array_node_level(root));
}

void
util_sparse_array_free_list_init(struct util_sparse_array_free_list *fl,
                                 struct util_sparse_array *arr,
                                 uint32_t sentinel,
                                 uint32_t next_offset)
{
   fl->head = sentinel;
   fl->arr = arr;
   fl->sentinel = sentinel;
   fl->next_offset = next_offset;
}

/* The free-list head packs two things into one 64-bit value: the index of
 * the first free element in the bottom 32 bits and a generation counter in
 * the top 32 bits.  The counter is bumped on every head update, so a
 * cmpxchg on the head fails if any other push or pop happened in between,
 * which guards the lock-free list against the ABA problem.
 */
static uint64_t
free_list_head(uint64_t old, uint32_t next)
{
   return ((old & 0xffffffff00000000ull) + 0x100000000ull) | next;
}
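/* Worked example with illustrative values: if the current head is
 * 0x0000000500000020 (generation 5, head index 0x20) and index 7 becomes
 * the new head, then
 *
 *    free_list_head(0x0000000500000020ull, 7) == 0x0000000600000007ull
 *
 * i.e. generation 6, head index 7.  If another thread pops 0x20 and pushes
 * it back between our read of fl->head and our cmpxchg, the index bits
 * would match again, but the advanced generation makes the cmpxchg fail,
 * so we retry with fresh state instead of linking through a stale next
 * pointer.
 */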
void
util_sparse_array_free_list_push(struct util_sparse_array_free_list *fl,
                                 uint32_t *items, unsigned num_items)
{
   assert(num_items > 0);
   assert(items[0] != fl->sentinel);
   void *last_elem = util_sparse_array_get(fl->arr, items[0]);
   uint32_t *last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   for (unsigned i = 1; i < num_items; i++) {
      p_atomic_set(last_next, items[i]);
      assert(items[i] != fl->sentinel);
      last_elem = util_sparse_array_get(fl->arr, items[i]);
      last_next = (uint32_t *)((char *)last_elem + fl->next_offset);
   }

   uint64_t current_head, old_head;
   old_head = p_atomic_read(&fl->head);
   do {
      current_head = old_head;
      p_atomic_set(last_next, (uint32_t)current_head); /* Index is the bottom 32 bits */
      uint64_t new_head = free_list_head(current_head, items[0]);
      old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
   } while (old_head != current_head);
}

uint32_t
util_sparse_array_free_list_pop_idx(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return fl->sentinel;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_idx;
      current_head = old_head;
   }
}

void *
util_sparse_array_free_list_pop_elem(struct util_sparse_array_free_list *fl)
{
   uint64_t current_head;

   current_head = p_atomic_read(&fl->head);
   while (1) {
      if ((uint32_t)current_head == fl->sentinel)
         return NULL;

      uint32_t head_idx = current_head; /* Index is the bottom 32 bits */
      void *head_elem = util_sparse_array_get(fl->arr, head_idx);
      uint32_t *head_next = (uint32_t *)((char *)head_elem + fl->next_offset);
      uint64_t new_head = free_list_head(current_head, p_atomic_read(head_next));
      uint64_t old_head = p_atomic_cmpxchg(&fl->head, current_head, new_head);
      if (old_head == current_head)
         return head_elem;
      current_head = old_head;
   }
}
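/* Free-list usage sketch (hypothetical; `struct job` and its `next` field
 * are illustrative, not part of this API):
 *
 *    struct job {
 *       uint32_t next;   // link used while the element is on the free list
 *       ...
 *    };
 *
 *    struct util_sparse_array arr;
 *    util_sparse_array_init(&arr, sizeof(struct job), 64);
 *
 *    struct util_sparse_array_free_list fl;
 *    util_sparse_array_free_list_init(&fl, &arr, UINT32_MAX,
 *                                     offsetof(struct job, next));
 *
 *    uint32_t idx = util_sparse_array_free_list_pop_idx(&fl);
 *    if (idx == UINT32_MAX) {
 *       // list empty: hand out a fresh index some other way
 *    }
 *
 *    uint32_t recycled[] = { idx };
 *    util_sparse_array_free_list_push(&fl, recycled, 1);
 *
 * The sentinel must be a value that is never used as a real element index
 * (UINT32_MAX here), and next_offset is the byte offset of a uint32_t link
 * within each element.
 */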