1bf215546Sopenharmony_ci/* 2bf215546Sopenharmony_ci * Copyright (C) 2021 Icecream95 3bf215546Sopenharmony_ci * 4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a 5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"), 6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation 7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the 9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions: 10bf215546Sopenharmony_ci * 11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next 12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the 13bf215546Sopenharmony_ci * Software. 14bf215546Sopenharmony_ci * 15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21bf215546Sopenharmony_ci * SOFTWARE. 22bf215546Sopenharmony_ci */ 23bf215546Sopenharmony_ci 24bf215546Sopenharmony_ci/* A nodearray is an array type that is either sparse or dense, depending on 25bf215546Sopenharmony_ci * the number of elements. 26bf215546Sopenharmony_ci * 27bf215546Sopenharmony_ci * When the number of elements is over a threshold (max_sparse), the dense mode 28bf215546Sopenharmony_ci * is used, and the nodearray is simply a container for an array. 29bf215546Sopenharmony_ci * 30bf215546Sopenharmony_ci * In sparse mode, the array has elements with a 24-bit node index and a value. 31bf215546Sopenharmony_ci * The nodes are always sorted, so that a binary search can be used to find 32bf215546Sopenharmony_ci * elements. Nonexistent elements are treated as zero. 33bf215546Sopenharmony_ci * 34bf215546Sopenharmony_ci * Function names follow ARM instruction names: orr does *elem |= value. 35bf215546Sopenharmony_ci * 36bf215546Sopenharmony_ci * Although it's probably already fast enough, the datastructure could be sped 37bf215546Sopenharmony_ci * up a lot, especially when NEON is available, by making the sparse mode store 38bf215546Sopenharmony_ci * sixteen adjacent values, so that adding new keys also allocates nearby keys, 39bf215546Sopenharmony_ci * and to allow for vectorising iteration, as can be done when in the dense 40bf215546Sopenharmony_ci * mode. 41bf215546Sopenharmony_ci */ 42bf215546Sopenharmony_ci 43bf215546Sopenharmony_ci#ifndef __BIFROST_NODEARRAY_H 44bf215546Sopenharmony_ci#define __BIFROST_NODEARRAY_H 45bf215546Sopenharmony_ci 46bf215546Sopenharmony_ci#include <stdint.h> 47bf215546Sopenharmony_ci 48bf215546Sopenharmony_ci#ifdef __cplusplus 49bf215546Sopenharmony_ciextern "C" { 50bf215546Sopenharmony_ci#endif 51bf215546Sopenharmony_ci 52bf215546Sopenharmony_ci/* A value that may be stored in a nodearray element, used directly for dense 53bf215546Sopenharmony_ci * elements and included into sparse elements. 54bf215546Sopenharmony_ci */ 55bf215546Sopenharmony_citypedef uint16_t nodearray_value; 56bf215546Sopenharmony_ci 57bf215546Sopenharmony_ci#define NODEARRAY_MAX_VALUE 0xffff 58bf215546Sopenharmony_ci 59bf215546Sopenharmony_ci/* Type storing sparse nodearray elements, consisting of a nodearray_value at 60bf215546Sopenharmony_ci * the bottom and a nodearray_key at the top. 61bf215546Sopenharmony_ci */ 62bf215546Sopenharmony_citypedef uint64_t nodearray_sparse; 63bf215546Sopenharmony_ci 64bf215546Sopenharmony_citypedef struct { 65bf215546Sopenharmony_ci union { 66bf215546Sopenharmony_ci nodearray_sparse *sparse; 67bf215546Sopenharmony_ci nodearray_value *dense; 68bf215546Sopenharmony_ci }; 69bf215546Sopenharmony_ci unsigned size; 70bf215546Sopenharmony_ci unsigned sparse_capacity; 71bf215546Sopenharmony_ci} nodearray; 72bf215546Sopenharmony_ci 73bf215546Sopenharmony_ci/* Align sizes to 16-bytes for SIMD purposes */ 74bf215546Sopenharmony_ci#define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16) 75bf215546Sopenharmony_ci 76bf215546Sopenharmony_ci#define nodearray_sparse_foreach(buf, elem) \ 77bf215546Sopenharmony_ci for (nodearray_sparse *elem = (buf)->sparse; \ 78bf215546Sopenharmony_ci elem < (buf)->sparse + (buf)->size; elem++) 79bf215546Sopenharmony_ci 80bf215546Sopenharmony_ci#define nodearray_dense_foreach(buf, elem) \ 81bf215546Sopenharmony_ci for (nodearray_value *elem = (buf)->dense; \ 82bf215546Sopenharmony_ci elem < (buf)->dense + (buf)->size; elem++) 83bf215546Sopenharmony_ci 84bf215546Sopenharmony_ci#define nodearray_dense_foreach_64(buf, elem) \ 85bf215546Sopenharmony_ci for (uint64_t *elem = (uint64_t *)(buf)->dense; \ 86bf215546Sopenharmony_ci (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++) 87bf215546Sopenharmony_ci 88bf215546Sopenharmony_cistatic inline bool 89bf215546Sopenharmony_cinodearray_is_sparse(const nodearray *a) 90bf215546Sopenharmony_ci{ 91bf215546Sopenharmony_ci return a->sparse_capacity != ~0U; 92bf215546Sopenharmony_ci} 93bf215546Sopenharmony_ci 94bf215546Sopenharmony_cistatic inline void 95bf215546Sopenharmony_cinodearray_init(nodearray *a) 96bf215546Sopenharmony_ci{ 97bf215546Sopenharmony_ci memset(a, 0, sizeof(nodearray)); 98bf215546Sopenharmony_ci} 99bf215546Sopenharmony_ci 100bf215546Sopenharmony_cistatic inline void 101bf215546Sopenharmony_cinodearray_reset(nodearray *a) 102bf215546Sopenharmony_ci{ 103bf215546Sopenharmony_ci free(a->sparse); 104bf215546Sopenharmony_ci nodearray_init(a); 105bf215546Sopenharmony_ci} 106bf215546Sopenharmony_ci 107bf215546Sopenharmony_cistatic inline nodearray_sparse 108bf215546Sopenharmony_cinodearray_encode(unsigned key, nodearray_value value) 109bf215546Sopenharmony_ci{ 110bf215546Sopenharmony_ci static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); 111bf215546Sopenharmony_ci return ((nodearray_sparse) key << 16) | value; 112bf215546Sopenharmony_ci} 113bf215546Sopenharmony_ci 114bf215546Sopenharmony_cistatic inline unsigned 115bf215546Sopenharmony_cinodearray_sparse_key(const nodearray_sparse *elem) 116bf215546Sopenharmony_ci{ 117bf215546Sopenharmony_ci static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch"); 118bf215546Sopenharmony_ci return *elem >> 16; 119bf215546Sopenharmony_ci} 120bf215546Sopenharmony_ci 121bf215546Sopenharmony_cistatic inline nodearray_value 122bf215546Sopenharmony_cinodearray_sparse_value(const nodearray_sparse *elem) 123bf215546Sopenharmony_ci{ 124bf215546Sopenharmony_ci return *elem & NODEARRAY_MAX_VALUE; 125bf215546Sopenharmony_ci} 126bf215546Sopenharmony_ci 127bf215546Sopenharmony_cistatic inline unsigned 128bf215546Sopenharmony_cinodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem) 129bf215546Sopenharmony_ci{ 130bf215546Sopenharmony_ci assert(nodearray_is_sparse(a) && a->size); 131bf215546Sopenharmony_ci 132bf215546Sopenharmony_ci nodearray_sparse *data = a->sparse; 133bf215546Sopenharmony_ci 134bf215546Sopenharmony_ci /* Encode the key using the highest possible value, so that the 135bf215546Sopenharmony_ci * matching node must be encoded lower than this 136bf215546Sopenharmony_ci */ 137bf215546Sopenharmony_ci nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE); 138bf215546Sopenharmony_ci 139bf215546Sopenharmony_ci unsigned left = 0; 140bf215546Sopenharmony_ci unsigned right = a->size - 1; 141bf215546Sopenharmony_ci 142bf215546Sopenharmony_ci if (data[right] <= skey) 143bf215546Sopenharmony_ci left = right; 144bf215546Sopenharmony_ci 145bf215546Sopenharmony_ci while (left != right) { 146bf215546Sopenharmony_ci /* No need to worry about overflow, we couldn't have more than 147bf215546Sopenharmony_ci * 2^24 elements */ 148bf215546Sopenharmony_ci unsigned probe = (left + right + 1) / 2; 149bf215546Sopenharmony_ci 150bf215546Sopenharmony_ci if (data[probe] > skey) 151bf215546Sopenharmony_ci right = probe - 1; 152bf215546Sopenharmony_ci else 153bf215546Sopenharmony_ci left = probe; 154bf215546Sopenharmony_ci } 155bf215546Sopenharmony_ci 156bf215546Sopenharmony_ci *elem = data + left; 157bf215546Sopenharmony_ci return left; 158bf215546Sopenharmony_ci} 159bf215546Sopenharmony_ci 160bf215546Sopenharmony_cistatic inline void 161bf215546Sopenharmony_cinodearray_orr(nodearray *a, unsigned key, nodearray_value value, 162bf215546Sopenharmony_ci unsigned max_sparse, unsigned max) 163bf215546Sopenharmony_ci{ 164bf215546Sopenharmony_ci assert(key < (1 << 24)); 165bf215546Sopenharmony_ci assert(key < max); 166bf215546Sopenharmony_ci 167bf215546Sopenharmony_ci if (!value) 168bf215546Sopenharmony_ci return; 169bf215546Sopenharmony_ci 170bf215546Sopenharmony_ci if (nodearray_is_sparse(a)) { 171bf215546Sopenharmony_ci unsigned size = a->size; 172bf215546Sopenharmony_ci unsigned left = 0; 173bf215546Sopenharmony_ci 174bf215546Sopenharmony_ci if (size) { 175bf215546Sopenharmony_ci /* First, binary search for key */ 176bf215546Sopenharmony_ci nodearray_sparse *elem; 177bf215546Sopenharmony_ci left = nodearray_sparse_search(a, key, &elem); 178bf215546Sopenharmony_ci 179bf215546Sopenharmony_ci if (nodearray_sparse_key(elem) == key) { 180bf215546Sopenharmony_ci *elem |= value; 181bf215546Sopenharmony_ci return; 182bf215546Sopenharmony_ci } 183bf215546Sopenharmony_ci 184bf215546Sopenharmony_ci /* We insert before `left`, so increment it if it's 185bf215546Sopenharmony_ci * out of order */ 186bf215546Sopenharmony_ci if (nodearray_sparse_key(elem) < key) 187bf215546Sopenharmony_ci ++left; 188bf215546Sopenharmony_ci } 189bf215546Sopenharmony_ci 190bf215546Sopenharmony_ci if (size < max_sparse && (size + 1) < max / 4) { 191bf215546Sopenharmony_ci /* We didn't find it, but we know where to insert it. */ 192bf215546Sopenharmony_ci 193bf215546Sopenharmony_ci nodearray_sparse *data = a->sparse; 194bf215546Sopenharmony_ci nodearray_sparse *data_move = data + left; 195bf215546Sopenharmony_ci 196bf215546Sopenharmony_ci bool realloc = (++a->size) > a->sparse_capacity; 197bf215546Sopenharmony_ci 198bf215546Sopenharmony_ci if (realloc) { 199bf215546Sopenharmony_ci a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4); 200bf215546Sopenharmony_ci 201bf215546Sopenharmony_ci a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse)); 202bf215546Sopenharmony_ci 203bf215546Sopenharmony_ci if (left) 204bf215546Sopenharmony_ci memcpy(a->sparse, data, left * sizeof(nodearray_sparse)); 205bf215546Sopenharmony_ci } 206bf215546Sopenharmony_ci 207bf215546Sopenharmony_ci nodearray_sparse *elem = a->sparse + left; 208bf215546Sopenharmony_ci 209bf215546Sopenharmony_ci if (left != size) 210bf215546Sopenharmony_ci memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse)); 211bf215546Sopenharmony_ci 212bf215546Sopenharmony_ci *elem = nodearray_encode(key, value); 213bf215546Sopenharmony_ci 214bf215546Sopenharmony_ci if (realloc) 215bf215546Sopenharmony_ci free(data); 216bf215546Sopenharmony_ci 217bf215546Sopenharmony_ci return; 218bf215546Sopenharmony_ci } 219bf215546Sopenharmony_ci 220bf215546Sopenharmony_ci /* There are too many elements, so convert to a dense array */ 221bf215546Sopenharmony_ci nodearray old = *a; 222bf215546Sopenharmony_ci 223bf215546Sopenharmony_ci a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value)); 224bf215546Sopenharmony_ci a->size = max; 225bf215546Sopenharmony_ci a->sparse_capacity = ~0U; 226bf215546Sopenharmony_ci 227bf215546Sopenharmony_ci nodearray_value *data = a->dense; 228bf215546Sopenharmony_ci 229bf215546Sopenharmony_ci nodearray_sparse_foreach(&old, x) { 230bf215546Sopenharmony_ci unsigned key = nodearray_sparse_key(x); 231bf215546Sopenharmony_ci nodearray_value value = nodearray_sparse_value(x); 232bf215546Sopenharmony_ci 233bf215546Sopenharmony_ci assert(key < max); 234bf215546Sopenharmony_ci data[key] = value; 235bf215546Sopenharmony_ci } 236bf215546Sopenharmony_ci 237bf215546Sopenharmony_ci free(old.sparse); 238bf215546Sopenharmony_ci } 239bf215546Sopenharmony_ci 240bf215546Sopenharmony_ci a->dense[key] |= value; 241bf215546Sopenharmony_ci} 242bf215546Sopenharmony_ci 243bf215546Sopenharmony_ci#ifdef __cplusplus 244bf215546Sopenharmony_ci} /* extern C */ 245bf215546Sopenharmony_ci#endif 246bf215546Sopenharmony_ci 247bf215546Sopenharmony_ci#endif 248