1 /*
2 * Copyright (C) 2021 Icecream95
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 */
23
24 /* A nodearray is an array type that is either sparse or dense, depending on
25 * the number of elements.
26 *
27 * When the number of elements is over a threshold (max_sparse), the dense mode
28 * is used, and the nodearray is simply a container for an array.
29 *
30 * In sparse mode, the array has elements with a 24-bit node index and a value.
31 * The nodes are always sorted, so that a binary search can be used to find
32 * elements. Nonexistent elements are treated as zero.
33 *
34 * Function names follow ARM instruction names: orr does *elem |= value.
35 *
36 * Although it's probably already fast enough, the datastructure could be sped
37 * up a lot, especially when NEON is available, by making the sparse mode store
38 * sixteen adjacent values, so that adding new keys also allocates nearby keys,
39 * and to allow for vectorising iteration, as can be done when in the dense
40 * mode.
41 */
42
43 #ifndef __BIFROST_NODEARRAY_H
44 #define __BIFROST_NODEARRAY_H
45
46 #include <stdint.h>
47
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51
52 /* A value that may be stored in a nodearray element, used directly for dense
53 * elements and included into sparse elements.
54 */
55 typedef uint16_t nodearray_value;
56
57 #define NODEARRAY_MAX_VALUE 0xffff
58
59 /* Type storing sparse nodearray elements, consisting of a nodearray_value at
60 * the bottom and a nodearray_key at the top.
61 */
62 typedef uint64_t nodearray_sparse;
63
64 typedef struct {
65 union {
66 nodearray_sparse *sparse;
67 nodearray_value *dense;
68 };
69 unsigned size;
70 unsigned sparse_capacity;
71 } nodearray;
72
73 /* Align sizes to 16-bytes for SIMD purposes */
74 #define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
75
76 #define nodearray_sparse_foreach(buf, elem) \
77 for (nodearray_sparse *elem = (buf)->sparse; \
78 elem < (buf)->sparse + (buf)->size; elem++)
79
80 #define nodearray_dense_foreach(buf, elem) \
81 for (nodearray_value *elem = (buf)->dense; \
82 elem < (buf)->dense + (buf)->size; elem++)
83
84 #define nodearray_dense_foreach_64(buf, elem) \
85 for (uint64_t *elem = (uint64_t *)(buf)->dense; \
86 (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
87
88 static inline bool
nodearray_is_sparse(const nodearray *a)89 nodearray_is_sparse(const nodearray *a)
90 {
91 return a->sparse_capacity != ~0U;
92 }
93
94 static inline void
nodearray_init(nodearray *a)95 nodearray_init(nodearray *a)
96 {
97 memset(a, 0, sizeof(nodearray));
98 }
99
100 static inline void
nodearray_reset(nodearray *a)101 nodearray_reset(nodearray *a)
102 {
103 free(a->sparse);
104 nodearray_init(a);
105 }
106
107 static inline nodearray_sparse
nodearray_encode(unsigned key, nodearray_value value)108 nodearray_encode(unsigned key, nodearray_value value)
109 {
110 static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
111 return ((nodearray_sparse) key << 16) | value;
112 }
113
114 static inline unsigned
nodearray_sparse_key(const nodearray_sparse *elem)115 nodearray_sparse_key(const nodearray_sparse *elem)
116 {
117 static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
118 return *elem >> 16;
119 }
120
121 static inline nodearray_value
nodearray_sparse_value(const nodearray_sparse *elem)122 nodearray_sparse_value(const nodearray_sparse *elem)
123 {
124 return *elem & NODEARRAY_MAX_VALUE;
125 }
126
127 static inline unsigned
nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)128 nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)
129 {
130 assert(nodearray_is_sparse(a) && a->size);
131
132 nodearray_sparse *data = a->sparse;
133
134 /* Encode the key using the highest possible value, so that the
135 * matching node must be encoded lower than this
136 */
137 nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
138
139 unsigned left = 0;
140 unsigned right = a->size - 1;
141
142 if (data[right] <= skey)
143 left = right;
144
145 while (left != right) {
146 /* No need to worry about overflow, we couldn't have more than
147 * 2^24 elements */
148 unsigned probe = (left + right + 1) / 2;
149
150 if (data[probe] > skey)
151 right = probe - 1;
152 else
153 left = probe;
154 }
155
156 *elem = data + left;
157 return left;
158 }
159
160 static inline void
nodearray_orr(nodearray *a, unsigned key, nodearray_value value, unsigned max_sparse, unsigned max)161 nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
162 unsigned max_sparse, unsigned max)
163 {
164 assert(key < (1 << 24));
165 assert(key < max);
166
167 if (!value)
168 return;
169
170 if (nodearray_is_sparse(a)) {
171 unsigned size = a->size;
172 unsigned left = 0;
173
174 if (size) {
175 /* First, binary search for key */
176 nodearray_sparse *elem;
177 left = nodearray_sparse_search(a, key, &elem);
178
179 if (nodearray_sparse_key(elem) == key) {
180 *elem |= value;
181 return;
182 }
183
184 /* We insert before `left`, so increment it if it's
185 * out of order */
186 if (nodearray_sparse_key(elem) < key)
187 ++left;
188 }
189
190 if (size < max_sparse && (size + 1) < max / 4) {
191 /* We didn't find it, but we know where to insert it. */
192
193 nodearray_sparse *data = a->sparse;
194 nodearray_sparse *data_move = data + left;
195
196 bool realloc = (++a->size) > a->sparse_capacity;
197
198 if (realloc) {
199 a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
200
201 a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse));
202
203 if (left)
204 memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
205 }
206
207 nodearray_sparse *elem = a->sparse + left;
208
209 if (left != size)
210 memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse));
211
212 *elem = nodearray_encode(key, value);
213
214 if (realloc)
215 free(data);
216
217 return;
218 }
219
220 /* There are too many elements, so convert to a dense array */
221 nodearray old = *a;
222
223 a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value));
224 a->size = max;
225 a->sparse_capacity = ~0U;
226
227 nodearray_value *data = a->dense;
228
229 nodearray_sparse_foreach(&old, x) {
230 unsigned key = nodearray_sparse_key(x);
231 nodearray_value value = nodearray_sparse_value(x);
232
233 assert(key < max);
234 data[key] = value;
235 }
236
237 free(old.sparse);
238 }
239
240 a->dense[key] |= value;
241 }
242
243 #ifdef __cplusplus
244 } /* extern C */
245 #endif
246
247 #endif
248