1bf215546Sopenharmony_ci/*
2bf215546Sopenharmony_ci * Copyright (C) 2021 Icecream95
3bf215546Sopenharmony_ci *
4bf215546Sopenharmony_ci * Permission is hereby granted, free of charge, to any person obtaining a
5bf215546Sopenharmony_ci * copy of this software and associated documentation files (the "Software"),
6bf215546Sopenharmony_ci * to deal in the Software without restriction, including without limitation
7bf215546Sopenharmony_ci * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8bf215546Sopenharmony_ci * and/or sell copies of the Software, and to permit persons to whom the
9bf215546Sopenharmony_ci * Software is furnished to do so, subject to the following conditions:
10bf215546Sopenharmony_ci *
11bf215546Sopenharmony_ci * The above copyright notice and this permission notice (including the next
12bf215546Sopenharmony_ci * paragraph) shall be included in all copies or substantial portions of the
13bf215546Sopenharmony_ci * Software.
14bf215546Sopenharmony_ci *
15bf215546Sopenharmony_ci * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16bf215546Sopenharmony_ci * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17bf215546Sopenharmony_ci * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18bf215546Sopenharmony_ci * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19bf215546Sopenharmony_ci * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20bf215546Sopenharmony_ci * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21bf215546Sopenharmony_ci * SOFTWARE.
22bf215546Sopenharmony_ci */
23bf215546Sopenharmony_ci
24bf215546Sopenharmony_ci/* A nodearray is an array type that is either sparse or dense, depending on
25bf215546Sopenharmony_ci * the number of elements.
26bf215546Sopenharmony_ci *
27bf215546Sopenharmony_ci * When the number of elements is over a threshold (max_sparse), the dense mode
28bf215546Sopenharmony_ci * is used, and the nodearray is simply a container for an array.
29bf215546Sopenharmony_ci *
30bf215546Sopenharmony_ci * In sparse mode, the array has elements with a 24-bit node index and a value.
31bf215546Sopenharmony_ci * The nodes are always sorted, so that a binary search can be used to find
32bf215546Sopenharmony_ci * elements. Nonexistent elements are treated as zero.
33bf215546Sopenharmony_ci *
34bf215546Sopenharmony_ci * Function names follow ARM instruction names: orr does *elem |= value.
35bf215546Sopenharmony_ci *
36bf215546Sopenharmony_ci * Although it's probably already fast enough, the datastructure could be sped
37bf215546Sopenharmony_ci * up a lot, especially when NEON is available, by making the sparse mode store
38bf215546Sopenharmony_ci * sixteen adjacent values, so that adding new keys also allocates nearby keys,
39bf215546Sopenharmony_ci * and to allow for vectorising iteration, as can be done when in the dense
40bf215546Sopenharmony_ci * mode.
41bf215546Sopenharmony_ci */
42bf215546Sopenharmony_ci
43bf215546Sopenharmony_ci#ifndef __BIFROST_NODEARRAY_H
44bf215546Sopenharmony_ci#define __BIFROST_NODEARRAY_H
45bf215546Sopenharmony_ci
46bf215546Sopenharmony_ci#include <stdint.h>
47bf215546Sopenharmony_ci
48bf215546Sopenharmony_ci#ifdef __cplusplus
49bf215546Sopenharmony_ciextern "C" {
50bf215546Sopenharmony_ci#endif
51bf215546Sopenharmony_ci
52bf215546Sopenharmony_ci/* A value that may be stored in a nodearray element, used directly for dense
53bf215546Sopenharmony_ci * elements and included into sparse elements.
54bf215546Sopenharmony_ci */
55bf215546Sopenharmony_citypedef uint16_t nodearray_value;
56bf215546Sopenharmony_ci
57bf215546Sopenharmony_ci#define NODEARRAY_MAX_VALUE 0xffff
58bf215546Sopenharmony_ci
59bf215546Sopenharmony_ci/* Type storing sparse nodearray elements, consisting of a nodearray_value at
60bf215546Sopenharmony_ci * the bottom and a nodearray_key at the top.
61bf215546Sopenharmony_ci */
62bf215546Sopenharmony_citypedef uint64_t nodearray_sparse;
63bf215546Sopenharmony_ci
64bf215546Sopenharmony_citypedef struct {
65bf215546Sopenharmony_ci        union {
66bf215546Sopenharmony_ci                nodearray_sparse *sparse;
67bf215546Sopenharmony_ci                nodearray_value *dense;
68bf215546Sopenharmony_ci        };
69bf215546Sopenharmony_ci        unsigned size;
70bf215546Sopenharmony_ci        unsigned sparse_capacity;
71bf215546Sopenharmony_ci} nodearray;
72bf215546Sopenharmony_ci
73bf215546Sopenharmony_ci/* Align sizes to 16-bytes for SIMD purposes */
74bf215546Sopenharmony_ci#define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
75bf215546Sopenharmony_ci
76bf215546Sopenharmony_ci#define nodearray_sparse_foreach(buf, elem) \
77bf215546Sopenharmony_ci   for (nodearray_sparse *elem = (buf)->sparse; \
78bf215546Sopenharmony_ci        elem < (buf)->sparse + (buf)->size; elem++)
79bf215546Sopenharmony_ci
80bf215546Sopenharmony_ci#define nodearray_dense_foreach(buf, elem) \
81bf215546Sopenharmony_ci   for (nodearray_value *elem = (buf)->dense; \
82bf215546Sopenharmony_ci        elem < (buf)->dense + (buf)->size; elem++)
83bf215546Sopenharmony_ci
84bf215546Sopenharmony_ci#define nodearray_dense_foreach_64(buf, elem) \
85bf215546Sopenharmony_ci   for (uint64_t *elem = (uint64_t *)(buf)->dense; \
86bf215546Sopenharmony_ci        (nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
87bf215546Sopenharmony_ci
88bf215546Sopenharmony_cistatic inline bool
89bf215546Sopenharmony_cinodearray_is_sparse(const nodearray *a)
90bf215546Sopenharmony_ci{
91bf215546Sopenharmony_ci        return a->sparse_capacity != ~0U;
92bf215546Sopenharmony_ci}
93bf215546Sopenharmony_ci
94bf215546Sopenharmony_cistatic inline void
95bf215546Sopenharmony_cinodearray_init(nodearray *a)
96bf215546Sopenharmony_ci{
97bf215546Sopenharmony_ci        memset(a, 0, sizeof(nodearray));
98bf215546Sopenharmony_ci}
99bf215546Sopenharmony_ci
100bf215546Sopenharmony_cistatic inline void
101bf215546Sopenharmony_cinodearray_reset(nodearray *a)
102bf215546Sopenharmony_ci{
103bf215546Sopenharmony_ci        free(a->sparse);
104bf215546Sopenharmony_ci        nodearray_init(a);
105bf215546Sopenharmony_ci}
106bf215546Sopenharmony_ci
107bf215546Sopenharmony_cistatic inline nodearray_sparse
108bf215546Sopenharmony_cinodearray_encode(unsigned key, nodearray_value value)
109bf215546Sopenharmony_ci{
110bf215546Sopenharmony_ci        static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
111bf215546Sopenharmony_ci        return ((nodearray_sparse) key << 16) | value;
112bf215546Sopenharmony_ci}
113bf215546Sopenharmony_ci
114bf215546Sopenharmony_cistatic inline unsigned
115bf215546Sopenharmony_cinodearray_sparse_key(const nodearray_sparse *elem)
116bf215546Sopenharmony_ci{
117bf215546Sopenharmony_ci        static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
118bf215546Sopenharmony_ci        return *elem >> 16;
119bf215546Sopenharmony_ci}
120bf215546Sopenharmony_ci
121bf215546Sopenharmony_cistatic inline nodearray_value
122bf215546Sopenharmony_cinodearray_sparse_value(const nodearray_sparse *elem)
123bf215546Sopenharmony_ci{
124bf215546Sopenharmony_ci        return *elem & NODEARRAY_MAX_VALUE;
125bf215546Sopenharmony_ci}
126bf215546Sopenharmony_ci
127bf215546Sopenharmony_cistatic inline unsigned
128bf215546Sopenharmony_cinodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)
129bf215546Sopenharmony_ci{
130bf215546Sopenharmony_ci        assert(nodearray_is_sparse(a) && a->size);
131bf215546Sopenharmony_ci
132bf215546Sopenharmony_ci        nodearray_sparse *data = a->sparse;
133bf215546Sopenharmony_ci
134bf215546Sopenharmony_ci        /* Encode the key using the highest possible value, so that the
135bf215546Sopenharmony_ci         * matching node must be encoded lower than this
136bf215546Sopenharmony_ci         */
137bf215546Sopenharmony_ci        nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
138bf215546Sopenharmony_ci
139bf215546Sopenharmony_ci        unsigned left = 0;
140bf215546Sopenharmony_ci        unsigned right = a->size - 1;
141bf215546Sopenharmony_ci
142bf215546Sopenharmony_ci        if (data[right] <= skey)
143bf215546Sopenharmony_ci                left = right;
144bf215546Sopenharmony_ci
145bf215546Sopenharmony_ci        while (left != right) {
146bf215546Sopenharmony_ci                /* No need to worry about overflow, we couldn't have more than
147bf215546Sopenharmony_ci                 * 2^24 elements */
148bf215546Sopenharmony_ci                unsigned probe = (left + right + 1) / 2;
149bf215546Sopenharmony_ci
150bf215546Sopenharmony_ci                if (data[probe] > skey)
151bf215546Sopenharmony_ci                        right = probe - 1;
152bf215546Sopenharmony_ci                else
153bf215546Sopenharmony_ci                        left = probe;
154bf215546Sopenharmony_ci        }
155bf215546Sopenharmony_ci
156bf215546Sopenharmony_ci        *elem = data + left;
157bf215546Sopenharmony_ci        return left;
158bf215546Sopenharmony_ci}
159bf215546Sopenharmony_ci
160bf215546Sopenharmony_cistatic inline void
161bf215546Sopenharmony_cinodearray_orr(nodearray *a, unsigned key, nodearray_value value,
162bf215546Sopenharmony_ci              unsigned max_sparse, unsigned max)
163bf215546Sopenharmony_ci{
164bf215546Sopenharmony_ci        assert(key < (1 << 24));
165bf215546Sopenharmony_ci        assert(key < max);
166bf215546Sopenharmony_ci
167bf215546Sopenharmony_ci        if (!value)
168bf215546Sopenharmony_ci                return;
169bf215546Sopenharmony_ci
170bf215546Sopenharmony_ci        if (nodearray_is_sparse(a)) {
171bf215546Sopenharmony_ci                unsigned size = a->size;
172bf215546Sopenharmony_ci                unsigned left = 0;
173bf215546Sopenharmony_ci
174bf215546Sopenharmony_ci                if (size) {
175bf215546Sopenharmony_ci                        /* First, binary search for key */
176bf215546Sopenharmony_ci                        nodearray_sparse *elem;
177bf215546Sopenharmony_ci                        left = nodearray_sparse_search(a, key, &elem);
178bf215546Sopenharmony_ci
179bf215546Sopenharmony_ci                        if (nodearray_sparse_key(elem) == key) {
180bf215546Sopenharmony_ci                                *elem |= value;
181bf215546Sopenharmony_ci                                return;
182bf215546Sopenharmony_ci                        }
183bf215546Sopenharmony_ci
184bf215546Sopenharmony_ci                        /* We insert before `left`, so increment it if it's
185bf215546Sopenharmony_ci                         * out of order */
186bf215546Sopenharmony_ci                        if (nodearray_sparse_key(elem) < key)
187bf215546Sopenharmony_ci                                ++left;
188bf215546Sopenharmony_ci                }
189bf215546Sopenharmony_ci
190bf215546Sopenharmony_ci                if (size < max_sparse && (size + 1) < max / 4) {
191bf215546Sopenharmony_ci                        /* We didn't find it, but we know where to insert it. */
192bf215546Sopenharmony_ci
193bf215546Sopenharmony_ci                        nodearray_sparse *data = a->sparse;
194bf215546Sopenharmony_ci                        nodearray_sparse *data_move = data + left;
195bf215546Sopenharmony_ci
196bf215546Sopenharmony_ci                        bool realloc = (++a->size) > a->sparse_capacity;
197bf215546Sopenharmony_ci
198bf215546Sopenharmony_ci                        if (realloc) {
199bf215546Sopenharmony_ci                                a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
200bf215546Sopenharmony_ci
201bf215546Sopenharmony_ci                                a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse));
202bf215546Sopenharmony_ci
203bf215546Sopenharmony_ci                                if (left)
204bf215546Sopenharmony_ci                                        memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
205bf215546Sopenharmony_ci                        }
206bf215546Sopenharmony_ci
207bf215546Sopenharmony_ci                        nodearray_sparse *elem = a->sparse + left;
208bf215546Sopenharmony_ci
209bf215546Sopenharmony_ci                        if (left != size)
210bf215546Sopenharmony_ci                                memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse));
211bf215546Sopenharmony_ci
212bf215546Sopenharmony_ci                        *elem = nodearray_encode(key, value);
213bf215546Sopenharmony_ci
214bf215546Sopenharmony_ci                        if (realloc)
215bf215546Sopenharmony_ci                                free(data);
216bf215546Sopenharmony_ci
217bf215546Sopenharmony_ci                        return;
218bf215546Sopenharmony_ci                }
219bf215546Sopenharmony_ci
220bf215546Sopenharmony_ci                /* There are too many elements, so convert to a dense array */
221bf215546Sopenharmony_ci                nodearray old = *a;
222bf215546Sopenharmony_ci
223bf215546Sopenharmony_ci                a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value));
224bf215546Sopenharmony_ci                a->size = max;
225bf215546Sopenharmony_ci                a->sparse_capacity = ~0U;
226bf215546Sopenharmony_ci
227bf215546Sopenharmony_ci                nodearray_value *data = a->dense;
228bf215546Sopenharmony_ci
229bf215546Sopenharmony_ci                nodearray_sparse_foreach(&old, x) {
230bf215546Sopenharmony_ci                        unsigned key = nodearray_sparse_key(x);
231bf215546Sopenharmony_ci                        nodearray_value value = nodearray_sparse_value(x);
232bf215546Sopenharmony_ci
233bf215546Sopenharmony_ci                        assert(key < max);
234bf215546Sopenharmony_ci                        data[key] = value;
235bf215546Sopenharmony_ci                }
236bf215546Sopenharmony_ci
237bf215546Sopenharmony_ci                free(old.sparse);
238bf215546Sopenharmony_ci        }
239bf215546Sopenharmony_ci
240bf215546Sopenharmony_ci        a->dense[key] |= value;
241bf215546Sopenharmony_ci}
242bf215546Sopenharmony_ci
243bf215546Sopenharmony_ci#ifdef __cplusplus
244bf215546Sopenharmony_ci} /* extern C */
245bf215546Sopenharmony_ci#endif
246bf215546Sopenharmony_ci
247bf215546Sopenharmony_ci#endif
248