xref: /third_party/eudev/src/shared/strbuf.c (revision 99ca880a)
1/***
2  This file is part of eudev, forked from systemd.
3
4  Copyright 2012 Kay Sievers <kay@vrfy.org>
5
6  systemd is free software; you can redistribute it and/or modify it
7  under the terms of the GNU Lesser General Public License as published by
8  the Free Software Foundation; either version 2.1 of the License, or
9  (at your option) any later version.
10
11  systemd is distributed in the hope that it will be useful, but
12  WITHOUT ANY WARRANTY; without even the implied warranty of
13  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14  Lesser General Public License for more details.
15
16  You should have received a copy of the GNU Lesser General Public License
17  along with systemd; If not, see <http://www.gnu.org/licenses/>.
18***/
19
20#include <stdlib.h>
21#include <string.h>
22
23#include "util.h"
24#include "strbuf.h"
25
26/*
27 * Strbuf stores given strings in a single continuous allocated memory
28 * area. Identical strings are de-duplicated and return the same offset
29 * as the first string stored. If the tail of a string already exists
30 * in the buffer, the tail is returned.
31 *
32 * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
33 * information about the stored strings.
34 *
35 * Example of udev rules:
36 *   $ ./udevadm test .
37 *   ...
38 *   read rules file: /usr/lib/udev/rules.d/99-systemd.rules
39 *   rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
40 *   23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
41 *   ...
42 */
43
44struct strbuf *strbuf_new(void) {
45        struct strbuf *str;
46
47        str = new0(struct strbuf, 1);
48        if (!str)
49                return NULL;
50
51        str->buf = new0(char, 1);
52        if (!str->buf)
53                goto err;
54        str->len = 1;
55
56        str->root = new0(struct strbuf_node, 1);
57        if (!str->root)
58                goto err;
59        str->nodes_count = 1;
60        return str;
61err:
62        free(str->buf);
63        free(str->root);
64        free(str);
65        return NULL;
66}
67
68static void strbuf_node_cleanup(struct strbuf_node *node) {
69        size_t i;
70
71        for (i = 0; i < node->children_count; i++)
72                strbuf_node_cleanup(node->children[i].child);
73        free(node->children);
74        free(node);
75}
76
77/* clean up trie data, leave only the string buffer */
78void strbuf_complete(struct strbuf *str) {
79        if (!str)
80                return;
81        if (str->root)
82                strbuf_node_cleanup(str->root);
83        str->root = NULL;
84}
85
86/* clean up everything */
87void strbuf_cleanup(struct strbuf *str) {
88        if (!str)
89                return;
90        if (str->root)
91                strbuf_node_cleanup(str->root);
92        free(str->buf);
93        free(str);
94}
95
96static int strbuf_children_cmp(const void* v1, const void* v2) {
97	const struct strbuf_child_entry *n1 = v1, *n2 = v2;
98        return n1->c - n2->c;
99}
100
101static void bubbleinsert(struct strbuf_node *node,
102                         uint8_t c,
103                         struct strbuf_node *node_child) {
104
105        struct strbuf_child_entry new = {
106                .c = c,
107                .child = node_child,
108        };
109        int left = 0, right = node->children_count;
110
111        while (right > left) {
112                int middle = (right + left) / 2 ;
113                if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
114                        left = middle + 1;
115                else
116                        right = middle;
117        }
118
119        memmove(node->children + left + 1, node->children + left,
120                sizeof(struct strbuf_child_entry) * (node->children_count - left));
121        node->children[left] = new;
122
123        node->children_count ++;
124}
125
126/* add string, return the index/offset into the buffer */
127ssize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
128        uint8_t c;
129        struct strbuf_node *node;
130        size_t depth;
131        char *buf_new;
132        struct strbuf_child_entry *child;
133        struct strbuf_node *node_child;
134        ssize_t off;
135
136        if (!str->root)
137                return -EINVAL;
138
139        /* search string; start from last character to find possibly matching tails */
140        if (len == 0)
141                return 0;
142        str->in_count++;
143        str->in_len += len;
144
145        node = str->root;
146        c = s[len-1];
147        for (depth = 0; depth <= len; depth++) {
148                struct strbuf_child_entry search;
149
150                /* match against current node */
151                off = node->value_off + node->value_len - len;
152                if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
153                        str->dedup_len += len;
154                        str->dedup_count++;
155                        return off;
156                }
157
158                /* lookup child node */
159                c = s[len - 1 - depth];
160                search.c = c;
161                child = bsearch(&search, node->children, node->children_count,
162                                sizeof(struct strbuf_child_entry), strbuf_children_cmp);
163                if (!child)
164                        break;
165                node = child->child;
166        }
167
168        /* add new string */
169        buf_new = realloc(str->buf, str->len + len+1);
170        if (!buf_new)
171                return -ENOMEM;
172        str->buf = buf_new;
173        off = str->len;
174        memcpy(str->buf + off, s, len);
175        str->len += len;
176        str->buf[str->len++] = '\0';
177
178        /* new node */
179        node_child = new0(struct strbuf_node, 1);
180        if (!node_child)
181                return -ENOMEM;
182        node_child->value_off = off;
183        node_child->value_len = len;
184
185        /* extend array, add new entry, sort for bisection */
186        child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
187        if (!child) {
188                free(node_child);
189                return -ENOMEM;
190        }
191
192        str->nodes_count++;
193
194        node->children = child;
195        bubbleinsert(node, c, node_child);
196
197        return off;
198}
199