199ca880aSopenharmony_ci/***
299ca880aSopenharmony_ci  This file is part of eudev, forked from systemd.
399ca880aSopenharmony_ci
499ca880aSopenharmony_ci  Copyright 2012 Kay Sievers <kay@vrfy.org>
599ca880aSopenharmony_ci
699ca880aSopenharmony_ci  systemd is free software; you can redistribute it and/or modify it
799ca880aSopenharmony_ci  under the terms of the GNU Lesser General Public License as published by
899ca880aSopenharmony_ci  the Free Software Foundation; either version 2.1 of the License, or
999ca880aSopenharmony_ci  (at your option) any later version.
1099ca880aSopenharmony_ci
1199ca880aSopenharmony_ci  systemd is distributed in the hope that it will be useful, but
1299ca880aSopenharmony_ci  WITHOUT ANY WARRANTY; without even the implied warranty of
1399ca880aSopenharmony_ci  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
1499ca880aSopenharmony_ci  Lesser General Public License for more details.
1599ca880aSopenharmony_ci
1699ca880aSopenharmony_ci  You should have received a copy of the GNU Lesser General Public License
1799ca880aSopenharmony_ci  along with systemd; If not, see <http://www.gnu.org/licenses/>.
1899ca880aSopenharmony_ci***/
1999ca880aSopenharmony_ci
2099ca880aSopenharmony_ci#include <stdlib.h>
2199ca880aSopenharmony_ci#include <string.h>
2299ca880aSopenharmony_ci
2399ca880aSopenharmony_ci#include "util.h"
2499ca880aSopenharmony_ci#include "strbuf.h"
2599ca880aSopenharmony_ci
2699ca880aSopenharmony_ci/*
2799ca880aSopenharmony_ci * Strbuf stores given strings in a single continuous allocated memory
2899ca880aSopenharmony_ci * area. Identical strings are de-duplicated and return the same offset
2999ca880aSopenharmony_ci * as the first string stored. If the tail of a string already exists
3099ca880aSopenharmony_ci * in the buffer, the tail is returned.
3199ca880aSopenharmony_ci *
3299ca880aSopenharmony_ci * A trie (http://en.wikipedia.org/wiki/Trie) is used to maintain the
3399ca880aSopenharmony_ci * information about the stored strings.
3499ca880aSopenharmony_ci *
3599ca880aSopenharmony_ci * Example of udev rules:
3699ca880aSopenharmony_ci *   $ ./udevadm test .
3799ca880aSopenharmony_ci *   ...
3899ca880aSopenharmony_ci *   read rules file: /usr/lib/udev/rules.d/99-systemd.rules
3999ca880aSopenharmony_ci *   rules contain 196608 bytes tokens (16384 * 12 bytes), 39742 bytes strings
4099ca880aSopenharmony_ci *   23939 strings (207859 bytes), 20404 de-duplicated (171653 bytes), 3536 trie nodes used
4199ca880aSopenharmony_ci *   ...
4299ca880aSopenharmony_ci */
4399ca880aSopenharmony_ci
4499ca880aSopenharmony_cistruct strbuf *strbuf_new(void) {
4599ca880aSopenharmony_ci        struct strbuf *str;
4699ca880aSopenharmony_ci
4799ca880aSopenharmony_ci        str = new0(struct strbuf, 1);
4899ca880aSopenharmony_ci        if (!str)
4999ca880aSopenharmony_ci                return NULL;
5099ca880aSopenharmony_ci
5199ca880aSopenharmony_ci        str->buf = new0(char, 1);
5299ca880aSopenharmony_ci        if (!str->buf)
5399ca880aSopenharmony_ci                goto err;
5499ca880aSopenharmony_ci        str->len = 1;
5599ca880aSopenharmony_ci
5699ca880aSopenharmony_ci        str->root = new0(struct strbuf_node, 1);
5799ca880aSopenharmony_ci        if (!str->root)
5899ca880aSopenharmony_ci                goto err;
5999ca880aSopenharmony_ci        str->nodes_count = 1;
6099ca880aSopenharmony_ci        return str;
6199ca880aSopenharmony_cierr:
6299ca880aSopenharmony_ci        free(str->buf);
6399ca880aSopenharmony_ci        free(str->root);
6499ca880aSopenharmony_ci        free(str);
6599ca880aSopenharmony_ci        return NULL;
6699ca880aSopenharmony_ci}
6799ca880aSopenharmony_ci
6899ca880aSopenharmony_cistatic void strbuf_node_cleanup(struct strbuf_node *node) {
6999ca880aSopenharmony_ci        size_t i;
7099ca880aSopenharmony_ci
7199ca880aSopenharmony_ci        for (i = 0; i < node->children_count; i++)
7299ca880aSopenharmony_ci                strbuf_node_cleanup(node->children[i].child);
7399ca880aSopenharmony_ci        free(node->children);
7499ca880aSopenharmony_ci        free(node);
7599ca880aSopenharmony_ci}
7699ca880aSopenharmony_ci
7799ca880aSopenharmony_ci/* clean up trie data, leave only the string buffer */
7899ca880aSopenharmony_civoid strbuf_complete(struct strbuf *str) {
7999ca880aSopenharmony_ci        if (!str)
8099ca880aSopenharmony_ci                return;
8199ca880aSopenharmony_ci        if (str->root)
8299ca880aSopenharmony_ci                strbuf_node_cleanup(str->root);
8399ca880aSopenharmony_ci        str->root = NULL;
8499ca880aSopenharmony_ci}
8599ca880aSopenharmony_ci
8699ca880aSopenharmony_ci/* clean up everything */
8799ca880aSopenharmony_civoid strbuf_cleanup(struct strbuf *str) {
8899ca880aSopenharmony_ci        if (!str)
8999ca880aSopenharmony_ci                return;
9099ca880aSopenharmony_ci        if (str->root)
9199ca880aSopenharmony_ci                strbuf_node_cleanup(str->root);
9299ca880aSopenharmony_ci        free(str->buf);
9399ca880aSopenharmony_ci        free(str);
9499ca880aSopenharmony_ci}
9599ca880aSopenharmony_ci
9699ca880aSopenharmony_cistatic int strbuf_children_cmp(const void* v1, const void* v2) {
9799ca880aSopenharmony_ci	const struct strbuf_child_entry *n1 = v1, *n2 = v2;
9899ca880aSopenharmony_ci        return n1->c - n2->c;
9999ca880aSopenharmony_ci}
10099ca880aSopenharmony_ci
10199ca880aSopenharmony_cistatic void bubbleinsert(struct strbuf_node *node,
10299ca880aSopenharmony_ci                         uint8_t c,
10399ca880aSopenharmony_ci                         struct strbuf_node *node_child) {
10499ca880aSopenharmony_ci
10599ca880aSopenharmony_ci        struct strbuf_child_entry new = {
10699ca880aSopenharmony_ci                .c = c,
10799ca880aSopenharmony_ci                .child = node_child,
10899ca880aSopenharmony_ci        };
10999ca880aSopenharmony_ci        int left = 0, right = node->children_count;
11099ca880aSopenharmony_ci
11199ca880aSopenharmony_ci        while (right > left) {
11299ca880aSopenharmony_ci                int middle = (right + left) / 2 ;
11399ca880aSopenharmony_ci                if (strbuf_children_cmp(&node->children[middle], &new) <= 0)
11499ca880aSopenharmony_ci                        left = middle + 1;
11599ca880aSopenharmony_ci                else
11699ca880aSopenharmony_ci                        right = middle;
11799ca880aSopenharmony_ci        }
11899ca880aSopenharmony_ci
11999ca880aSopenharmony_ci        memmove(node->children + left + 1, node->children + left,
12099ca880aSopenharmony_ci                sizeof(struct strbuf_child_entry) * (node->children_count - left));
12199ca880aSopenharmony_ci        node->children[left] = new;
12299ca880aSopenharmony_ci
12399ca880aSopenharmony_ci        node->children_count ++;
12499ca880aSopenharmony_ci}
12599ca880aSopenharmony_ci
12699ca880aSopenharmony_ci/* add string, return the index/offset into the buffer */
12799ca880aSopenharmony_cissize_t strbuf_add_string(struct strbuf *str, const char *s, size_t len) {
12899ca880aSopenharmony_ci        uint8_t c;
12999ca880aSopenharmony_ci        struct strbuf_node *node;
13099ca880aSopenharmony_ci        size_t depth;
13199ca880aSopenharmony_ci        char *buf_new;
13299ca880aSopenharmony_ci        struct strbuf_child_entry *child;
13399ca880aSopenharmony_ci        struct strbuf_node *node_child;
13499ca880aSopenharmony_ci        ssize_t off;
13599ca880aSopenharmony_ci
13699ca880aSopenharmony_ci        if (!str->root)
13799ca880aSopenharmony_ci                return -EINVAL;
13899ca880aSopenharmony_ci
13999ca880aSopenharmony_ci        /* search string; start from last character to find possibly matching tails */
14099ca880aSopenharmony_ci        if (len == 0)
14199ca880aSopenharmony_ci                return 0;
14299ca880aSopenharmony_ci        str->in_count++;
14399ca880aSopenharmony_ci        str->in_len += len;
14499ca880aSopenharmony_ci
14599ca880aSopenharmony_ci        node = str->root;
14699ca880aSopenharmony_ci        c = s[len-1];
14799ca880aSopenharmony_ci        for (depth = 0; depth <= len; depth++) {
14899ca880aSopenharmony_ci                struct strbuf_child_entry search;
14999ca880aSopenharmony_ci
15099ca880aSopenharmony_ci                /* match against current node */
15199ca880aSopenharmony_ci                off = node->value_off + node->value_len - len;
15299ca880aSopenharmony_ci                if (depth == len || (node->value_len >= len && memcmp(str->buf + off, s, len) == 0)) {
15399ca880aSopenharmony_ci                        str->dedup_len += len;
15499ca880aSopenharmony_ci                        str->dedup_count++;
15599ca880aSopenharmony_ci                        return off;
15699ca880aSopenharmony_ci                }
15799ca880aSopenharmony_ci
15899ca880aSopenharmony_ci                /* lookup child node */
15999ca880aSopenharmony_ci                c = s[len - 1 - depth];
16099ca880aSopenharmony_ci                search.c = c;
16199ca880aSopenharmony_ci                child = bsearch(&search, node->children, node->children_count,
16299ca880aSopenharmony_ci                                sizeof(struct strbuf_child_entry), strbuf_children_cmp);
16399ca880aSopenharmony_ci                if (!child)
16499ca880aSopenharmony_ci                        break;
16599ca880aSopenharmony_ci                node = child->child;
16699ca880aSopenharmony_ci        }
16799ca880aSopenharmony_ci
16899ca880aSopenharmony_ci        /* add new string */
16999ca880aSopenharmony_ci        buf_new = realloc(str->buf, str->len + len+1);
17099ca880aSopenharmony_ci        if (!buf_new)
17199ca880aSopenharmony_ci                return -ENOMEM;
17299ca880aSopenharmony_ci        str->buf = buf_new;
17399ca880aSopenharmony_ci        off = str->len;
17499ca880aSopenharmony_ci        memcpy(str->buf + off, s, len);
17599ca880aSopenharmony_ci        str->len += len;
17699ca880aSopenharmony_ci        str->buf[str->len++] = '\0';
17799ca880aSopenharmony_ci
17899ca880aSopenharmony_ci        /* new node */
17999ca880aSopenharmony_ci        node_child = new0(struct strbuf_node, 1);
18099ca880aSopenharmony_ci        if (!node_child)
18199ca880aSopenharmony_ci                return -ENOMEM;
18299ca880aSopenharmony_ci        node_child->value_off = off;
18399ca880aSopenharmony_ci        node_child->value_len = len;
18499ca880aSopenharmony_ci
18599ca880aSopenharmony_ci        /* extend array, add new entry, sort for bisection */
18699ca880aSopenharmony_ci        child = realloc(node->children, (node->children_count + 1) * sizeof(struct strbuf_child_entry));
18799ca880aSopenharmony_ci        if (!child) {
18899ca880aSopenharmony_ci                free(node_child);
18999ca880aSopenharmony_ci                return -ENOMEM;
19099ca880aSopenharmony_ci        }
19199ca880aSopenharmony_ci
19299ca880aSopenharmony_ci        str->nodes_count++;
19399ca880aSopenharmony_ci
19499ca880aSopenharmony_ci        node->children = child;
19599ca880aSopenharmony_ci        bubbleinsert(node, c, node_child);
19699ca880aSopenharmony_ci
19799ca880aSopenharmony_ci        return off;
19899ca880aSopenharmony_ci}
199