1cb93a386Sopenharmony_ci/*
2cb93a386Sopenharmony_ci * Copyright 2021 Google Inc.
3cb93a386Sopenharmony_ci *
4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be
5cb93a386Sopenharmony_ci * found in the LICENSE file.
6cb93a386Sopenharmony_ci */
7cb93a386Sopenharmony_ci
8cb93a386Sopenharmony_ci#include "src/sksl/lex/DFA.h"
9cb93a386Sopenharmony_ci#include "src/sksl/lex/TransitionTable.h"
10cb93a386Sopenharmony_ci
11cb93a386Sopenharmony_ci#include <array>
12cb93a386Sopenharmony_ci#include <bitset>
13cb93a386Sopenharmony_ci#include <cassert>
14cb93a386Sopenharmony_ci#include <cmath>
15cb93a386Sopenharmony_ci#include <unordered_map>
16cb93a386Sopenharmony_ci#include <unordered_set>
17cb93a386Sopenharmony_ci#include <vector>
18cb93a386Sopenharmony_ci
19cb93a386Sopenharmony_cinamespace {
20cb93a386Sopenharmony_ci
21cb93a386Sopenharmony_ci// The number of bits to use per entry in our compact transition table. This is customizable:
22cb93a386Sopenharmony_ci// - 1-bit: reasonable in theory. Doesn't actually pack many slices.
23cb93a386Sopenharmony_ci// - 2-bit: best fit for our data. Packs extremely well.
24cb93a386Sopenharmony_ci// - 4-bit: packs all but one slice, but doesn't save as much space overall.
25cb93a386Sopenharmony_ci// - 8-bit: way too large (an 8-bit LUT plus an 8-bit data table is as big as a 16-bit table)
26cb93a386Sopenharmony_ci// Other values don't divide cleanly into a byte and do not work.
27cb93a386Sopenharmony_ciconstexpr int kNumBits = 2;
28cb93a386Sopenharmony_ci
29cb93a386Sopenharmony_ci// These values are derived from kNumBits and shouldn't need to change.
30cb93a386Sopenharmony_ciconstexpr int kNumValues = (1 << kNumBits) - 1;
31cb93a386Sopenharmony_ciconstexpr int kDataPerByte = 8 / kNumBits;
32cb93a386Sopenharmony_ci
33cb93a386Sopenharmony_cienum IndexType {
34cb93a386Sopenharmony_ci    kZero = 0,
35cb93a386Sopenharmony_ci    kFullEntry,
36cb93a386Sopenharmony_ci    kCompactEntry,
37cb93a386Sopenharmony_ci};
38cb93a386Sopenharmony_cistruct IndexEntry {
39cb93a386Sopenharmony_ci    IndexType type;
40cb93a386Sopenharmony_ci    int pos;
41cb93a386Sopenharmony_ci};
42cb93a386Sopenharmony_cistruct CompactEntry {
43cb93a386Sopenharmony_ci    std::array<int, kNumValues> v = {};
44cb93a386Sopenharmony_ci    std::vector<int> data;
45cb93a386Sopenharmony_ci};
46cb93a386Sopenharmony_cistruct FullEntry {
47cb93a386Sopenharmony_ci    std::vector<int> data;
48cb93a386Sopenharmony_ci};
49cb93a386Sopenharmony_ci
50cb93a386Sopenharmony_ciusing TransitionSet = std::unordered_set<int>;
51cb93a386Sopenharmony_ci
52cb93a386Sopenharmony_cistatic int add_compact_entry(const TransitionSet& transitionSet,
53cb93a386Sopenharmony_ci                             const std::vector<int>& data,
54cb93a386Sopenharmony_ci                             std::vector<CompactEntry>* entries) {
55cb93a386Sopenharmony_ci    // Create a compact entry with the unique values from the transition set, padded out with zeros
56cb93a386Sopenharmony_ci    // and sorted.
57cb93a386Sopenharmony_ci    CompactEntry result{};
58cb93a386Sopenharmony_ci    assert(transitionSet.size() <= result.v.size());
59cb93a386Sopenharmony_ci    std::copy(transitionSet.begin(), transitionSet.end(), result.v.begin());
60cb93a386Sopenharmony_ci    std::sort(result.v.begin(), result.v.end());
61cb93a386Sopenharmony_ci
62cb93a386Sopenharmony_ci    // Create a mapping from real values to small values. (0 -> 0, v[0] -> 1, v[1] -> 2, v[2] -> 3)
63cb93a386Sopenharmony_ci    std::unordered_map<int, int> translationTable;
64cb93a386Sopenharmony_ci    for (size_t index = 0; index < result.v.size(); ++index) {
65cb93a386Sopenharmony_ci        translationTable[result.v[index]] = 1 + index;
66cb93a386Sopenharmony_ci    }
67cb93a386Sopenharmony_ci    translationTable[0] = 0;
68cb93a386Sopenharmony_ci
69cb93a386Sopenharmony_ci    // Convert the real values into small values.
70cb93a386Sopenharmony_ci    for (size_t index = 0; index < data.size(); ++index) {
71cb93a386Sopenharmony_ci        int value = data[index];
72cb93a386Sopenharmony_ci        assert(translationTable.find(value) != translationTable.end());
73cb93a386Sopenharmony_ci        result.data.push_back(translationTable[value]);
74cb93a386Sopenharmony_ci    }
75cb93a386Sopenharmony_ci
76cb93a386Sopenharmony_ci    // Look for an existing entry that exactly matches this one.
77cb93a386Sopenharmony_ci    for (size_t index = 0; index < entries->size(); ++index) {
78cb93a386Sopenharmony_ci        if (entries->at(index).v == result.v && entries->at(index).data == result.data) {
79cb93a386Sopenharmony_ci            return index;
80cb93a386Sopenharmony_ci        }
81cb93a386Sopenharmony_ci    }
82cb93a386Sopenharmony_ci
83cb93a386Sopenharmony_ci    // Add this as a new entry.
84cb93a386Sopenharmony_ci    entries->push_back(std::move(result));
85cb93a386Sopenharmony_ci    return (int)(entries->size() - 1);
86cb93a386Sopenharmony_ci}
87cb93a386Sopenharmony_ci
88cb93a386Sopenharmony_cistatic int add_full_entry(const TransitionSet& transitionMap,
89cb93a386Sopenharmony_ci                          const std::vector<int>& data,
90cb93a386Sopenharmony_ci                          std::vector<FullEntry>* entries) {
91cb93a386Sopenharmony_ci    // Create a full entry with this data.
92cb93a386Sopenharmony_ci    FullEntry result{};
93cb93a386Sopenharmony_ci    result.data = std::vector<int>(data.begin(), data.end());
94cb93a386Sopenharmony_ci
95cb93a386Sopenharmony_ci    // Look for an existing entry that exactly matches this one.
96cb93a386Sopenharmony_ci    for (size_t index = 0; index < entries->size(); ++index) {
97cb93a386Sopenharmony_ci        if (entries->at(index).data == result.data) {
98cb93a386Sopenharmony_ci            return index;
99cb93a386Sopenharmony_ci        }
100cb93a386Sopenharmony_ci    }
101cb93a386Sopenharmony_ci
102cb93a386Sopenharmony_ci    // Add this as a new entry.
103cb93a386Sopenharmony_ci    entries->push_back(std::move(result));
104cb93a386Sopenharmony_ci    return (int)(entries->size() - 1);
105cb93a386Sopenharmony_ci}
106cb93a386Sopenharmony_ci
107cb93a386Sopenharmony_ci}  // namespace
108cb93a386Sopenharmony_ci
109cb93a386Sopenharmony_civoid WriteTransitionTable(std::ofstream& out, const DFA& dfa, size_t states) {
110cb93a386Sopenharmony_ci    int numTransitions = dfa.fTransitions.size();
111cb93a386Sopenharmony_ci
112cb93a386Sopenharmony_ci    // Assemble our compact and full data tables, and an index into them.
113cb93a386Sopenharmony_ci    std::vector<CompactEntry> compactEntries;
114cb93a386Sopenharmony_ci    std::vector<FullEntry> fullEntries;
115cb93a386Sopenharmony_ci    std::vector<IndexEntry> indices;
116cb93a386Sopenharmony_ci    for (size_t s = 0; s < states; ++s) {
117cb93a386Sopenharmony_ci        // Copy all the transitions for this state into a flat array, and into a histogram (counting
118cb93a386Sopenharmony_ci        // the number of unique state-transition values). Most states only transition to a few
119cb93a386Sopenharmony_ci        // possible new states.
120cb93a386Sopenharmony_ci        TransitionSet transitionSet;
121cb93a386Sopenharmony_ci        std::vector<int> data(numTransitions);
122cb93a386Sopenharmony_ci        for (int t = 0; t < numTransitions; ++t) {
123cb93a386Sopenharmony_ci            if ((size_t) t < dfa.fTransitions.size() && s < dfa.fTransitions[t].size()) {
124cb93a386Sopenharmony_ci                int value = dfa.fTransitions[t][s];
125cb93a386Sopenharmony_ci                assert(value >= 0 && value < (int)states);
126cb93a386Sopenharmony_ci                data[t] = value;
127cb93a386Sopenharmony_ci                transitionSet.insert(value);
128cb93a386Sopenharmony_ci            }
129cb93a386Sopenharmony_ci        }
130cb93a386Sopenharmony_ci
131cb93a386Sopenharmony_ci        transitionSet.erase(0);
132cb93a386Sopenharmony_ci        if (transitionSet.empty()) {
133cb93a386Sopenharmony_ci            // This transition table was completely empty (every value was zero). No data needed;
134cb93a386Sopenharmony_ci            // zero pages are handled as a special index type.
135cb93a386Sopenharmony_ci            indices.push_back(IndexEntry{kZero, 0});
136cb93a386Sopenharmony_ci        } else if (transitionSet.size() <= kNumValues) {
137cb93a386Sopenharmony_ci            // This table only contained a small number of unique nonzero values.
138cb93a386Sopenharmony_ci            // Use a compact representation that squishes each value down to a few bits.
139cb93a386Sopenharmony_ci            int index = add_compact_entry(transitionSet, data, &compactEntries);
140cb93a386Sopenharmony_ci            indices.push_back(IndexEntry{kCompactEntry, index});
141cb93a386Sopenharmony_ci        } else {
142cb93a386Sopenharmony_ci            // This table contained a large number of values. We can't compact it.
143cb93a386Sopenharmony_ci            int index = add_full_entry(transitionSet, data, &fullEntries);
144cb93a386Sopenharmony_ci            indices.push_back(IndexEntry{kFullEntry, index});
145cb93a386Sopenharmony_ci        }
146cb93a386Sopenharmony_ci    }
147cb93a386Sopenharmony_ci
148cb93a386Sopenharmony_ci    // Find the largest value for each compact-entry slot.
149cb93a386Sopenharmony_ci    int maxValue[kNumValues] = {};
150cb93a386Sopenharmony_ci    for (const CompactEntry& entry : compactEntries) {
151cb93a386Sopenharmony_ci        for (int index=0; index < kNumValues; ++index) {
152cb93a386Sopenharmony_ci            maxValue[index] = std::max(maxValue[index], entry.v[index]);
153cb93a386Sopenharmony_ci        }
154cb93a386Sopenharmony_ci    }
155cb93a386Sopenharmony_ci
156cb93a386Sopenharmony_ci    // Emit all the structs our transition table will use.
157cb93a386Sopenharmony_ci    out << "struct IndexEntry {\n"
158cb93a386Sopenharmony_ci        << "    uint16_t type : 2;\n"
159cb93a386Sopenharmony_ci        << "    uint16_t pos : 14;\n"
160cb93a386Sopenharmony_ci        << "};\n"
161cb93a386Sopenharmony_ci        << "struct FullEntry {\n"
162cb93a386Sopenharmony_ci        << "    State data[" << numTransitions << "];\n"
163cb93a386Sopenharmony_ci        << "};\n";
164cb93a386Sopenharmony_ci
165cb93a386Sopenharmony_ci    // Emit the compact-entry structure; minimize the number of bits needed per value.
166cb93a386Sopenharmony_ci    out << "struct CompactEntry {\n";
167cb93a386Sopenharmony_ci    for (int index=0; index < kNumValues; ++index) {
168cb93a386Sopenharmony_ci        if (maxValue[index] > 0) {
169cb93a386Sopenharmony_ci            out << "    State v" << index << " : " << int(std::ceil(std::log2(maxValue[index])))
170cb93a386Sopenharmony_ci                << ";\n";
171cb93a386Sopenharmony_ci        }
172cb93a386Sopenharmony_ci    }
173cb93a386Sopenharmony_ci
174cb93a386Sopenharmony_ci    out << "    uint8_t data[" << std::ceil(float(numTransitions) / float(kDataPerByte)) << "];\n"
175cb93a386Sopenharmony_ci        << "};\n";
176cb93a386Sopenharmony_ci
177cb93a386Sopenharmony_ci    // Emit the full-table data.
178cb93a386Sopenharmony_ci    out << "static constexpr FullEntry kFull[] = {\n";
179cb93a386Sopenharmony_ci    for (const FullEntry& entry : fullEntries) {
180cb93a386Sopenharmony_ci        out << "    {";
181cb93a386Sopenharmony_ci        for (int value : entry.data) {
182cb93a386Sopenharmony_ci            out << value << ", ";
183cb93a386Sopenharmony_ci        }
184cb93a386Sopenharmony_ci        out << "},\n";
185cb93a386Sopenharmony_ci    }
186cb93a386Sopenharmony_ci    out << "};\n";
187cb93a386Sopenharmony_ci
188cb93a386Sopenharmony_ci    // Emit the compact-table data.
189cb93a386Sopenharmony_ci    out << "static constexpr CompactEntry kCompact[] = {\n";
190cb93a386Sopenharmony_ci    for (const CompactEntry& entry : compactEntries) {
191cb93a386Sopenharmony_ci        out << "    {";
192cb93a386Sopenharmony_ci        for (int index=0; index < kNumValues; ++index) {
193cb93a386Sopenharmony_ci            if (maxValue[index] > 0) {
194cb93a386Sopenharmony_ci                out << entry.v[index] << ", ";
195cb93a386Sopenharmony_ci            }
196cb93a386Sopenharmony_ci        }
197cb93a386Sopenharmony_ci        out << "{";
198cb93a386Sopenharmony_ci        unsigned int shiftBits = 0, combinedBits = 0;
199cb93a386Sopenharmony_ci        for (int index = 0; index < numTransitions; index++) {
200cb93a386Sopenharmony_ci            combinedBits |= entry.data[index] << shiftBits;
201cb93a386Sopenharmony_ci            shiftBits += kNumBits;
202cb93a386Sopenharmony_ci            assert(shiftBits <= 8);
203cb93a386Sopenharmony_ci            if (shiftBits == 8) {
204cb93a386Sopenharmony_ci                out << combinedBits << ", ";
205cb93a386Sopenharmony_ci                shiftBits = 0;
206cb93a386Sopenharmony_ci                combinedBits = 0;
207cb93a386Sopenharmony_ci            }
208cb93a386Sopenharmony_ci        }
209cb93a386Sopenharmony_ci        if (shiftBits > 0) {
210cb93a386Sopenharmony_ci            // Flush any partial values.
211cb93a386Sopenharmony_ci            out << combinedBits;
212cb93a386Sopenharmony_ci        }
213cb93a386Sopenharmony_ci        out << "}},\n";
214cb93a386Sopenharmony_ci    }
215cb93a386Sopenharmony_ci    out << "};\n"
216cb93a386Sopenharmony_ci        << "static constexpr IndexEntry kIndices[] = {\n";
217cb93a386Sopenharmony_ci    for (const IndexEntry& entry : indices) {
218cb93a386Sopenharmony_ci        out << "    {" << entry.type << ", " << entry.pos << "},\n";
219cb93a386Sopenharmony_ci    }
220cb93a386Sopenharmony_ci    out << "};\n"
221cb93a386Sopenharmony_ci        << "State get_transition(int transition, int state) {\n"
222cb93a386Sopenharmony_ci        << "    IndexEntry index = kIndices[state];\n"
223cb93a386Sopenharmony_ci        << "    if (index.type == 0) { return 0; }\n"
224cb93a386Sopenharmony_ci        << "    if (index.type == 1) { return kFull[index.pos].data[transition]; }\n"
225cb93a386Sopenharmony_ci        << "    const CompactEntry& entry = kCompact[index.pos];\n"
226cb93a386Sopenharmony_ci        << "    int value = entry.data[transition >> " << std::log2(kDataPerByte) << "];\n"
227cb93a386Sopenharmony_ci        << "    value >>= " << kNumBits << " * (transition & " << kDataPerByte - 1 << ");\n"
228cb93a386Sopenharmony_ci        << "    value &= " << kNumValues << ";\n"
229cb93a386Sopenharmony_ci        << "    State table[] = {0";
230cb93a386Sopenharmony_ci
231cb93a386Sopenharmony_ci    for (int index=0; index < kNumValues; ++index) {
232cb93a386Sopenharmony_ci        if (maxValue[index] > 0) {
233cb93a386Sopenharmony_ci            out << ", entry.v" << index;
234cb93a386Sopenharmony_ci        } else {
235cb93a386Sopenharmony_ci            out << ", 0";
236cb93a386Sopenharmony_ci        }
237cb93a386Sopenharmony_ci    }
238cb93a386Sopenharmony_ci
239cb93a386Sopenharmony_ci    out << "};\n"
240cb93a386Sopenharmony_ci        << "    return table[value];\n"
241cb93a386Sopenharmony_ci        << "}\n";
242cb93a386Sopenharmony_ci}
243