1cb93a386Sopenharmony_ci/* 2cb93a386Sopenharmony_ci * Copyright 2021 Google Inc. 3cb93a386Sopenharmony_ci * 4cb93a386Sopenharmony_ci * Use of this source code is governed by a BSD-style license that can be 5cb93a386Sopenharmony_ci * found in the LICENSE file. 6cb93a386Sopenharmony_ci */ 7cb93a386Sopenharmony_ci 8cb93a386Sopenharmony_ci#include "src/sksl/lex/DFA.h" 9cb93a386Sopenharmony_ci#include "src/sksl/lex/TransitionTable.h" 10cb93a386Sopenharmony_ci 11cb93a386Sopenharmony_ci#include <array> 12cb93a386Sopenharmony_ci#include <bitset> 13cb93a386Sopenharmony_ci#include <cassert> 14cb93a386Sopenharmony_ci#include <cmath> 15cb93a386Sopenharmony_ci#include <unordered_map> 16cb93a386Sopenharmony_ci#include <unordered_set> 17cb93a386Sopenharmony_ci#include <vector> 18cb93a386Sopenharmony_ci 19cb93a386Sopenharmony_cinamespace { 20cb93a386Sopenharmony_ci 21cb93a386Sopenharmony_ci// The number of bits to use per entry in our compact transition table. This is customizable: 22cb93a386Sopenharmony_ci// - 1-bit: reasonable in theory. Doesn't actually pack many slices. 23cb93a386Sopenharmony_ci// - 2-bit: best fit for our data. Packs extremely well. 24cb93a386Sopenharmony_ci// - 4-bit: packs all but one slice, but doesn't save as much space overall. 25cb93a386Sopenharmony_ci// - 8-bit: way too large (an 8-bit LUT plus an 8-bit data table is as big as a 16-bit table) 26cb93a386Sopenharmony_ci// Other values don't divide cleanly into a byte and do not work. 27cb93a386Sopenharmony_ciconstexpr int kNumBits = 2; 28cb93a386Sopenharmony_ci 29cb93a386Sopenharmony_ci// These values are derived from kNumBits and shouldn't need to change. 30cb93a386Sopenharmony_ciconstexpr int kNumValues = (1 << kNumBits) - 1; 31cb93a386Sopenharmony_ciconstexpr int kDataPerByte = 8 / kNumBits; 32cb93a386Sopenharmony_ci 33cb93a386Sopenharmony_cienum IndexType { 34cb93a386Sopenharmony_ci kZero = 0, 35cb93a386Sopenharmony_ci kFullEntry, 36cb93a386Sopenharmony_ci kCompactEntry, 37cb93a386Sopenharmony_ci}; 38cb93a386Sopenharmony_cistruct IndexEntry { 39cb93a386Sopenharmony_ci IndexType type; 40cb93a386Sopenharmony_ci int pos; 41cb93a386Sopenharmony_ci}; 42cb93a386Sopenharmony_cistruct CompactEntry { 43cb93a386Sopenharmony_ci std::array<int, kNumValues> v = {}; 44cb93a386Sopenharmony_ci std::vector<int> data; 45cb93a386Sopenharmony_ci}; 46cb93a386Sopenharmony_cistruct FullEntry { 47cb93a386Sopenharmony_ci std::vector<int> data; 48cb93a386Sopenharmony_ci}; 49cb93a386Sopenharmony_ci 50cb93a386Sopenharmony_ciusing TransitionSet = std::unordered_set<int>; 51cb93a386Sopenharmony_ci 52cb93a386Sopenharmony_cistatic int add_compact_entry(const TransitionSet& transitionSet, 53cb93a386Sopenharmony_ci const std::vector<int>& data, 54cb93a386Sopenharmony_ci std::vector<CompactEntry>* entries) { 55cb93a386Sopenharmony_ci // Create a compact entry with the unique values from the transition set, padded out with zeros 56cb93a386Sopenharmony_ci // and sorted. 57cb93a386Sopenharmony_ci CompactEntry result{}; 58cb93a386Sopenharmony_ci assert(transitionSet.size() <= result.v.size()); 59cb93a386Sopenharmony_ci std::copy(transitionSet.begin(), transitionSet.end(), result.v.begin()); 60cb93a386Sopenharmony_ci std::sort(result.v.begin(), result.v.end()); 61cb93a386Sopenharmony_ci 62cb93a386Sopenharmony_ci // Create a mapping from real values to small values. (0 -> 0, v[0] -> 1, v[1] -> 2, v[2] -> 3) 63cb93a386Sopenharmony_ci std::unordered_map<int, int> translationTable; 64cb93a386Sopenharmony_ci for (size_t index = 0; index < result.v.size(); ++index) { 65cb93a386Sopenharmony_ci translationTable[result.v[index]] = 1 + index; 66cb93a386Sopenharmony_ci } 67cb93a386Sopenharmony_ci translationTable[0] = 0; 68cb93a386Sopenharmony_ci 69cb93a386Sopenharmony_ci // Convert the real values into small values. 70cb93a386Sopenharmony_ci for (size_t index = 0; index < data.size(); ++index) { 71cb93a386Sopenharmony_ci int value = data[index]; 72cb93a386Sopenharmony_ci assert(translationTable.find(value) != translationTable.end()); 73cb93a386Sopenharmony_ci result.data.push_back(translationTable[value]); 74cb93a386Sopenharmony_ci } 75cb93a386Sopenharmony_ci 76cb93a386Sopenharmony_ci // Look for an existing entry that exactly matches this one. 77cb93a386Sopenharmony_ci for (size_t index = 0; index < entries->size(); ++index) { 78cb93a386Sopenharmony_ci if (entries->at(index).v == result.v && entries->at(index).data == result.data) { 79cb93a386Sopenharmony_ci return index; 80cb93a386Sopenharmony_ci } 81cb93a386Sopenharmony_ci } 82cb93a386Sopenharmony_ci 83cb93a386Sopenharmony_ci // Add this as a new entry. 84cb93a386Sopenharmony_ci entries->push_back(std::move(result)); 85cb93a386Sopenharmony_ci return (int)(entries->size() - 1); 86cb93a386Sopenharmony_ci} 87cb93a386Sopenharmony_ci 88cb93a386Sopenharmony_cistatic int add_full_entry(const TransitionSet& transitionMap, 89cb93a386Sopenharmony_ci const std::vector<int>& data, 90cb93a386Sopenharmony_ci std::vector<FullEntry>* entries) { 91cb93a386Sopenharmony_ci // Create a full entry with this data. 92cb93a386Sopenharmony_ci FullEntry result{}; 93cb93a386Sopenharmony_ci result.data = std::vector<int>(data.begin(), data.end()); 94cb93a386Sopenharmony_ci 95cb93a386Sopenharmony_ci // Look for an existing entry that exactly matches this one. 96cb93a386Sopenharmony_ci for (size_t index = 0; index < entries->size(); ++index) { 97cb93a386Sopenharmony_ci if (entries->at(index).data == result.data) { 98cb93a386Sopenharmony_ci return index; 99cb93a386Sopenharmony_ci } 100cb93a386Sopenharmony_ci } 101cb93a386Sopenharmony_ci 102cb93a386Sopenharmony_ci // Add this as a new entry. 103cb93a386Sopenharmony_ci entries->push_back(std::move(result)); 104cb93a386Sopenharmony_ci return (int)(entries->size() - 1); 105cb93a386Sopenharmony_ci} 106cb93a386Sopenharmony_ci 107cb93a386Sopenharmony_ci} // namespace 108cb93a386Sopenharmony_ci 109cb93a386Sopenharmony_civoid WriteTransitionTable(std::ofstream& out, const DFA& dfa, size_t states) { 110cb93a386Sopenharmony_ci int numTransitions = dfa.fTransitions.size(); 111cb93a386Sopenharmony_ci 112cb93a386Sopenharmony_ci // Assemble our compact and full data tables, and an index into them. 113cb93a386Sopenharmony_ci std::vector<CompactEntry> compactEntries; 114cb93a386Sopenharmony_ci std::vector<FullEntry> fullEntries; 115cb93a386Sopenharmony_ci std::vector<IndexEntry> indices; 116cb93a386Sopenharmony_ci for (size_t s = 0; s < states; ++s) { 117cb93a386Sopenharmony_ci // Copy all the transitions for this state into a flat array, and into a histogram (counting 118cb93a386Sopenharmony_ci // the number of unique state-transition values). Most states only transition to a few 119cb93a386Sopenharmony_ci // possible new states. 120cb93a386Sopenharmony_ci TransitionSet transitionSet; 121cb93a386Sopenharmony_ci std::vector<int> data(numTransitions); 122cb93a386Sopenharmony_ci for (int t = 0; t < numTransitions; ++t) { 123cb93a386Sopenharmony_ci if ((size_t) t < dfa.fTransitions.size() && s < dfa.fTransitions[t].size()) { 124cb93a386Sopenharmony_ci int value = dfa.fTransitions[t][s]; 125cb93a386Sopenharmony_ci assert(value >= 0 && value < (int)states); 126cb93a386Sopenharmony_ci data[t] = value; 127cb93a386Sopenharmony_ci transitionSet.insert(value); 128cb93a386Sopenharmony_ci } 129cb93a386Sopenharmony_ci } 130cb93a386Sopenharmony_ci 131cb93a386Sopenharmony_ci transitionSet.erase(0); 132cb93a386Sopenharmony_ci if (transitionSet.empty()) { 133cb93a386Sopenharmony_ci // This transition table was completely empty (every value was zero). No data needed; 134cb93a386Sopenharmony_ci // zero pages are handled as a special index type. 135cb93a386Sopenharmony_ci indices.push_back(IndexEntry{kZero, 0}); 136cb93a386Sopenharmony_ci } else if (transitionSet.size() <= kNumValues) { 137cb93a386Sopenharmony_ci // This table only contained a small number of unique nonzero values. 138cb93a386Sopenharmony_ci // Use a compact representation that squishes each value down to a few bits. 139cb93a386Sopenharmony_ci int index = add_compact_entry(transitionSet, data, &compactEntries); 140cb93a386Sopenharmony_ci indices.push_back(IndexEntry{kCompactEntry, index}); 141cb93a386Sopenharmony_ci } else { 142cb93a386Sopenharmony_ci // This table contained a large number of values. We can't compact it. 143cb93a386Sopenharmony_ci int index = add_full_entry(transitionSet, data, &fullEntries); 144cb93a386Sopenharmony_ci indices.push_back(IndexEntry{kFullEntry, index}); 145cb93a386Sopenharmony_ci } 146cb93a386Sopenharmony_ci } 147cb93a386Sopenharmony_ci 148cb93a386Sopenharmony_ci // Find the largest value for each compact-entry slot. 149cb93a386Sopenharmony_ci int maxValue[kNumValues] = {}; 150cb93a386Sopenharmony_ci for (const CompactEntry& entry : compactEntries) { 151cb93a386Sopenharmony_ci for (int index=0; index < kNumValues; ++index) { 152cb93a386Sopenharmony_ci maxValue[index] = std::max(maxValue[index], entry.v[index]); 153cb93a386Sopenharmony_ci } 154cb93a386Sopenharmony_ci } 155cb93a386Sopenharmony_ci 156cb93a386Sopenharmony_ci // Emit all the structs our transition table will use. 157cb93a386Sopenharmony_ci out << "struct IndexEntry {\n" 158cb93a386Sopenharmony_ci << " uint16_t type : 2;\n" 159cb93a386Sopenharmony_ci << " uint16_t pos : 14;\n" 160cb93a386Sopenharmony_ci << "};\n" 161cb93a386Sopenharmony_ci << "struct FullEntry {\n" 162cb93a386Sopenharmony_ci << " State data[" << numTransitions << "];\n" 163cb93a386Sopenharmony_ci << "};\n"; 164cb93a386Sopenharmony_ci 165cb93a386Sopenharmony_ci // Emit the compact-entry structure; minimize the number of bits needed per value. 166cb93a386Sopenharmony_ci out << "struct CompactEntry {\n"; 167cb93a386Sopenharmony_ci for (int index=0; index < kNumValues; ++index) { 168cb93a386Sopenharmony_ci if (maxValue[index] > 0) { 169cb93a386Sopenharmony_ci out << " State v" << index << " : " << int(std::ceil(std::log2(maxValue[index]))) 170cb93a386Sopenharmony_ci << ";\n"; 171cb93a386Sopenharmony_ci } 172cb93a386Sopenharmony_ci } 173cb93a386Sopenharmony_ci 174cb93a386Sopenharmony_ci out << " uint8_t data[" << std::ceil(float(numTransitions) / float(kDataPerByte)) << "];\n" 175cb93a386Sopenharmony_ci << "};\n"; 176cb93a386Sopenharmony_ci 177cb93a386Sopenharmony_ci // Emit the full-table data. 178cb93a386Sopenharmony_ci out << "static constexpr FullEntry kFull[] = {\n"; 179cb93a386Sopenharmony_ci for (const FullEntry& entry : fullEntries) { 180cb93a386Sopenharmony_ci out << " {"; 181cb93a386Sopenharmony_ci for (int value : entry.data) { 182cb93a386Sopenharmony_ci out << value << ", "; 183cb93a386Sopenharmony_ci } 184cb93a386Sopenharmony_ci out << "},\n"; 185cb93a386Sopenharmony_ci } 186cb93a386Sopenharmony_ci out << "};\n"; 187cb93a386Sopenharmony_ci 188cb93a386Sopenharmony_ci // Emit the compact-table data. 189cb93a386Sopenharmony_ci out << "static constexpr CompactEntry kCompact[] = {\n"; 190cb93a386Sopenharmony_ci for (const CompactEntry& entry : compactEntries) { 191cb93a386Sopenharmony_ci out << " {"; 192cb93a386Sopenharmony_ci for (int index=0; index < kNumValues; ++index) { 193cb93a386Sopenharmony_ci if (maxValue[index] > 0) { 194cb93a386Sopenharmony_ci out << entry.v[index] << ", "; 195cb93a386Sopenharmony_ci } 196cb93a386Sopenharmony_ci } 197cb93a386Sopenharmony_ci out << "{"; 198cb93a386Sopenharmony_ci unsigned int shiftBits = 0, combinedBits = 0; 199cb93a386Sopenharmony_ci for (int index = 0; index < numTransitions; index++) { 200cb93a386Sopenharmony_ci combinedBits |= entry.data[index] << shiftBits; 201cb93a386Sopenharmony_ci shiftBits += kNumBits; 202cb93a386Sopenharmony_ci assert(shiftBits <= 8); 203cb93a386Sopenharmony_ci if (shiftBits == 8) { 204cb93a386Sopenharmony_ci out << combinedBits << ", "; 205cb93a386Sopenharmony_ci shiftBits = 0; 206cb93a386Sopenharmony_ci combinedBits = 0; 207cb93a386Sopenharmony_ci } 208cb93a386Sopenharmony_ci } 209cb93a386Sopenharmony_ci if (shiftBits > 0) { 210cb93a386Sopenharmony_ci // Flush any partial values. 211cb93a386Sopenharmony_ci out << combinedBits; 212cb93a386Sopenharmony_ci } 213cb93a386Sopenharmony_ci out << "}},\n"; 214cb93a386Sopenharmony_ci } 215cb93a386Sopenharmony_ci out << "};\n" 216cb93a386Sopenharmony_ci << "static constexpr IndexEntry kIndices[] = {\n"; 217cb93a386Sopenharmony_ci for (const IndexEntry& entry : indices) { 218cb93a386Sopenharmony_ci out << " {" << entry.type << ", " << entry.pos << "},\n"; 219cb93a386Sopenharmony_ci } 220cb93a386Sopenharmony_ci out << "};\n" 221cb93a386Sopenharmony_ci << "State get_transition(int transition, int state) {\n" 222cb93a386Sopenharmony_ci << " IndexEntry index = kIndices[state];\n" 223cb93a386Sopenharmony_ci << " if (index.type == 0) { return 0; }\n" 224cb93a386Sopenharmony_ci << " if (index.type == 1) { return kFull[index.pos].data[transition]; }\n" 225cb93a386Sopenharmony_ci << " const CompactEntry& entry = kCompact[index.pos];\n" 226cb93a386Sopenharmony_ci << " int value = entry.data[transition >> " << std::log2(kDataPerByte) << "];\n" 227cb93a386Sopenharmony_ci << " value >>= " << kNumBits << " * (transition & " << kDataPerByte - 1 << ");\n" 228cb93a386Sopenharmony_ci << " value &= " << kNumValues << ";\n" 229cb93a386Sopenharmony_ci << " State table[] = {0"; 230cb93a386Sopenharmony_ci 231cb93a386Sopenharmony_ci for (int index=0; index < kNumValues; ++index) { 232cb93a386Sopenharmony_ci if (maxValue[index] > 0) { 233cb93a386Sopenharmony_ci out << ", entry.v" << index; 234cb93a386Sopenharmony_ci } else { 235cb93a386Sopenharmony_ci out << ", 0"; 236cb93a386Sopenharmony_ci } 237cb93a386Sopenharmony_ci } 238cb93a386Sopenharmony_ci 239cb93a386Sopenharmony_ci out << "};\n" 240cb93a386Sopenharmony_ci << " return table[value];\n" 241cb93a386Sopenharmony_ci << "}\n"; 242cb93a386Sopenharmony_ci} 243