1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 //   * Redistributions of source code must retain the above copyright notice,
8 //     this list of conditions and the following disclaimer.
9 //   * Redistributions in binary form must reproduce the above copyright notice,
10 //     this list of conditions and the following disclaimer in the documentation
11 //     and/or other materials provided with the distribution.
12 //   * Neither the name of ARM Limited nor the names of its contributors may be
13 //     used to endorse or promote products derived from this software without
14 //     specific prior written permission.
15 //
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 
27 #ifndef VIXL_AARCH64_DECODER_AARCH64_H_
28 #define VIXL_AARCH64_DECODER_AARCH64_H_
29 
#include <cstdint>
#include <cstring>
#include <functional>
#include <initializer_list>
#include <list>
#include <map>
#include <string>
#include <string_view>
#include <unordered_map>
#include <vector>

#include "../globals-vixl.h"
#include "../utils-vixl.h"

#include "instructions-aarch64.h"
39 
// List macro containing all visitors needed by the decoder class. Each entry
// names an instruction class; expanding the list with a suitable V(...)
// generates one declaration/definition/table-entry per instruction class.
// Entries in this list correspond to visitors that return normally.
#define VISITOR_LIST_THAT_RETURN(V)                              \
  V(AddSubExtended)                                              \
  V(AddSubImmediate)                                             \
  V(AddSubShifted)                                               \
  V(AddSubWithCarry)                                             \
  V(AtomicMemory)                                                \
  V(Bitfield)                                                    \
  V(CompareBranch)                                               \
  V(ConditionalBranch)                                           \
  V(ConditionalCompareImmediate)                                 \
  V(ConditionalCompareRegister)                                  \
  V(ConditionalSelect)                                           \
  V(Crypto2RegSHA)                                               \
  V(Crypto3RegSHA)                                               \
  V(CryptoAES)                                                   \
  V(DataProcessing1Source)                                       \
  V(DataProcessing2Source)                                       \
  V(DataProcessing3Source)                                       \
  V(EvaluateIntoFlags)                                           \
  V(Exception)                                                   \
  V(Extract)                                                     \
  V(FPCompare)                                                   \
  V(FPConditionalCompare)                                        \
  V(FPConditionalSelect)                                         \
  V(FPDataProcessing1Source)                                     \
  V(FPDataProcessing2Source)                                     \
  V(FPDataProcessing3Source)                                     \
  V(FPFixedPointConvert)                                         \
  V(FPImmediate)                                                 \
  V(FPIntegerConvert)                                            \
  V(LoadLiteral)                                                 \
  V(LoadStoreExclusive)                                          \
  V(LoadStorePAC)                                                \
  V(LoadStorePairNonTemporal)                                    \
  V(LoadStorePairOffset)                                         \
  V(LoadStorePairPostIndex)                                      \
  V(LoadStorePairPreIndex)                                       \
  V(LoadStorePostIndex)                                          \
  V(LoadStorePreIndex)                                           \
  V(LoadStoreRCpcUnscaledOffset)                                 \
  V(LoadStoreRegisterOffset)                                     \
  V(LoadStoreUnscaledOffset)                                     \
  V(LoadStoreUnsignedOffset)                                     \
  V(LogicalImmediate)                                            \
  V(LogicalShifted)                                              \
  V(MoveWideImmediate)                                           \
  V(NEON2RegMisc)                                                \
  V(NEON2RegMiscFP16)                                            \
  V(NEON3Different)                                              \
  V(NEON3Same)                                                   \
  V(NEON3SameExtra)                                              \
  V(NEON3SameFP16)                                               \
  V(NEONAcrossLanes)                                             \
  V(NEONByIndexedElement)                                        \
  V(NEONCopy)                                                    \
  V(NEONExtract)                                                 \
  V(NEONLoadStoreMultiStruct)                                    \
  V(NEONLoadStoreMultiStructPostIndex)                           \
  V(NEONLoadStoreSingleStruct)                                   \
  V(NEONLoadStoreSingleStructPostIndex)                          \
  V(NEONModifiedImmediate)                                       \
  V(NEONPerm)                                                    \
  V(NEONScalar2RegMisc)                                          \
  V(NEONScalar2RegMiscFP16)                                      \
  V(NEONScalar3Diff)                                             \
  V(NEONScalar3Same)                                             \
  V(NEONScalar3SameExtra)                                        \
  V(NEONScalar3SameFP16)                                         \
  V(NEONScalarByIndexedElement)                                  \
  V(NEONScalarCopy)                                              \
  V(NEONScalarPairwise)                                          \
  V(NEONScalarShiftImmediate)                                    \
  V(NEONShiftImmediate)                                          \
  V(NEONTable)                                                   \
  V(PCRelAddressing)                                             \
  V(RotateRightIntoFlags)                                        \
  V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets)           \
  V(SVE32BitGatherLoad_VectorPlusImm)                            \
  V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets)    \
  V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets)        \
  V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets)         \
  V(SVE32BitGatherPrefetch_VectorPlusImm)                        \
  V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets)           \
  V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets)         \
  V(SVE32BitScatterStore_VectorPlusImm)                          \
  V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets)     \
  V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets)             \
  V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets)           \
  V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets)   \
  V(SVE64BitGatherLoad_VectorPlusImm)                            \
  V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets)         \
  V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
  V(SVE64BitGatherPrefetch_VectorPlusImm)                        \
  V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets)           \
  V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets)         \
  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets)   \
  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
  V(SVE64BitScatterStore_VectorPlusImm)                          \
  V(SVEAddressGeneration)                                        \
  V(SVEBitwiseLogicalUnpredicated)                               \
  V(SVEBitwiseShiftUnpredicated)                                 \
  V(SVEFFRInitialise)                                            \
  V(SVEFFRWriteFromPredicate)                                    \
  V(SVEFPAccumulatingReduction)                                  \
  V(SVEFPArithmeticUnpredicated)                                 \
  V(SVEFPCompareVectors)                                         \
  V(SVEFPCompareWithZero)                                        \
  V(SVEFPComplexAddition)                                        \
  V(SVEFPComplexMulAdd)                                          \
  V(SVEFPComplexMulAddIndex)                                     \
  V(SVEFPFastReduction)                                          \
  V(SVEFPMulIndex)                                               \
  V(SVEFPMulAdd)                                                 \
  V(SVEFPMulAddIndex)                                            \
  V(SVEFPUnaryOpUnpredicated)                                    \
  V(SVEIncDecByPredicateCount)                                   \
  V(SVEIndexGeneration)                                          \
  V(SVEIntArithmeticUnpredicated)                                \
  V(SVEIntCompareSignedImm)                                      \
  V(SVEIntCompareUnsignedImm)                                    \
  V(SVEIntCompareVectors)                                        \
  V(SVEIntMulAddPredicated)                                      \
  V(SVEIntMulAddUnpredicated)                                    \
  V(SVEIntReduction)                                             \
  V(SVEIntUnaryArithmeticPredicated)                             \
  V(SVEMovprfx)                                                  \
  V(SVEMulIndex)                                                 \
  V(SVEPermuteVectorExtract)                                     \
  V(SVEPermuteVectorInterleaving)                                \
  V(SVEPredicateCount)                                           \
  V(SVEPredicateLogical)                                         \
  V(SVEPropagateBreak)                                           \
  V(SVEStackFrameAdjustment)                                     \
  V(SVEStackFrameSize)                                           \
  V(SVEVectorSelect)                                             \
  V(SVEBitwiseLogical_Predicated)                                \
  V(SVEBitwiseLogicalWithImm_Unpredicated)                       \
  V(SVEBitwiseShiftByImm_Predicated)                             \
  V(SVEBitwiseShiftByVector_Predicated)                          \
  V(SVEBitwiseShiftByWideElements_Predicated)                    \
  V(SVEBroadcastBitmaskImm)                                      \
  V(SVEBroadcastFPImm_Unpredicated)                              \
  V(SVEBroadcastGeneralRegister)                                 \
  V(SVEBroadcastIndexElement)                                    \
  V(SVEBroadcastIntImm_Unpredicated)                             \
  V(SVECompressActiveElements)                                   \
  V(SVEConditionallyBroadcastElementToVector)                    \
  V(SVEConditionallyExtractElementToSIMDFPScalar)                \
  V(SVEConditionallyExtractElementToGeneralRegister)             \
  V(SVEConditionallyTerminateScalars)                            \
  V(SVEConstructivePrefix_Unpredicated)                          \
  V(SVEContiguousFirstFaultLoad_ScalarPlusScalar)                \
  V(SVEContiguousLoad_ScalarPlusImm)                             \
  V(SVEContiguousLoad_ScalarPlusScalar)                          \
  V(SVEContiguousNonFaultLoad_ScalarPlusImm)                     \
  V(SVEContiguousNonTemporalLoad_ScalarPlusImm)                  \
  V(SVEContiguousNonTemporalLoad_ScalarPlusScalar)               \
  V(SVEContiguousNonTemporalStore_ScalarPlusImm)                 \
  V(SVEContiguousNonTemporalStore_ScalarPlusScalar)              \
  V(SVEContiguousPrefetch_ScalarPlusImm)                         \
  V(SVEContiguousPrefetch_ScalarPlusScalar)                      \
  V(SVEContiguousStore_ScalarPlusImm)                            \
  V(SVEContiguousStore_ScalarPlusScalar)                         \
  V(SVECopySIMDFPScalarRegisterToVector_Predicated)              \
  V(SVECopyFPImm_Predicated)                                     \
  V(SVECopyGeneralRegisterToVector_Predicated)                   \
  V(SVECopyIntImm_Predicated)                                    \
  V(SVEElementCount)                                             \
  V(SVEExtractElementToSIMDFPScalarRegister)                     \
  V(SVEExtractElementToGeneralRegister)                          \
  V(SVEFPArithmetic_Predicated)                                  \
  V(SVEFPArithmeticWithImm_Predicated)                           \
  V(SVEFPConvertPrecision)                                       \
  V(SVEFPConvertToInt)                                           \
  V(SVEFPExponentialAccelerator)                                 \
  V(SVEFPRoundToIntegralValue)                                   \
  V(SVEFPTrigMulAddCoefficient)                                  \
  V(SVEFPTrigSelectCoefficient)                                  \
  V(SVEFPUnaryOp)                                                \
  V(SVEIncDecRegisterByElementCount)                             \
  V(SVEIncDecVectorByElementCount)                               \
  V(SVEInsertSIMDFPScalarRegister)                               \
  V(SVEInsertGeneralRegister)                                    \
  V(SVEIntAddSubtractImm_Unpredicated)                           \
  V(SVEIntAddSubtractVectors_Predicated)                         \
  V(SVEIntCompareScalarCountAndLimit)                            \
  V(SVEIntConvertToFP)                                           \
  V(SVEIntDivideVectors_Predicated)                              \
  V(SVEIntMinMaxImm_Unpredicated)                                \
  V(SVEIntMinMaxDifference_Predicated)                           \
  V(SVEIntMulImm_Unpredicated)                                   \
  V(SVEIntMulVectors_Predicated)                                 \
  V(SVELoadAndBroadcastElement)                                  \
  V(SVELoadAndBroadcastQOWord_ScalarPlusImm)                     \
  V(SVELoadAndBroadcastQOWord_ScalarPlusScalar)                  \
  V(SVELoadMultipleStructures_ScalarPlusImm)                     \
  V(SVELoadMultipleStructures_ScalarPlusScalar)                  \
  V(SVELoadPredicateRegister)                                    \
  V(SVELoadVectorRegister)                                       \
  V(SVEPartitionBreakCondition)                                  \
  V(SVEPermutePredicateElements)                                 \
  V(SVEPredicateFirstActive)                                     \
  V(SVEPredicateInitialize)                                      \
  V(SVEPredicateNextActive)                                      \
  V(SVEPredicateReadFromFFR_Predicated)                          \
  V(SVEPredicateReadFromFFR_Unpredicated)                        \
  V(SVEPredicateTest)                                            \
  V(SVEPredicateZero)                                            \
  V(SVEPropagateBreakToNextPartition)                            \
  V(SVEReversePredicateElements)                                 \
  V(SVEReverseVectorElements)                                    \
  V(SVEReverseWithinElements)                                    \
  V(SVESaturatingIncDecRegisterByElementCount)                   \
  V(SVESaturatingIncDecVectorByElementCount)                     \
  V(SVEStoreMultipleStructures_ScalarPlusImm)                    \
  V(SVEStoreMultipleStructures_ScalarPlusScalar)                 \
  V(SVEStorePredicateRegister)                                   \
  V(SVEStoreVectorRegister)                                      \
  V(SVETableLookup)                                              \
  V(SVEUnpackPredicateElements)                                  \
  V(SVEUnpackVectorElements)                                     \
  V(SVEVectorSplice)                                             \
  V(System)                                                      \
  V(TestBranch)                                                  \
  V(Unallocated)                                                 \
  V(UnconditionalBranch)                                         \
  V(UnconditionalBranchToRegister)                               \
  V(Unimplemented)
269 
// Visitor list entries that, unlike those above, are not expected to return
// to the decoder (currently only the Reserved encoding class).
#define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved)

// The complete visitor list: all returning visitors followed by the
// non-returning ones.
#define VISITOR_LIST(V)       \
  VISITOR_LIST_THAT_RETURN(V) \
  VISITOR_LIST_THAT_DONT_RETURN(V)
275 
276 namespace vixl {
277 namespace aarch64 {
278 
279 using Metadata = std::map<std::string, std::string>;
280 
281 // The Visitor interface consists only of the Visit() method. User classes
282 // that inherit from this one must provide an implementation of the method.
283 // Information about the instruction encountered by the Decoder is available
284 // via the metadata pointer.
class DecoderVisitor {
 public:
  // A "const" visitor promises never to modify the visited instruction;
  // MutableInstruction() asserts on this flag before stripping const.
  enum VisitorConstness { kConstVisitor, kNonConstVisitor };
  explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
      : constness_(constness) {}

  virtual ~DecoderVisitor() {}

  // Called by the decoder for each decoded instruction. `metadata` carries
  // string attributes describing the instruction (see `Metadata`).
  virtual void Visit(Metadata* metadata, const Instruction* instr) = 0;

  bool IsConstVisitor() const { return constness_ == kConstVisitor; }
  // Strip const from an instruction pointer. Only valid for non-const
  // visitors; enforced by assertion in debug builds.
  Instruction* MutableInstruction(const Instruction* instr) {
    VIXL_ASSERT(!IsConstVisitor());
    return const_cast<Instruction*>(instr);
  }

 protected:
  // Map from a 32-bit key (presumably a hash of the instruction form --
  // TODO confirm against the users of this alias) to the member function of
  // a concrete visitor type T that handles that form.
  template <typename T>
  using FormToVisitorFnMapT = std::unordered_map<
      uint32_t,
      std::function<void(T*, const Instruction*)>>;

 private:
  const VisitorConstness constness_;
};
310 
311 class DecodeNode;
312 class CompiledDecodeNode;
313 
314 // The instruction decoder is constructed from a graph of decode nodes. At each
315 // node, a number of bits are sampled from the instruction being decoded. The
316 // resulting value is used to look up the next node in the graph, which then
317 // samples other bits, and moves to other decode nodes. Eventually, a visitor
318 // node is reached, and the corresponding visitor function is called, which
319 // handles the instruction.
class Decoder {
 public:
#ifndef PANDA_BUILD
  Decoder() { ConstructDecodeGraph(); }
#else
  // Panda builds route all container allocations through an externally
  // supplied allocator.
  Decoder(AllocatorWrapper allocator) :
      allocator_(allocator),
      visitors_(allocator_.Adapter()),
      decode_nodes_(allocator_.Adapter()) {
    ConstructDecodeGraph();
  }
#endif

  // The decoder owns its visitor list and compiled decode graph; it is
  // neither copyable nor movable.
  Decoder(const Decoder&) = delete;
  Decoder(Decoder&&) = delete;
  Decoder& operator=(const Decoder&) = delete;
  Decoder& operator=(Decoder&&) = delete;

  ~Decoder() = default;

  // NOTE(review): allocator_ is declared unconditionally below, so this
  // accessor presumably works in both PANDA_BUILD and non-PANDA
  // configurations -- confirm AllocatorWrapper is defined for both.
  auto GetAllocator() {
    return allocator_;
  }

  // Top-level wrappers around the actual decoding function.
  void Decode(const Instruction* instr);
  void Decode(Instruction* instr);

  // Decode all instructions from start (inclusive) to end (exclusive).
  // T is expected to be Instruction* or const Instruction*.
  template <typename T>
  void Decode(T start, T end) {
    for (T instr = start; instr < end; instr = instr->GetNextInstruction()) {
      Decode(instr);
    }
  }

  // Register a new visitor class with the decoder.
  // Decode() will call the corresponding visitor method from all registered
  // visitor classes when decoding reaches the leaf node of the instruction
  // decode tree.
  // Visitors are called in order.
  // A visitor can be registered multiple times.
  //
  //   d.AppendVisitor(V1);
  //   d.AppendVisitor(V2);
  //   d.PrependVisitor(V2);
  //   d.AppendVisitor(V3);
  //
  //   d.Decode(i);
  //
  // will call in order visitor methods in V2, V1, V2, V3.
  void AppendVisitor(DecoderVisitor* visitor);
  void PrependVisitor(DecoderVisitor* visitor);
  // These helpers register `new_visitor` before or after the first instance of
  // `registered_visitor` in the list.
  // So if
  //   V1, V2, V1, V2
  // are registered in this order in the decoder, calls to
  //   d.InsertVisitorAfter(V3, V1);
  //   d.InsertVisitorBefore(V4, V2);
  // will yield the order
  //   V1, V3, V4, V2, V1, V2
  //
  // For more complex modifications of the order of registered visitors, one can
  // directly access and modify the list of visitors via the `visitors()'
  // accessor.
  void InsertVisitorBefore(DecoderVisitor* new_visitor,
                           DecoderVisitor* registered_visitor);
  void InsertVisitorAfter(DecoderVisitor* new_visitor,
                          DecoderVisitor* registered_visitor);

  // Remove all instances of a previously registered visitor class from the list
  // of visitors stored by the decoder.
  void RemoveVisitor(DecoderVisitor* visitor);

  auto GetAllocator() const {
    return allocator_;
  }

  // RAII helper: appends a group of visitors on construction and, on
  // destruction, erases everything from the first appended visitor to the
  // current end of the list.
  class ScopedAddVisitors {
   public:
    ScopedAddVisitors(Decoder& decoder, std::initializer_list<DecoderVisitor*> visitors)
      : visitors_(decoder.visitors_)
      , old_end_(visitors_.insert(visitors_.end(), visitors)) {
    }

    ~ScopedAddVisitors() {
      visitors_.erase(old_end_, visitors_.end());
    }

   private:
    List<DecoderVisitor*>& visitors_;
    // Iterator to the first element inserted by this scope.
    List<DecoderVisitor*>::iterator old_end_;
  };

  // Dispatch `instr` to the registered visitors under the instruction class
  // identified by `name`. (Declared here; defined out of line.)
  void VisitNamedInstruction(const Instruction* instr, const std::string_view name);

#ifndef PANDA_BUILD
  std::list<DecoderVisitor*>* visitors() { return &visitors_; }
#else
  List<DecoderVisitor*>* visitors() { return &visitors_; }
#endif

  // Get a DecodeNode by name from the Decoder's map.
  DecodeNode* GetDecodeNode(const String& name);

 private:
  // Decodes an instruction and calls the visitor functions registered with the
  // Decoder class.
  void DecodeInstruction(const Instruction* instr);

  // Add an initialised DecodeNode to the decode_node_ map.
  void AddDecodeNode(const DecodeNode& node);

  AllocatorWrapper allocator_;

  // Visitors are registered in a list.
  // NOTE(review): visitors() above returns std::list<...>* in non-PANDA
  // builds while this member is declared List<...>; presumably List aliases
  // std::list in that configuration -- confirm.
  List<DecoderVisitor*> visitors_;
  // Map of node names to DecodeNodes.
  Map<String, DecodeNode> decode_nodes_;

  // Compile the dynamically generated decode graph based on the static
  // information in kDecodeMapping and kVisitorNodes.
  void ConstructDecodeGraph();

  // Root node for the compiled decoder graph, stored here to avoid a map lookup
  // for every instruction decoded.
  CompiledDecodeNode* compiled_decoder_root_;
};
449 
450 typedef void (Decoder::*DecodeFnPtr)(const Instruction*);
451 typedef uint32_t (Instruction::*BitExtractFn)(void) const;
452 
// A Visitor node maps the name of a visitor to the function that handles it.
struct VisitorNode {
  const char* name;              // Instruction-class (visitor) name.
  const DecodeFnPtr visitor_fn;  // Decoder member function that handles it.
};
458 
459 // DecodePattern and DecodeMapping represent the input data to the decoder
460 // compilation stage. After compilation, the decoder is embodied in the graph
461 // of CompiledDecodeNodes pointer to by compiled_decoder_root_.
462 
// A DecodePattern maps a pattern of set/unset/don't care (1, 0, x) bits encoded
// as uint32_t to its handler.
// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10.
// 0b11 marks the edge of the most-significant bits of the pattern, which is
// required to determine the length. For example, the pattern "1x01"_b is
// encoded in a uint32_t as 0b11_01_10_00_01.
struct DecodePattern {
  uint32_t pattern;     // Two-bits-per-symbol encoding described above.
  const char* handler;  // Name of the node/visitor handling this pattern.
};
473 
// A DecodeMapping consists of the name of a handler, the bits sampled in the
// instruction by that handler, and a mapping from the pattern that those
// sampled bits match to the corresponding name of a node.
struct DecodeMapping {
  const char* name;                          // Name of this decode node.
  const std::vector<uint8_t> sampled_bits;   // Instruction bit positions sampled.
  const std::vector<DecodePattern> mapping;  // Pattern -> next node name.
};
482 
483 // For speed, before nodes can be used for decoding instructions, they must
484 // be compiled. This converts the mapping "bit pattern strings to decoder name
485 // string" stored in DecodeNodes to an array look up for the pointer to the next
486 // node, stored in CompiledDecodeNodes. Compilation may also apply other
487 // optimisations for simple decode patterns.
class CompiledDecodeNode {
 public:
  // Constructor for decode node, containing a decode table and pointer to a
  // function that extracts the bits to be sampled.
#ifndef PANDA_BUILD
  CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size)
#else
  CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size, AllocatorWrapper allocator)
#endif
      : bit_extract_fn_(bit_extract_fn),
        instruction_name_("node"),
        decode_table_size_(decode_table_size),
        decoder_(NULL) {
#ifndef PANDA_BUILD
    decode_table_ = new CompiledDecodeNode*[decode_table_size_];
#else
    decode_table_ = allocator.New<CompiledDecodeNode*[]>(decode_table_size_);
#endif
    // Entries are filled in lazily via SetNodeForBits(); start all-NULL.
    memset(decode_table_, 0, decode_table_size_ * sizeof(decode_table_[0]));
  }

  // Constructor for wrappers around visitor functions. These require no
  // decoding, so no bit extraction function or decode table is assigned.
  explicit CompiledDecodeNode(const std::string_view iname, Decoder* decoder)
      : bit_extract_fn_(NULL),
        instruction_name_(iname),
        decode_table_(NULL),
        decode_table_size_(0),
        decoder_(decoder) {}

  ~CompiledDecodeNode() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
    // Free the decode table, if this is a compiled, non-leaf node.
    // NOTE(review): allocation above is guarded by PANDA_BUILD while this
    // delete[] is guarded by VIXL_USE_PANDA_ALLOC -- confirm the two macros
    // are always defined together, otherwise the table could leak or be
    // freed with the wrong deallocator.
    if (decode_table_ != NULL) {
      VIXL_ASSERT(!IsLeafNode());
#ifndef VIXL_USE_PANDA_ALLOC
      delete[] decode_table_;
#endif
    }
  }

  // Decode the instruction by either sampling the bits using the bit extract
  // function to find the next node, or, if we're at a leaf, calling the visitor
  // function.
  void Decode(const Instruction* instr) const;

  // A leaf node is a wrapper for a visitor function. The sentinel name
  // "node" marks non-leaf (table) nodes; the assertion checks that exactly
  // one of {bit extractor, real instruction name} is present.
  bool IsLeafNode() const {
    VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) ||
                ((instruction_name_ != "node") && (bit_extract_fn_ == NULL)));
    return instruction_name_ != "node";
  }

  // Get a pointer to the next node required in the decode process, based on the
  // bits sampled by the current node.
  CompiledDecodeNode* GetNodeForBits(uint32_t bits) const {
    VIXL_ASSERT(bits < decode_table_size_);
    return decode_table_[bits];
  }

  // Set the next node in the decode process for the pattern of sampled bits in
  // the current node.
  void SetNodeForBits(uint32_t bits, CompiledDecodeNode* n) {
    VIXL_ASSERT(bits < decode_table_size_);
    VIXL_ASSERT(n != NULL);
    decode_table_[bits] = n;
  }

 private:
  // Pointer to an instantiated template function for extracting the bits
  // sampled by this node. Set to NULL for leaf nodes.
  const BitExtractFn bit_extract_fn_;

  // Name of the instruction class handled by this node. Set only for leaf
  // nodes, where no extra decoding is required; otherwise the sentinel
  // "node".
  std::string_view instruction_name_;

  // Mapping table from instruction bits to next decode stage.
  CompiledDecodeNode** decode_table_;
  const size_t decode_table_size_;

  // Pointer to the decoder containing this node, used to call its visitor
  // function for leaf nodes. Set to NULL for non-leaf nodes.
  Decoder* decoder_;
};
572 
// A node in the decode graph. A DecodeNode is either:
//  - a leaf, wrapping the name of an instruction form whose visitor function
//    the Decoder should call, or
//  - an interior node, sampling a set of instruction bits and mapping the
//    sampled patterns to the names of successor DecodeNodes.
// Nodes are "compiled" on demand into CompiledDecodeNodes, which hold the
// fast lookup tables actually used during decoding.
class DecodeNode {
 public:
  // Constructor for DecodeNode wrappers around visitor functions. These are
  // marked as "compiled", as there is no decoding left to do.
  // The node's name and instruction name are both set to "iname".
  explicit DecodeNode(const std::string_view iname, Decoder* decoder)
      :
// NOTE: allocator_ is only explicitly initialized in PANDA builds. This
// relies on member *declaration* order (allocator_ is declared first), so
// allocator_ is constructed before name_'s initializer calls
// allocator_.Adapter() below — in non-PANDA builds it is default-constructed.
#ifdef PANDA_BUILD
        allocator_(decoder->GetAllocator()),
#endif
        name_(iname, allocator_.Adapter()),
        // Leaf nodes sample no bits and have no pattern table; bind the
        // reference members to shared static empties.
        sampled_bits_(kEmptySampledBits),
        instruction_name_(iname),
        pattern_table_(kEmptyPatternTable),
        decoder_(decoder),
        compiled_node_(NULL) {}

  // Constructor for DecodeNodes that map bit patterns to other DecodeNodes.
  // "map" supplies the node name, the bit positions to sample, and the
  // pattern-to-successor mapping; the referenced DecodeMapping must outlive
  // this node, since sampled_bits_ and pattern_table_ alias its members.
  explicit DecodeNode(const DecodeMapping& map, Decoder* decoder)
      :
#ifdef PANDA_BUILD
        allocator_(decoder->GetAllocator()),
#endif
        name_(map.name, allocator_.Adapter()),
        sampled_bits_(map.sampled_bits),
        // Interior nodes are distinguished from leaves by the sentinel
        // instruction name "node" (see IsLeafNode()).
        instruction_name_("node"),
        pattern_table_(map.mapping),
        decoder_(decoder),
        compiled_node_(NULL) {
    // With the current two bits per symbol encoding scheme, the maximum pattern
    // length is (32 - 2) / 2 = 15 bits.
    VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15);
    // Every pattern in the mapping must describe exactly one symbol per
    // sampled bit.
    for (const DecodePattern& p : map.mapping) {
      VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size());
    }
  }

  // NOTE(review): compiled_node_ is an owning raw pointer (see the destructor)
  // but copy/move special members are left implicit, so copying a DecodeNode
  // would alias the owned CompiledDecodeNode. Presumably the Decoder stores
  // nodes in a way that avoids post-compilation copies — confirm before
  // relying on copy semantics.
  ~DecodeNode() {
// NOTE(review): the destructor is guarded by VIXL_USE_PANDA_ALLOC while the
// constructors use PANDA_BUILD; under arena allocation the compiled node is
// reclaimed with the arena rather than deleted here. Confirm the two macros
// are always set together.
#ifndef VIXL_USE_PANDA_ALLOC
    // Delete the compiled version of this node, if one was created.
    if (compiled_node_ != NULL) {
      delete compiled_node_;
    }
#endif
  }

  // Return (a copy of) the allocator wrapper used for this node's
  // allocations.
  auto GetAllocator() const {
    return allocator_;
  }

  // Get the bits sampled from the instruction by this node.
  const std::vector<uint8_t>& GetSampledBits() const { return sampled_bits_; }

  // Get the number of bits sampled from the instruction by this node.
  size_t GetSampledBitsCount() const { return sampled_bits_.size(); }

  // A leaf node is a DecodeNode that wraps the visitor function for the
  // identified instruction class. Interior nodes are tagged with the
  // sentinel instruction name "node" by the DecodeMapping constructor.
  bool IsLeafNode() const { return instruction_name_ != "node"; }

  // Get the name of this node, used to construct edges in the decode graph.
  const String& GetName() const { return name_; }

  // Create a CompiledDecodeNode of specified table size that uses
  // bit_extract_fn to sample bits from the instruction.
  // The new node is stored in compiled_node_; any previously compiled node
  // is not freed here.
  void CreateCompiledNode(BitExtractFn bit_extract_fn, size_t table_size) {
    VIXL_ASSERT(bit_extract_fn != NULL);
    VIXL_ASSERT(table_size > 0);
#ifndef PANDA_BUILD
    compiled_node_ = new CompiledDecodeNode(bit_extract_fn, table_size);
#else
    // PANDA builds allocate compiled nodes from the decoder's arena
    // allocator instead of the heap.
    auto allocator{decoder_->GetAllocator()};
    compiled_node_ = allocator.New<CompiledDecodeNode>(bit_extract_fn, table_size, allocator);
#endif
  }

  // Create a CompiledDecodeNode wrapping a visitor function. No decoding is
  // required for this node; the visitor function is called instead.
  void CreateVisitorNode() {
#ifndef PANDA_BUILD
    compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_);
#else
    auto allocator{decoder_->GetAllocator()};
    compiled_node_ = allocator.New<CompiledDecodeNode>(instruction_name_, decoder_);
#endif
  }

  // Find and compile the DecodeNode named "name", and set it as the node for
  // the pattern "bits".
  void CompileNodeForBits(Decoder* decoder, const String& name, uint32_t bits);

  // Get a pointer to an instruction method that extracts the instruction bits
  // specified by the mask argument, and returns those sampled bits as a
  // contiguous sequence, suitable for indexing an array.
  // For example, a mask of 0b1010 returns a function that, given an instruction
  // 0bXYZW, will return 0bXZ.
  BitExtractFn GetBitExtractFunction(uint32_t mask) {
    // A zero second argument selects "extract" mode in the helper.
    return GetBitExtractFunctionHelper(mask, 0);
  }

  // Get a pointer to an Instruction method that applies a mask to the
  // instruction bits, and tests if the result is equal to value. The returned
  // function gives a 1 result if (inst & mask == value), 0 otherwise.
  BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) {
    // Note the argument order: the helper takes (value, mask) in this mode.
    return GetBitExtractFunctionHelper(value, mask);
  }

  // Compile this DecodeNode into a new CompiledDecodeNode and returns a pointer
  // to it. This pointer is also stored inside the DecodeNode itself. Destroying
  // a DecodeNode frees its associated CompiledDecodeNode.
  CompiledDecodeNode* Compile(Decoder* decoder);

  // Get a pointer to the CompiledDecodeNode associated with this DecodeNode.
  // Returns NULL if the node has not been compiled yet.
  CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; }
  bool IsCompiled() const { return GetCompiledNode() != NULL; }

  // A pattern is a sequence of 2-bit symbols — 0, 1 or x (don't care) —
  // packed into a uint32_t, terminated by the kEndOfPattern marker (0b11)
  // in the two bits above the most-significant symbol.
  enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 };
  static const uint32_t kEndOfPattern = 3;
  static const uint32_t kPatternSymbolMask = 3;

  // Return the number of symbols in "pattern", derived from the position of
  // the end-of-pattern marker.
  size_t GetPatternLength(uint32_t pattern) const {
    uint32_t hsb = HighestSetBitPosition(pattern);
    // The pattern length is signified by two set bits in a two bit-aligned
    // position. Ensure that the pattern has a highest set bit, it's at an odd
    // bit position, and that the bit to the right of the hsb is also set.
    VIXL_ASSERT(((hsb % 2) == 1) && (pattern >> (hsb - 1)) == kEndOfPattern);
    return hsb / 2;
  }

  // Return true if any symbol in "pattern" equals "symbol".
  // Scans 2-bit symbols from the least-significant end until the
  // end-of-pattern marker is reached.
  bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const {
    while ((pattern & kPatternSymbolMask) != kEndOfPattern) {
      if (static_cast<PatternSymbol>(pattern & kPatternSymbolMask) == symbol)
        return true;
      pattern >>= 2;
    }
    return false;
  }

  // Return the symbol at position "pos" in "pattern". Position 0 is the
  // most-significant symbol (stored just below the end-of-pattern marker).
  PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const {
    size_t len = GetPatternLength(pattern);
    VIXL_ASSERT((pos < 15) && (pos < len));
    uint32_t shift = static_cast<uint32_t>(2 * (len - pos - 1));
    uint32_t sym = (pattern >> shift) & kPatternSymbolMask;
    return static_cast<PatternSymbol>(sym);
  }

 private:
  // Generate a mask and value pair from a pattern constructed from 0, 1 and x
  // (don't care) 2-bit symbols.
  // For example "10x1"_b should return mask = 0b1101, value = 0b1001.
  typedef std::pair<Instr, Instr> MaskValuePair;
  MaskValuePair GenerateMaskValuePair(uint32_t pattern) const;

  // Generate a pattern ordered by the bit positions sampled by this node.
  // The symbol corresponding to the lowest sample position is placed in the
  // least-significant bits of the result pattern.
  // For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30
  // returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31
  // '1'.
  // This output makes comparisons easier between the pattern and bits sampled
  // from an instruction using the fast "compress" algorithm. See
  // Instruction::Compress().
  uint32_t GenerateOrderedPattern(uint32_t pattern) const;

  // Generate a mask with a bit set at each sample position.
  uint32_t GenerateSampledBitsMask() const;

  // Try to compile a more optimised decode operation for this node, returning
  // true if successful.
  bool TryCompileOptimisedDecodeTable(Decoder* decoder);

  // Helper function that returns a bit extracting function. If y is zero,
  // x is a bit extraction mask. Otherwise, y is the mask, and x is the value
  // to match after masking.
  BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y);

  // Allocator used for name_ and (in PANDA builds) other allocations.
  // Declared first so it is initialized before name_ (see the constructors).
  AllocatorWrapper allocator_;

  // Name of this decoder node, used to construct edges in the decode graph.
  String name_;

  // Vector of bits sampled from an instruction to determine which node to look
  // up next in the decode process. Aliases the DecodeMapping passed to the
  // constructor, or kEmptySampledBits for leaf nodes.
  const std::vector<uint8_t>& sampled_bits_;
  static const std::vector<uint8_t> kEmptySampledBits;

  // For leaf nodes, this is the name of the instruction form that the node
  // represents. For other nodes, this is always set to "node".
  std::string_view instruction_name_;

  // Source mapping from bit pattern to name of next decode stage. Aliases the
  // DecodeMapping passed to the constructor, or kEmptyPatternTable for leaf
  // nodes.
  const std::vector<DecodePattern>& pattern_table_;
  static const std::vector<DecodePattern> kEmptyPatternTable;

  // Pointer to the decoder containing this node, used to call its visitor
  // function for leaf nodes.
  Decoder* decoder_;

  // Pointer to the compiled version of this node. If this node hasn't been
  // compiled yet, this pointer is NULL. Owned by this node in non-arena
  // builds (freed in the destructor).
  CompiledDecodeNode* compiled_node_;
};
774 
775 }  // namespace aarch64
776 }  // namespace vixl
777 
778 #endif  // VIXL_AARCH64_DECODER_AARCH64_H_
779