// Copyright 2019, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may
//     be used to endorse or promote products derived from this software
//     without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_AARCH64_DECODER_AARCH64_H_
#define VIXL_AARCH64_DECODER_AARCH64_H_

#include <functional>
#include <initializer_list>
#include <list>
#include <map>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>

#include "../globals-vixl.h"
#include "../utils-vixl.h"

#include "instructions-aarch64.h"

// List macro containing all visitors needed by the decoder class.
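// The list is expanded as an X-macro: a user defines V to generate one item
// per visitor name. A sketch of a typical expansion (illustrative only, not
// code from this file):
//
//   #define V(Name) void Visit##Name(const Instruction* instr);
//   VISITOR_LIST(V)
//   #undef V
//
// which declares one handler per instruction form in the list.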
#define VISITOR_LIST_THAT_RETURN(V) \
  V(AddSubExtended) \
  V(AddSubImmediate) \
  V(AddSubShifted) \
  V(AddSubWithCarry) \
  V(AtomicMemory) \
  V(Bitfield) \
  V(CompareBranch) \
  V(ConditionalBranch) \
  V(ConditionalCompareImmediate) \
  V(ConditionalCompareRegister) \
  V(ConditionalSelect) \
  V(Crypto2RegSHA) \
  V(Crypto3RegSHA) \
  V(CryptoAES) \
  V(DataProcessing1Source) \
  V(DataProcessing2Source) \
  V(DataProcessing3Source) \
  V(EvaluateIntoFlags) \
  V(Exception) \
  V(Extract) \
  V(FPCompare) \
  V(FPConditionalCompare) \
  V(FPConditionalSelect) \
  V(FPDataProcessing1Source) \
  V(FPDataProcessing2Source) \
  V(FPDataProcessing3Source) \
  V(FPFixedPointConvert) \
  V(FPImmediate) \
  V(FPIntegerConvert) \
  V(LoadLiteral) \
  V(LoadStoreExclusive) \
  V(LoadStorePAC) \
  V(LoadStorePairNonTemporal) \
  V(LoadStorePairOffset) \
  V(LoadStorePairPostIndex) \
  V(LoadStorePairPreIndex) \
  V(LoadStorePostIndex) \
  V(LoadStorePreIndex) \
  V(LoadStoreRCpcUnscaledOffset) \
  V(LoadStoreRegisterOffset) \
  V(LoadStoreUnscaledOffset) \
  V(LoadStoreUnsignedOffset) \
  V(LogicalImmediate) \
  V(LogicalShifted) \
  V(MoveWideImmediate) \
  V(NEON2RegMisc) \
  V(NEON2RegMiscFP16) \
  V(NEON3Different) \
  V(NEON3Same) \
  V(NEON3SameExtra) \
  V(NEON3SameFP16) \
  V(NEONAcrossLanes) \
  V(NEONByIndexedElement) \
  V(NEONCopy) \
  V(NEONExtract) \
  V(NEONLoadStoreMultiStruct) \
  V(NEONLoadStoreMultiStructPostIndex) \
  V(NEONLoadStoreSingleStruct) \
  V(NEONLoadStoreSingleStructPostIndex) \
  V(NEONModifiedImmediate) \
  V(NEONPerm) \
  V(NEONScalar2RegMisc) \
  V(NEONScalar2RegMiscFP16) \
  V(NEONScalar3Diff) \
  V(NEONScalar3Same) \
  V(NEONScalar3SameExtra) \
  V(NEONScalar3SameFP16) \
  V(NEONScalarByIndexedElement) \
  V(NEONScalarCopy) \
  V(NEONScalarPairwise) \
  V(NEONScalarShiftImmediate) \
  V(NEONShiftImmediate) \
  V(NEONTable) \
  V(PCRelAddressing) \
  V(RotateRightIntoFlags) \
  V(SVE32BitGatherLoad_ScalarPlus32BitUnscaledOffsets) \
  V(SVE32BitGatherLoad_VectorPlusImm) \
  V(SVE32BitGatherLoadHalfwords_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitGatherLoadWords_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitGatherPrefetch_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitGatherPrefetch_VectorPlusImm) \
  V(SVE32BitScatterStore_ScalarPlus32BitScaledOffsets) \
  V(SVE32BitScatterStore_ScalarPlus32BitUnscaledOffsets) \
  V(SVE32BitScatterStore_VectorPlusImm) \
  V(SVE64BitGatherLoad_ScalarPlus32BitUnpackedScaledOffsets) \
  V(SVE64BitGatherLoad_ScalarPlus64BitScaledOffsets) \
  V(SVE64BitGatherLoad_ScalarPlus64BitUnscaledOffsets) \
  V(SVE64BitGatherLoad_ScalarPlusUnpacked32BitUnscaledOffsets) \
  V(SVE64BitGatherLoad_VectorPlusImm) \
  V(SVE64BitGatherPrefetch_ScalarPlus64BitScaledOffsets) \
  V(SVE64BitGatherPrefetch_ScalarPlusUnpacked32BitScaledOffsets) \
  V(SVE64BitGatherPrefetch_VectorPlusImm) \
  V(SVE64BitScatterStore_ScalarPlus64BitScaledOffsets) \
  V(SVE64BitScatterStore_ScalarPlus64BitUnscaledOffsets) \
  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitScaledOffsets) \
  V(SVE64BitScatterStore_ScalarPlusUnpacked32BitUnscaledOffsets) \
  V(SVE64BitScatterStore_VectorPlusImm) \
  V(SVEAddressGeneration) \
  V(SVEBitwiseLogicalUnpredicated) \
  V(SVEBitwiseShiftUnpredicated) \
  V(SVEFFRInitialise) \
  V(SVEFFRWriteFromPredicate) \
  V(SVEFPAccumulatingReduction) \
  V(SVEFPArithmeticUnpredicated) \
  V(SVEFPCompareVectors) \
  V(SVEFPCompareWithZero) \
  V(SVEFPComplexAddition) \
  V(SVEFPComplexMulAdd) \
  V(SVEFPComplexMulAddIndex) \
  V(SVEFPFastReduction) \
  V(SVEFPMulIndex) \
  V(SVEFPMulAdd) \
  V(SVEFPMulAddIndex) \
  V(SVEFPUnaryOpUnpredicated) \
  V(SVEIncDecByPredicateCount) \
  V(SVEIndexGeneration) \
  V(SVEIntArithmeticUnpredicated) \
  V(SVEIntCompareSignedImm) \
  V(SVEIntCompareUnsignedImm) \
  V(SVEIntCompareVectors) \
  V(SVEIntMulAddPredicated) \
  V(SVEIntMulAddUnpredicated) \
  V(SVEIntReduction) \
  V(SVEIntUnaryArithmeticPredicated) \
  V(SVEMovprfx) \
  V(SVEMulIndex) \
  V(SVEPermuteVectorExtract) \
  V(SVEPermuteVectorInterleaving) \
  V(SVEPredicateCount) \
  V(SVEPredicateLogical) \
  V(SVEPropagateBreak) \
  V(SVEStackFrameAdjustment) \
  V(SVEStackFrameSize) \
  V(SVEVectorSelect) \
  V(SVEBitwiseLogical_Predicated) \
  V(SVEBitwiseLogicalWithImm_Unpredicated) \
  V(SVEBitwiseShiftByImm_Predicated) \
  V(SVEBitwiseShiftByVector_Predicated) \
  V(SVEBitwiseShiftByWideElements_Predicated) \
  V(SVEBroadcastBitmaskImm) \
  V(SVEBroadcastFPImm_Unpredicated) \
  V(SVEBroadcastGeneralRegister) \
  V(SVEBroadcastIndexElement) \
  V(SVEBroadcastIntImm_Unpredicated) \
  V(SVECompressActiveElements) \
  V(SVEConditionallyBroadcastElementToVector) \
  V(SVEConditionallyExtractElementToSIMDFPScalar) \
  V(SVEConditionallyExtractElementToGeneralRegister) \
  V(SVEConditionallyTerminateScalars) \
  V(SVEConstructivePrefix_Unpredicated) \
  V(SVEContiguousFirstFaultLoad_ScalarPlusScalar) \
  V(SVEContiguousLoad_ScalarPlusImm) \
  V(SVEContiguousLoad_ScalarPlusScalar) \
  V(SVEContiguousNonFaultLoad_ScalarPlusImm) \
  V(SVEContiguousNonTemporalLoad_ScalarPlusImm) \
  V(SVEContiguousNonTemporalLoad_ScalarPlusScalar) \
  V(SVEContiguousNonTemporalStore_ScalarPlusImm) \
  V(SVEContiguousNonTemporalStore_ScalarPlusScalar) \
  V(SVEContiguousPrefetch_ScalarPlusImm) \
  V(SVEContiguousPrefetch_ScalarPlusScalar) \
  V(SVEContiguousStore_ScalarPlusImm) \
  V(SVEContiguousStore_ScalarPlusScalar) \
  V(SVECopySIMDFPScalarRegisterToVector_Predicated) \
  V(SVECopyFPImm_Predicated) \
  V(SVECopyGeneralRegisterToVector_Predicated) \
  V(SVECopyIntImm_Predicated) \
  V(SVEElementCount) \
  V(SVEExtractElementToSIMDFPScalarRegister) \
  V(SVEExtractElementToGeneralRegister) \
  V(SVEFPArithmetic_Predicated) \
  V(SVEFPArithmeticWithImm_Predicated) \
  V(SVEFPConvertPrecision) \
  V(SVEFPConvertToInt) \
  V(SVEFPExponentialAccelerator) \
  V(SVEFPRoundToIntegralValue) \
  V(SVEFPTrigMulAddCoefficient) \
  V(SVEFPTrigSelectCoefficient) \
  V(SVEFPUnaryOp) \
  V(SVEIncDecRegisterByElementCount) \
  V(SVEIncDecVectorByElementCount) \
  V(SVEInsertSIMDFPScalarRegister) \
  V(SVEInsertGeneralRegister) \
  V(SVEIntAddSubtractImm_Unpredicated) \
  V(SVEIntAddSubtractVectors_Predicated) \
  V(SVEIntCompareScalarCountAndLimit) \
  V(SVEIntConvertToFP) \
  V(SVEIntDivideVectors_Predicated) \
  V(SVEIntMinMaxImm_Unpredicated) \
  V(SVEIntMinMaxDifference_Predicated) \
  V(SVEIntMulImm_Unpredicated) \
  V(SVEIntMulVectors_Predicated) \
  V(SVELoadAndBroadcastElement) \
  V(SVELoadAndBroadcastQOWord_ScalarPlusImm) \
  V(SVELoadAndBroadcastQOWord_ScalarPlusScalar) \
  V(SVELoadMultipleStructures_ScalarPlusImm) \
  V(SVELoadMultipleStructures_ScalarPlusScalar) \
  V(SVELoadPredicateRegister) \
  V(SVELoadVectorRegister) \
  V(SVEPartitionBreakCondition) \
  V(SVEPermutePredicateElements) \
  V(SVEPredicateFirstActive) \
  V(SVEPredicateInitialize) \
  V(SVEPredicateNextActive) \
  V(SVEPredicateReadFromFFR_Predicated) \
  V(SVEPredicateReadFromFFR_Unpredicated) \
  V(SVEPredicateTest) \
  V(SVEPredicateZero) \
  V(SVEPropagateBreakToNextPartition) \
  V(SVEReversePredicateElements) \
  V(SVEReverseVectorElements) \
  V(SVEReverseWithinElements) \
  V(SVESaturatingIncDecRegisterByElementCount) \
  V(SVESaturatingIncDecVectorByElementCount) \
  V(SVEStoreMultipleStructures_ScalarPlusImm) \
  V(SVEStoreMultipleStructures_ScalarPlusScalar) \
  V(SVEStorePredicateRegister) \
  V(SVEStoreVectorRegister) \
  V(SVETableLookup) \
  V(SVEUnpackPredicateElements) \
  V(SVEUnpackVectorElements) \
  V(SVEVectorSplice) \
  V(System) \
  V(TestBranch) \
  V(Unallocated) \
  V(UnconditionalBranch) \
  V(UnconditionalBranchToRegister) \
  V(Unimplemented)

#define VISITOR_LIST_THAT_DONT_RETURN(V) V(Reserved)

#define VISITOR_LIST(V)       \
  VISITOR_LIST_THAT_RETURN(V) \
  VISITOR_LIST_THAT_DONT_RETURN(V)

namespace vixl {
namespace aarch64 {

using Metadata = std::map<std::string, std::string>;

// The Visitor interface consists only of the Visit() method. User classes
// that inherit from this one must provide an implementation of the method.
// Information about the instruction encountered by the Decoder is available
// via the metadata pointer.
class DecoderVisitor {
 public:
  enum VisitorConstness { kConstVisitor, kNonConstVisitor };

  explicit DecoderVisitor(VisitorConstness constness = kConstVisitor)
      : constness_(constness) {}

  virtual ~DecoderVisitor() {}

  virtual void Visit(Metadata* metadata, const Instruction* instr) = 0;

  bool IsConstVisitor() const { return constness_ == kConstVisitor; }
  Instruction* MutableInstruction(const Instruction* instr) {
    VIXL_ASSERT(!IsConstVisitor());
    return const_cast<Instruction*>(instr);
  }

 protected:
  template <typename T>
  using FormToVisitorFnMapT =
      std::unordered_map<uint32_t,
                         std::function<void(T*, const Instruction*)>>;

 private:
  const VisitorConstness constness_;
};

class DecodeNode;
class CompiledDecodeNode;

// The instruction decoder is constructed from a graph of decode nodes. At each
// node, a number of bits are sampled from the instruction being decoded. The
// resulting value is used to look up the next node in the graph, which then
// samples other bits, and moves to other decode nodes. Eventually, a visitor
// node is reached, and the corresponding visitor function is called, which
// handles the instruction.
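//
// A minimal usage sketch (illustrative only; assumes the default
// (non-PANDA_BUILD) constructor and a valid `instr` pointer):
//
//   class CountingVisitor : public DecoderVisitor {
//    public:
//     void Visit(Metadata* metadata, const Instruction* instr) override {
//       count++;
//     }
//     int count = 0;
//   };
//
//   CountingVisitor v;
//   Decoder decoder;
//   decoder.AppendVisitor(&v);
//   decoder.Decode(instr);  // Calls v.Visit() once for this instruction.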
class Decoder {
 public:
#ifndef PANDA_BUILD
  Decoder() { ConstructDecodeGraph(); }
#else
  Decoder(AllocatorWrapper allocator)
      : allocator_(allocator),
        visitors_(allocator_.Adapter()),
        decode_nodes_(allocator_.Adapter()) {
    ConstructDecodeGraph();
  }
#endif

  Decoder(const Decoder&) = delete;
  Decoder(Decoder&&) = delete;
  Decoder& operator=(const Decoder&) = delete;
  Decoder& operator=(Decoder&&) = delete;

  ~Decoder() = default;

  auto GetAllocator() { return allocator_; }

  // Top-level wrappers around the actual decoding function.
  void Decode(const Instruction* instr);
  void Decode(Instruction* instr);

  // Decode all instructions from start (inclusive) to end (exclusive).
  template <typename T>
  void Decode(T start, T end) {
    for (T instr = start; instr < end; instr = instr->GetNextInstruction()) {
      Decode(instr);
    }
  }

  // Register a new visitor class with the decoder.
  // Decode() will call the corresponding visitor method from all registered
  // visitor classes when decoding reaches the leaf node of the instruction
  // decode tree.
  // Visitors are called in order.
  // A visitor can be registered multiple times. For example, after
  //
  //   d.AppendVisitor(V1);
  //   d.AppendVisitor(V2);
  //   d.PrependVisitor(V2);
  //   d.AppendVisitor(V3);
  //
  // a call to d.Decode(i) will invoke the visitor methods of V2, V1, V2 and
  // V3, in that order.
  void AppendVisitor(DecoderVisitor* visitor);
  void PrependVisitor(DecoderVisitor* visitor);

  // These helpers register `new_visitor` before or after the first instance of
  // `registered_visitor` in the list. So if
  //   V1, V2, V1, V2
  // are registered in this order in the decoder, calls to
  //   d.InsertVisitorAfter(V3, V1);
  //   d.InsertVisitorBefore(V4, V2);
  // will yield the order
  //   V1, V3, V4, V2, V1, V2
  //
  // For more complex modifications of the order of registered visitors, one
  // can directly access and modify the list of visitors via the `visitors()`
  // accessor.
  void InsertVisitorBefore(DecoderVisitor* new_visitor,
                           DecoderVisitor* registered_visitor);
  void InsertVisitorAfter(DecoderVisitor* new_visitor,
                          DecoderVisitor* registered_visitor);

  // Remove all instances of a previously registered visitor class from the
  // list of visitors stored by the decoder.
  void RemoveVisitor(DecoderVisitor* visitor);

  auto GetAllocator() const { return allocator_; }

  // Temporarily append visitors to the list for the lifetime of this object;
  // the destructor removes them again.
  class ScopedAddVisitors {
   public:
    ScopedAddVisitors(Decoder& decoder,
                      std::initializer_list<DecoderVisitor*> visitors)
        : visitors_(decoder.visitors_),
          old_end_(visitors_.insert(visitors_.end(), visitors)) {}

    ~ScopedAddVisitors() { visitors_.erase(old_end_, visitors_.end()); }

   private:
    List<DecoderVisitor*>& visitors_;
    List<DecoderVisitor*>::iterator old_end_;
  };
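  // A usage sketch for ScopedAddVisitors (illustrative only; assumes
  // `decoder`, `v1`, `v2` and `instr` are defined by the caller):
  //
  //   {
  //     Decoder::ScopedAddVisitors scope(decoder, {&v1, &v2});
  //     decoder.Decode(instr);  // v1 and v2 see this instruction.
  //   }
  //   decoder.Decode(instr);  // v1 and v2 have been removed again.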
  void VisitNamedInstruction(const Instruction* instr,
                             const std::string_view name);

#ifndef PANDA_BUILD
  std::list<DecoderVisitor*>* visitors() { return &visitors_; }
#else
  List<DecoderVisitor*>* visitors() { return &visitors_; }
#endif

  // Get a DecodeNode by name from the Decoder's map.
  DecodeNode* GetDecodeNode(const String& name);

 private:
  // Decodes an instruction and calls the visitor functions registered with the
  // Decoder class.
  void DecodeInstruction(const Instruction* instr);

  // Add an initialised DecodeNode to the decode_nodes_ map.
  void AddDecodeNode(const DecodeNode& node);

  AllocatorWrapper allocator_;

  // Visitors are registered in a list.
  List<DecoderVisitor*> visitors_;

  // Map of node names to DecodeNodes.
  Map<String, DecodeNode> decode_nodes_;

  // Compile the dynamically generated decode graph based on the static
  // information in kDecodeMapping and kVisitorNodes.
  void ConstructDecodeGraph();

  // Root node for the compiled decoder graph, stored here to avoid a map
  // lookup for every instruction decoded.
  CompiledDecodeNode* compiled_decoder_root_;
};

typedef void (Decoder::*DecodeFnPtr)(const Instruction*);
typedef uint32_t (Instruction::*BitExtractFn)(void) const;

// A VisitorNode maps the name of a visitor to the function that handles it.
struct VisitorNode {
  const char* name;
  const DecodeFnPtr visitor_fn;
};

// DecodePattern and DecodeMapping represent the input data to the decoder
// compilation stage. After compilation, the decoder is embodied in the graph
// of CompiledDecodeNodes pointed to by compiled_decoder_root_.

// A DecodePattern maps a pattern of set/unset/don't-care (1, 0, x) bits,
// encoded as a uint32_t, to its handler.
// The encoding uses two bits per symbol: 0 => 0b00, 1 => 0b01, x => 0b10.
// 0b11 marks the edge of the most-significant bits of the pattern, which is
// required to determine the length. For example, the pattern "1x01"_b is
// encoded in a uint32_t as 0b11_01_10_00_01.
struct DecodePattern {
  uint32_t pattern;
  const char* handler;
};

// A DecodeMapping consists of the name of a handler, the bits sampled in the
// instruction by that handler, and a mapping from the pattern that those
// sampled bits match to the corresponding name of a node.
struct DecodeMapping {
  const char* name;
  const std::vector<uint8_t> sampled_bits;
  const std::vector<DecodePattern> mapping;
};
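// For example, a hypothetical mapping (illustrative only, not an entry from
// the real decode tables) that samples bits 30 and 29 of the instruction and
// dispatches on their values might look like:
//
//   const DecodeMapping kExampleNode = {"ExampleNode",
//                                       {30, 29},
//                                       {{"0x"_b, "NodeA"},
//                                        {"10"_b, "NodeB"},
//                                        {"11"_b, "VisitorNodeC"}}};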
// For speed, before nodes can be used for decoding instructions, they must be
// compiled. This converts the mapping "bit pattern strings to decoder name
// string" stored in DecodeNodes to an array look-up for the pointer to the
// next node, stored in CompiledDecodeNodes. Compilation may also apply other
// optimisations for simple decode patterns.
class CompiledDecodeNode {
 public:
  // Constructor for decode nodes, containing a decode table and a pointer to a
  // function that extracts the bits to be sampled.
#ifndef PANDA_BUILD
  CompiledDecodeNode(BitExtractFn bit_extract_fn, size_t decode_table_size)
#else
  CompiledDecodeNode(BitExtractFn bit_extract_fn,
                     size_t decode_table_size,
                     AllocatorWrapper allocator)
#endif
      : bit_extract_fn_(bit_extract_fn),
        instruction_name_("node"),
        decode_table_size_(decode_table_size),
        decoder_(NULL) {
#ifndef PANDA_BUILD
    decode_table_ = new CompiledDecodeNode*[decode_table_size_];
#else
    decode_table_ = allocator.New<CompiledDecodeNode*[]>(decode_table_size_);
#endif
    memset(decode_table_, 0, decode_table_size_ * sizeof(decode_table_[0]));
  }

  // Constructor for wrappers around visitor functions. These require no
  // decoding, so no bit extraction function or decode table is assigned.
  explicit CompiledDecodeNode(const std::string_view iname, Decoder* decoder)
      : bit_extract_fn_(NULL),
        instruction_name_(iname),
        decode_table_(NULL),
        decode_table_size_(0),
        decoder_(decoder) {}

  ~CompiledDecodeNode() VIXL_NEGATIVE_TESTING_ALLOW_EXCEPTION {
    // Free the decode table, if this is a compiled, non-leaf node.
    if (decode_table_ != NULL) {
      VIXL_ASSERT(!IsLeafNode());
#ifndef VIXL_USE_PANDA_ALLOC
      delete[] decode_table_;
#endif
    }
  }

  // Decode the instruction by either sampling the bits using the bit extract
  // function to find the next node, or, if we're at a leaf, calling the
  // visitor function.
  void Decode(const Instruction* instr) const;

  // A leaf node is a wrapper for a visitor function.
  bool IsLeafNode() const {
    VIXL_ASSERT(((instruction_name_ == "node") && (bit_extract_fn_ != NULL)) ||
                ((instruction_name_ != "node") && (bit_extract_fn_ == NULL)));
    return instruction_name_ != "node";
  }

  // Get a pointer to the next node required in the decode process, based on
  // the bits sampled by the current node.
  CompiledDecodeNode* GetNodeForBits(uint32_t bits) const {
    VIXL_ASSERT(bits < decode_table_size_);
    return decode_table_[bits];
  }

  // Set the next node in the decode process for the pattern of sampled bits in
  // the current node.
  void SetNodeForBits(uint32_t bits, CompiledDecodeNode* n) {
    VIXL_ASSERT(bits < decode_table_size_);
    VIXL_ASSERT(n != NULL);
    decode_table_[bits] = n;
  }

 private:
  // Pointer to an instantiated template function for extracting the bits
  // sampled by this node. Set to NULL for leaf nodes.
  const BitExtractFn bit_extract_fn_;

  // Name of the instruction form identified by this node. Set only for leaf
  // nodes, where no extra decoding is required; set to "node" otherwise.
  std::string_view instruction_name_;

  // Mapping table from instruction bits to the next decode stage.
  CompiledDecodeNode** decode_table_;
  const size_t decode_table_size_;

  // Pointer to the decoder containing this node, used to call its visitor
  // function for leaf nodes. Set to NULL for non-leaf nodes.
  Decoder* decoder_;
};
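// In essence, the per-node decode step described above amounts to the
// following (an illustrative sketch, not the actual implementation of
// CompiledDecodeNode::Decode()):
//
//   void CompiledDecodeNode::Decode(const Instruction* instr) const {
//     if (IsLeafNode()) {
//       // Leaf: hand the instruction to the registered visitors.
//       decoder_->VisitNamedInstruction(instr, instruction_name_);
//     } else {
//       // Interior node: sample bits and move to the next node.
//       GetNodeForBits((instr->*bit_extract_fn_)())->Decode(instr);
//     }
//   }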
class DecodeNode {
 public:
  // Constructor for DecodeNode wrappers around visitor functions. These are
  // marked as "compiled", as there is no decoding left to do.
  explicit DecodeNode(const std::string_view iname, Decoder* decoder)
      :
#ifdef PANDA_BUILD
        allocator_(decoder->GetAllocator()),
#endif
        name_(iname, allocator_.Adapter()),
        sampled_bits_(kEmptySampledBits),
        instruction_name_(iname),
        pattern_table_(kEmptyPatternTable),
        decoder_(decoder),
        compiled_node_(NULL) {}

  // Constructor for DecodeNodes that map bit patterns to other DecodeNodes.
  explicit DecodeNode(const DecodeMapping& map, Decoder* decoder)
      :
#ifdef PANDA_BUILD
        allocator_(decoder->GetAllocator()),
#endif
        name_(map.name, allocator_.Adapter()),
        sampled_bits_(map.sampled_bits),
        instruction_name_("node"),
        pattern_table_(map.mapping),
        decoder_(decoder),
        compiled_node_(NULL) {
    // With the current two-bits-per-symbol encoding scheme, the maximum
    // pattern length is (32 - 2) / 2 = 15 bits.
    VIXL_CHECK(GetPatternLength(map.mapping[0].pattern) <= 15);
    for (const DecodePattern& p : map.mapping) {
      VIXL_CHECK(GetPatternLength(p.pattern) == map.sampled_bits.size());
    }
  }

  ~DecodeNode() {
#ifndef VIXL_USE_PANDA_ALLOC
    // Delete the compiled version of this node, if one was created.
    if (compiled_node_ != NULL) {
      delete compiled_node_;
    }
#endif
  }

  auto GetAllocator() const { return allocator_; }

  // Get the bits sampled from the instruction by this node.
  const std::vector<uint8_t>& GetSampledBits() const { return sampled_bits_; }

  // Get the number of bits sampled from the instruction by this node.
  size_t GetSampledBitsCount() const { return sampled_bits_.size(); }

  // A leaf node is a DecodeNode that wraps the visitor function for the
  // identified instruction class.
  bool IsLeafNode() const { return instruction_name_ != "node"; }

  const String& GetName() const { return name_; }

  // Create a CompiledDecodeNode of the specified table size that uses
  // bit_extract_fn to sample bits from the instruction.
  void CreateCompiledNode(BitExtractFn bit_extract_fn, size_t table_size) {
    VIXL_ASSERT(bit_extract_fn != NULL);
    VIXL_ASSERT(table_size > 0);
#ifndef PANDA_BUILD
    compiled_node_ = new CompiledDecodeNode(bit_extract_fn, table_size);
#else
    auto allocator{decoder_->GetAllocator()};
    compiled_node_ = allocator.New<CompiledDecodeNode>(bit_extract_fn,
                                                       table_size,
                                                       allocator);
#endif
  }

  // Create a CompiledDecodeNode wrapping a visitor function. No decoding is
  // required for this node; the visitor function is called instead.
  void CreateVisitorNode() {
#ifndef PANDA_BUILD
    compiled_node_ = new CompiledDecodeNode(instruction_name_, decoder_);
#else
    auto allocator{decoder_->GetAllocator()};
    compiled_node_ = allocator.New<CompiledDecodeNode>(instruction_name_,
                                                       decoder_);
#endif
  }

  // Find and compile the DecodeNode named "name", and set it as the node for
  // the pattern "bits".
  void CompileNodeForBits(Decoder* decoder, const String& name, uint32_t bits);
  // Get a pointer to an Instruction method that extracts the instruction bits
  // specified by the mask argument, and returns those sampled bits as a
  // contiguous sequence, suitable for indexing an array.
  // For example, a mask of 0b1010 returns a function that, given an
  // instruction 0bXYZW, will return 0bXZ.
  BitExtractFn GetBitExtractFunction(uint32_t mask) {
    return GetBitExtractFunctionHelper(mask, 0);
  }

  // Get a pointer to an Instruction method that applies a mask to the
  // instruction bits, and tests if the result is equal to value. The returned
  // function gives a 1 result if (inst & mask == value), 0 otherwise.
  BitExtractFn GetBitExtractFunction(uint32_t mask, uint32_t value) {
    return GetBitExtractFunctionHelper(value, mask);
  }

  // Compile this DecodeNode into a new CompiledDecodeNode and return a pointer
  // to it. This pointer is also stored inside the DecodeNode itself. Destroying
  // a DecodeNode frees its associated CompiledDecodeNode.
  CompiledDecodeNode* Compile(Decoder* decoder);

  // Get a pointer to the CompiledDecodeNode associated with this DecodeNode.
  // Returns NULL if the node has not been compiled yet.
  CompiledDecodeNode* GetCompiledNode() const { return compiled_node_; }
  bool IsCompiled() const { return GetCompiledNode() != NULL; }

  enum class PatternSymbol { kSymbol0 = 0, kSymbol1 = 1, kSymbolX = 2 };
  static const uint32_t kEndOfPattern = 3;
  static const uint32_t kPatternSymbolMask = 3;

  size_t GetPatternLength(uint32_t pattern) const {
    uint32_t hsb = HighestSetBitPosition(pattern);
    // The pattern length is signified by two set bits in a two-bit-aligned
    // position. Ensure that the pattern has a highest set bit, that it's at an
    // odd bit position, and that the bit to the right of the hsb is also set.
    VIXL_ASSERT(((hsb % 2) == 1) &&
                ((pattern >> (hsb - 1)) == kEndOfPattern));
    return hsb / 2;
  }

  bool PatternContainsSymbol(uint32_t pattern, PatternSymbol symbol) const {
    while ((pattern & kPatternSymbolMask) != kEndOfPattern) {
      if (static_cast<PatternSymbol>(pattern & kPatternSymbolMask) == symbol)
        return true;
      pattern >>= 2;
    }
    return false;
  }

  PatternSymbol GetSymbolAt(uint32_t pattern, size_t pos) const {
    size_t len = GetPatternLength(pattern);
    VIXL_ASSERT((pos < 15) && (pos < len));
    uint32_t shift = static_cast<uint32_t>(2 * (len - pos - 1));
    uint32_t sym = (pattern >> shift) & kPatternSymbolMask;
    return static_cast<PatternSymbol>(sym);
  }
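  // Worked example for the two-bits-per-symbol encoding (see DecodePattern
  // above): the pattern "1x01"_b is encoded as 0b11'01'10'00'01, so
  //
  //   GetPatternLength(0b11'01'10'00'01) == 4
  //       // The highest set bit is bit 9, so the length is 9 / 2 == 4.
  //   GetSymbolAt(0b11'01'10'00'01, 0) == PatternSymbol::kSymbol1  // '1'
  //   GetSymbolAt(0b11'01'10'00'01, 1) == PatternSymbol::kSymbolX  // 'x'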
 private:
  // Generate a mask and value pair from a pattern constructed from 0, 1 and x
  // (don't care) 2-bit symbols.
  // For example, "10x1"_b should return mask = 0b1101, value = 0b1001.
  typedef std::pair<Instr, Instr> MaskValuePair;
  MaskValuePair GenerateMaskValuePair(uint32_t pattern) const;

  // Generate a pattern ordered by the bit positions sampled by this node.
  // The symbol corresponding to the lowest sample position is placed in the
  // least-significant bits of the result pattern.
  // For example, a pattern of "1x0"_b expected when sampling bits 31, 1 and 30
  // returns the pattern "x01"_b; bit 1 should be 'x', bit 30 '0' and bit 31
  // '1'.
  // This output makes comparisons easier between the pattern and bits sampled
  // from an instruction using the fast "compress" algorithm. See
  // Instruction::Compress().
  uint32_t GenerateOrderedPattern(uint32_t pattern) const;

  // Generate a mask with a bit set at each sample position.
  uint32_t GenerateSampledBitsMask() const;

  // Try to compile a more optimised decode operation for this node, returning
  // true if successful.
  bool TryCompileOptimisedDecodeTable(Decoder* decoder);

  // Helper function that returns a bit extracting function. If y is zero,
  // x is a bit extraction mask. Otherwise, y is the mask, and x is the value
  // to match after masking.
  BitExtractFn GetBitExtractFunctionHelper(uint32_t x, uint32_t y);

  AllocatorWrapper allocator_;

  // Name of this decoder node, used to construct edges in the decode graph.
  String name_;

  // Vector of bits sampled from an instruction to determine which node to look
  // up next in the decode process.
  const std::vector<uint8_t>& sampled_bits_;
  static const std::vector<uint8_t> kEmptySampledBits;

  // For leaf nodes, this is the name of the instruction form that the node
  // represents. For other nodes, this is always set to "node".
  std::string_view instruction_name_;

  // Source mapping from bit pattern to the name of the next decode stage.
  const std::vector<DecodePattern>& pattern_table_;
  static const std::vector<DecodePattern> kEmptyPatternTable;

  // Pointer to the decoder containing this node, used to call its visitor
  // function for leaf nodes.
  Decoder* decoder_;

  // Pointer to the compiled version of this node. If this node hasn't been
  // compiled yet, this pointer is NULL.
  CompiledDecodeNode* compiled_node_;
};

}  // namespace aarch64
}  // namespace vixl

#endif  // VIXL_AARCH64_DECODER_AARCH64_H_