//===- FuzzerMerge.h - merging corpora --------------------------*- C++ -* ===//
2c5f01b2fSopenharmony_ci//
3c5f01b2fSopenharmony_ci//                     The LLVM Compiler Infrastructure
4c5f01b2fSopenharmony_ci//
5c5f01b2fSopenharmony_ci// This file is distributed under the University of Illinois Open Source
6c5f01b2fSopenharmony_ci// License. See LICENSE.TXT for details.
7c5f01b2fSopenharmony_ci//
8c5f01b2fSopenharmony_ci//===----------------------------------------------------------------------===//
9c5f01b2fSopenharmony_ci// Merging Corpora.
10c5f01b2fSopenharmony_ci//
11c5f01b2fSopenharmony_ci// The task:
12c5f01b2fSopenharmony_ci//   Take the existing corpus (possibly empty) and merge new inputs into
13c5f01b2fSopenharmony_ci//   it so that only inputs with new coverage ('features') are added.
14c5f01b2fSopenharmony_ci//   The process should tolerate the crashes, OOMs, leaks, etc.
15c5f01b2fSopenharmony_ci//
// Algorithm:
//   The outer process collects the set of files and writes their names
//   into a temporary "control" file, then repeatedly launches the inner
//   process until all inputs are processed.
//   The outer process does not actually execute the target code.
21c5f01b2fSopenharmony_ci//
//   The inner process reads the control file and sees a) the list of all the
//   inputs and b) the last processed input. Then it starts processing the
//   inputs one by one. Before processing every input it writes one line to
//   the control file:
//   STARTED INPUT_ID INPUT_SIZE
//   After processing an input it writes another line:
//   DONE INPUT_ID Feature1 Feature2 Feature3 ...
//   If a crash happens while processing an input the last line in the control
//   file will be "STARTED INPUT_ID" and so the next process will know
//   where to resume.
31c5f01b2fSopenharmony_ci//
//   Once all inputs are processed by the inner process(es) the outer process
//   reads the control file and does the merge based entirely on the contents
//   of the control file.
//   It uses a single pass greedy algorithm choosing first the smallest inputs
//   and, within the same size, the inputs that have more new features.
37c5f01b2fSopenharmony_ci//
38c5f01b2fSopenharmony_ci//===----------------------------------------------------------------------===//
39c5f01b2fSopenharmony_ci
40c5f01b2fSopenharmony_ci#ifndef LLVM_FUZZER_MERGE_H
41c5f01b2fSopenharmony_ci#define LLVM_FUZZER_MERGE_H
42c5f01b2fSopenharmony_ci
43c5f01b2fSopenharmony_ci#include "FuzzerDefs.h"
44c5f01b2fSopenharmony_ci
45c5f01b2fSopenharmony_ci#include <istream>
46c5f01b2fSopenharmony_ci#include <set>
47c5f01b2fSopenharmony_ci
namespace fuzzer {

// Bookkeeping record for a single input file taking part in a merge.
// NOTE(review): the field meanings below are inferred from the control-file
// format described at the top of this header; the code that fills them in is
// not part of this header, so confirm against the implementation.
struct MergeFileInfo {
  // Name of the input file (presumably as written into the control file).
  std::string Name;
  // Size of the input; presumably the INPUT_SIZE value of its STARTED line.
  size_t Size = 0;
  // Features recorded for the input; presumably the list on its DONE line.
  std::vector<uint32_t> Features;
};

// State of a merge, built from the contents of the control file.
// Only declarations live here; the member functions are defined elsewhere.
struct Merger {
  // One entry per input file known to the merge.
  std::vector<MergeFileInfo> Files;
  // Presumably the number of leading entries of Files that belong to the
  // existing (first) corpus -- TODO confirm against the implementation.
  size_t NumFilesInFirstCorpus = 0;
  // Presumably the index in Files of the first input that has not been
  // processed yet, i.e. where the next inner process resumes -- TODO confirm.
  size_t FirstNotProcessedFile = 0;
  // Presumably identifies the input being processed when the last inner
  // process failed -- TODO confirm.
  std::string LastFailure;

  // Reads control-file contents from the stream IS. ParseCoverage presumably
  // selects whether the recorded features are parsed as well. The bool result
  // presumably reports whether parsing succeeded -- TODO confirm.
  bool Parse(std::istream &IS, bool ParseCoverage);
  // Same as above, but takes the control-file contents as a string.
  bool Parse(const std::string &Str, bool ParseCoverage);
  // Judging by the name, parses like Parse(IS, ...) and terminates the
  // process on failure instead of returning a status -- TODO confirm.
  void ParseOrExit(std::istream &IS, bool ParseCoverage);
  // Performs the merge based on the parsed state and reports the selected
  // file names through NewFiles. The meaning of the returned size_t is not
  // visible from this header -- TODO confirm.
  size_t Merge(std::vector<std::string> *NewFiles);
};

}  // namespace fuzzer
69c5f01b2fSopenharmony_ci
70c5f01b2fSopenharmony_ci#endif  // LLVM_FUZZER_MERGE_H
71