1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// Author: kenton@google.com (Kenton Varda)
32//  Based on original Protocol Buffers design by
33//  Sanjay Ghemawat, Jeff Dean, and others.
34
35#include <google/protobuf/compiler/cpp/cpp_helpers.h>
36
37#include <functional>
38#include <limits>
39#include <map>
40#include <queue>
41#include <unordered_set>
42#include <vector>
43
44#include <google/protobuf/stubs/common.h>
45#include <google/protobuf/stubs/logging.h>
46#include <google/protobuf/compiler/cpp/cpp_options.h>
47#include <google/protobuf/descriptor.pb.h>
48#include <google/protobuf/descriptor.h>
49#include <google/protobuf/compiler/scc.h>
50#include <google/protobuf/io/printer.h>
51#include <google/protobuf/io/zero_copy_stream.h>
52#include <google/protobuf/dynamic_message.h>
53#include <google/protobuf/wire_format.h>
54#include <google/protobuf/wire_format_lite.h>
55#include <google/protobuf/stubs/strutil.h>
56#include <google/protobuf/stubs/substitute.h>
57#include <google/protobuf/stubs/hash.h>
58
59#include <google/protobuf/port_def.inc>
60
61namespace google {
62namespace protobuf {
63namespace compiler {
64namespace cpp {
65
66namespace {
67
// Message name and defining .proto path for the Any type.  Presumably
// consumed by the Any-detection helpers later in this file (not shown here).
static constexpr char kAnyMessageName[] = "Any";
static constexpr char kAnyProtoFile[] = "google/protobuf/any.proto";
70
71std::string DotsToColons(const std::string& name) {
72  return StringReplace(name, ".", "::", true);
73}
74
// Identifiers that must not be emitted verbatim in generated code: the C++
// keywords and alternative operator spellings, plus the NULL macro name.
// The entries are copied into the kKeywords set by MakeKeywordsMap().
static const char* const kKeywordList[] = {
    "NULL",         "alignas",      "alignof",          "and",
    "and_eq",       "asm",          "auto",             "bitand",
    "bitor",        "bool",         "break",            "case",
    "catch",        "char",         "class",            "compl",
    "const",        "constexpr",    "const_cast",       "continue",
    "decltype",     "default",      "delete",           "do",
    "double",       "dynamic_cast", "else",             "enum",
    "explicit",     "export",       "extern",           "false",
    "float",        "for",          "friend",           "goto",
    "if",           "inline",       "int",              "long",
    "mutable",      "namespace",    "new",              "noexcept",
    "not",          "not_eq",       "nullptr",          "operator",
    "or",           "or_eq",        "private",          "protected",
    "public",       "register",     "reinterpret_cast", "return",
    "short",        "signed",       "sizeof",           "static",
    "static_assert", "static_cast", "struct",           "switch",
    "template",     "this",         "thread_local",     "throw",
    "true",         "try",          "typedef",          "typeid",
    "typename",     "union",        "unsigned",         "using",
    "virtual",      "void",         "volatile",         "wchar_t",
    "while",        "xor",          "xor_eq",
};
159
160static std::unordered_set<std::string>* MakeKeywordsMap() {
161  auto* result = new std::unordered_set<std::string>();
162  for (const auto keyword : kKeywordList) {
163    result->emplace(keyword);
164  }
165  return result;
166}
167
168static std::unordered_set<std::string>& kKeywords = *MakeKeywordsMap();
169
170// Encode [0..63] as 'A'-'Z', 'a'-'z', '0'-'9', '_'
171char Base63Char(int value) {
172  GOOGLE_CHECK_GE(value, 0);
173  if (value < 26) return 'A' + value;
174  value -= 26;
175  if (value < 26) return 'a' + value;
176  value -= 26;
177  if (value < 10) return '0' + value;
178  GOOGLE_CHECK_EQ(value, 10);
179  return '_';
180}
181
182// Given a c identifier has 63 legal characters we can't implement base64
183// encoding. So we return the k least significant "digits" in base 63.
184template <typename I>
185std::string Base63(I n, int k) {
186  std::string res;
187  while (k-- > 0) {
188    res += Base63Char(static_cast<int>(n % 63));
189    n /= 63;
190  }
191  return res;
192}
193
194std::string IntTypeName(const Options& options, const std::string& type) {
195  if (options.opensource_runtime) {
196    return "::PROTOBUF_NAMESPACE_ID::" + type;
197  } else {
198    return "::" + type;
199  }
200}
201
202void SetIntVar(const Options& options, const std::string& type,
203               std::map<std::string, std::string>* variables) {
204  (*variables)[type] = IntTypeName(options, type);
205}
206
207bool HasInternalAccessors(const FieldOptions::CType ctype) {
208  return ctype == FieldOptions::STRING || ctype == FieldOptions::CORD;
209}
210
211}  // namespace
212
213void SetCommonVars(const Options& options,
214                   std::map<std::string, std::string>* variables) {
215  (*variables)["proto_ns"] = ProtobufNamespace(options);
216
217  // Warning: there is some clever naming/splitting here to avoid extract script
218  // rewrites.  The names of these variables must not be things that the extract
219  // script will rewrite.  That's why we use "CHK" (for example) instead of
220  // "GOOGLE_CHECK".
221  if (options.opensource_runtime) {
222    (*variables)["GOOGLE_PROTOBUF"] = "GOOGLE_PROTOBUF";
223    (*variables)["CHK"] = "GOOGLE_CHECK";
224    (*variables)["DCHK"] = "GOOGLE_DCHECK";
225  } else {
226    // These values are things the extract script would rewrite if we did not
227    // split them.  It might not strictly matter since we don't generate google3
228    // code in open-source.  But it's good to prevent surprising things from
229    // happening.
230    (*variables)["GOOGLE_PROTOBUF"] =
231        "GOOGLE3"
232        "_PROTOBUF";
233    (*variables)["CHK"] =
234        "CH"
235        "ECK";
236    (*variables)["DCHK"] =
237        "DCH"
238        "ECK";
239  }
240
241  SetIntVar(options, "int8", variables);
242  SetIntVar(options, "uint8", variables);
243  SetIntVar(options, "uint32", variables);
244  SetIntVar(options, "uint64", variables);
245  SetIntVar(options, "int32", variables);
246  SetIntVar(options, "int64", variables);
247  (*variables)["string"] = "std::string";
248}
249
250void SetUnknkownFieldsVariable(const Descriptor* descriptor,
251                               const Options& options,
252                               std::map<std::string, std::string>* variables) {
253  std::string proto_ns = ProtobufNamespace(options);
254  std::string unknown_fields_type;
255  if (UseUnknownFieldSet(descriptor->file(), options)) {
256    unknown_fields_type = "::" + proto_ns + "::UnknownFieldSet";
257    (*variables)["unknown_fields"] =
258        "_internal_metadata_.unknown_fields<" + unknown_fields_type + ">(" +
259        unknown_fields_type + "::default_instance)";
260  } else {
261    unknown_fields_type =
262        PrimitiveTypeName(options, FieldDescriptor::CPPTYPE_STRING);
263    (*variables)["unknown_fields"] = "_internal_metadata_.unknown_fields<" +
264                                     unknown_fields_type + ">(::" + proto_ns +
265                                     "::internal::GetEmptyString)";
266  }
267  (*variables)["unknown_fields_type"] = unknown_fields_type;
268  (*variables)["have_unknown_fields"] =
269      "_internal_metadata_.have_unknown_fields()";
270  (*variables)["mutable_unknown_fields"] =
271      "_internal_metadata_.mutable_unknown_fields<" + unknown_fields_type +
272      ">()";
273}
274
// Converts `input` to camel case.
//
// ASCII letters and digits are kept; every other character (underscores
// included) is dropped and forces the next lowercase letter to be
// capitalized.  A digit is kept and also capitalizes the following lowercase
// letter.  Existing capitals are left as-is.  `cap_next_letter` controls
// whether the very first lowercase letter is capitalized.
//
// Examples: ("foo_bar", true) -> "FooBar"; ("foo_bar", false) -> "fooBar".
std::string UnderscoresToCamelCase(const std::string& input,
                                   bool cap_next_letter) {
  std::string result;
  result.reserve(input.size());  // Output is never longer than the input.
  // Note:  I distrust ctype.h due to locales.
  // Iterating by value avoids comparing a signed index against size().
  for (const char c : input) {
    if ('a' <= c && c <= 'z') {
      result += cap_next_letter ? static_cast<char>(c - 'a' + 'A') : c;
      cap_next_letter = false;
    } else if ('A' <= c && c <= 'Z') {
      // Capital letters are left as-is.
      result += c;
      cap_next_letter = false;
    } else if ('0' <= c && c <= '9') {
      result += c;
      cap_next_letter = true;
    } else {
      // Separator: drop it and capitalize whatever letter comes next.
      cap_next_letter = true;
    }
  }
  return result;
}
300
// Comment-line rulers: a heavy one built from '=' and a light one built from
// '-'.  Each is a complete "//" comment line ending in a newline.
const char kThickSeparator[] =
    "// ===================================================================\n";

const char kThinSeparator[] =
    "// -------------------------------------------------------------------\n";
305
306bool CanInitializeByZeroing(const FieldDescriptor* field) {
307  if (field->is_repeated() || field->is_extension()) return false;
308  switch (field->cpp_type()) {
309    case FieldDescriptor::CPPTYPE_ENUM:
310      return field->default_value_enum()->number() == 0;
311    case FieldDescriptor::CPPTYPE_INT32:
312      return field->default_value_int32() == 0;
313    case FieldDescriptor::CPPTYPE_INT64:
314      return field->default_value_int64() == 0;
315    case FieldDescriptor::CPPTYPE_UINT32:
316      return field->default_value_uint32() == 0;
317    case FieldDescriptor::CPPTYPE_UINT64:
318      return field->default_value_uint64() == 0;
319    case FieldDescriptor::CPPTYPE_FLOAT:
320      return field->default_value_float() == 0;
321    case FieldDescriptor::CPPTYPE_DOUBLE:
322      return field->default_value_double() == 0;
323    case FieldDescriptor::CPPTYPE_BOOL:
324      return field->default_value_bool() == false;
325    default:
326      return false;
327  }
328}
329
330std::string ClassName(const Descriptor* descriptor) {
331  const Descriptor* parent = descriptor->containing_type();
332  std::string res;
333  if (parent) res += ClassName(parent) + "_";
334  res += descriptor->name();
335  if (IsMapEntryMessage(descriptor)) res += "_DoNotUse";
336  return ResolveKeyword(res);
337}
338
339std::string ClassName(const EnumDescriptor* enum_descriptor) {
340  if (enum_descriptor->containing_type() == nullptr) {
341    return ResolveKeyword(enum_descriptor->name());
342  } else {
343    return ClassName(enum_descriptor->containing_type()) + "_" +
344           enum_descriptor->name();
345  }
346}
347
348std::string QualifiedClassName(const Descriptor* d, const Options& options) {
349  return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
350}
351
352std::string QualifiedClassName(const EnumDescriptor* d,
353                               const Options& options) {
354  return QualifiedFileLevelSymbol(d->file(), ClassName(d), options);
355}
356
357std::string QualifiedClassName(const Descriptor* d) {
358  return QualifiedClassName(d, Options());
359}
360
361std::string QualifiedClassName(const EnumDescriptor* d) {
362  return QualifiedClassName(d, Options());
363}
364
365std::string QualifiedExtensionName(const FieldDescriptor* d,
366                                   const Options& options) {
367  GOOGLE_DCHECK(d->is_extension());
368  return QualifiedFileLevelSymbol(d->file(), FieldName(d), options);
369}
370
371std::string QualifiedExtensionName(const FieldDescriptor* d) {
372  return QualifiedExtensionName(d, Options());
373}
374
375std::string Namespace(const std::string& package) {
376  if (package.empty()) return "";
377  return "::" + DotsToColons(package);
378}
379
380std::string Namespace(const FileDescriptor* d, const Options& options) {
381  std::string ret = Namespace(d->package());
382  if (IsWellKnownMessage(d) && options.opensource_runtime) {
383    // Written with string concatenation to prevent rewriting of
384    // ::google::protobuf.
385    ret = StringReplace(ret,
386                        "::google::"
387                        "protobuf",
388                        "PROTOBUF_NAMESPACE_ID", false);
389  }
390  return ret;
391}
392
393std::string Namespace(const Descriptor* d, const Options& options) {
394  return Namespace(d->file(), options);
395}
396
397std::string Namespace(const FieldDescriptor* d, const Options& options) {
398  return Namespace(d->file(), options);
399}
400
401std::string Namespace(const EnumDescriptor* d, const Options& options) {
402  return Namespace(d->file(), options);
403}
404
405std::string DefaultInstanceType(const Descriptor* descriptor,
406                                const Options& options) {
407  return ClassName(descriptor) + "DefaultTypeInternal";
408}
409
410std::string DefaultInstanceName(const Descriptor* descriptor,
411                                const Options& options) {
412  return "_" + ClassName(descriptor, false) + "_default_instance_";
413}
414
415std::string DefaultInstancePtr(const Descriptor* descriptor,
416                               const Options& options) {
417  return DefaultInstanceName(descriptor, options) + "ptr_";
418}
419
420std::string QualifiedDefaultInstanceName(const Descriptor* descriptor,
421                                         const Options& options) {
422  return QualifiedFileLevelSymbol(
423      descriptor->file(), DefaultInstanceName(descriptor, options), options);
424}
425
426std::string QualifiedDefaultInstancePtr(const Descriptor* descriptor,
427                                        const Options& options) {
428  return QualifiedDefaultInstanceName(descriptor, options) + "ptr_";
429}
430
431std::string DescriptorTableName(const FileDescriptor* file,
432                                const Options& options) {
433  return UniqueName("descriptor_table", file, options);
434}
435
436std::string FileDllExport(const FileDescriptor* file, const Options& options) {
437  return UniqueName("PROTOBUF_INTERNAL_EXPORT", file, options);
438}
439
440std::string SuperClassName(const Descriptor* descriptor,
441                           const Options& options) {
442  return "::" + ProtobufNamespace(options) +
443         (HasDescriptorMethods(descriptor->file(), options) ? "::Message"
444                                                            : "::MessageLite");
445}
446
447std::string ResolveKeyword(const std::string& name) {
448  if (kKeywords.count(name) > 0) {
449    return name + "_";
450  }
451  return name;
452}
453
454std::string FieldName(const FieldDescriptor* field) {
455  std::string result = field->name();
456  LowerString(&result);
457  if (kKeywords.count(result) > 0) {
458    result.append("_");
459  }
460  return result;
461}
462
463std::string EnumValueName(const EnumValueDescriptor* enum_value) {
464  std::string result = enum_value->name();
465  if (kKeywords.count(result) > 0) {
466    result.append("_");
467  }
468  return result;
469}
470
471int EstimateAlignmentSize(const FieldDescriptor* field) {
472  if (field == nullptr) return 0;
473  if (field->is_repeated()) return 8;
474  switch (field->cpp_type()) {
475    case FieldDescriptor::CPPTYPE_BOOL:
476      return 1;
477
478    case FieldDescriptor::CPPTYPE_INT32:
479    case FieldDescriptor::CPPTYPE_UINT32:
480    case FieldDescriptor::CPPTYPE_ENUM:
481    case FieldDescriptor::CPPTYPE_FLOAT:
482      return 4;
483
484    case FieldDescriptor::CPPTYPE_INT64:
485    case FieldDescriptor::CPPTYPE_UINT64:
486    case FieldDescriptor::CPPTYPE_DOUBLE:
487    case FieldDescriptor::CPPTYPE_STRING:
488    case FieldDescriptor::CPPTYPE_MESSAGE:
489      return 8;
490  }
491  GOOGLE_LOG(FATAL) << "Can't get here.";
492  return -1;  // Make compiler happy.
493}
494
495std::string FieldConstantName(const FieldDescriptor* field) {
496  std::string field_name = UnderscoresToCamelCase(field->name(), true);
497  std::string result = "k" + field_name + "FieldNumber";
498
499  if (!field->is_extension() &&
500      field->containing_type()->FindFieldByCamelcaseName(
501          field->camelcase_name()) != field) {
502    // This field's camelcase name is not unique.  As a hack, add the field
503    // number to the constant name.  This makes the constant rather useless,
504    // but what can we do?
505    result += "_" + StrCat(field->number());
506  }
507
508  return result;
509}
510
511std::string FieldMessageTypeName(const FieldDescriptor* field,
512                                 const Options& options) {
513  // Note:  The Google-internal version of Protocol Buffers uses this function
514  //   as a hook point for hacks to support legacy code.
515  return QualifiedClassName(field->message_type(), options);
516}
517
518std::string StripProto(const std::string& filename) {
519  if (HasSuffixString(filename, ".protodevel")) {
520    return StripSuffixString(filename, ".protodevel");
521  } else {
522    return StripSuffixString(filename, ".proto");
523  }
524}
525
526const char* PrimitiveTypeName(FieldDescriptor::CppType type) {
527  switch (type) {
528    case FieldDescriptor::CPPTYPE_INT32:
529      return "::google::protobuf::int32";
530    case FieldDescriptor::CPPTYPE_INT64:
531      return "::google::protobuf::int64";
532    case FieldDescriptor::CPPTYPE_UINT32:
533      return "::google::protobuf::uint32";
534    case FieldDescriptor::CPPTYPE_UINT64:
535      return "::google::protobuf::uint64";
536    case FieldDescriptor::CPPTYPE_DOUBLE:
537      return "double";
538    case FieldDescriptor::CPPTYPE_FLOAT:
539      return "float";
540    case FieldDescriptor::CPPTYPE_BOOL:
541      return "bool";
542    case FieldDescriptor::CPPTYPE_ENUM:
543      return "int";
544    case FieldDescriptor::CPPTYPE_STRING:
545      return "std::string";
546    case FieldDescriptor::CPPTYPE_MESSAGE:
547      return nullptr;
548
549      // No default because we want the compiler to complain if any new
550      // CppTypes are added.
551  }
552
553  GOOGLE_LOG(FATAL) << "Can't get here.";
554  return nullptr;
555}
556
557std::string PrimitiveTypeName(const Options& options,
558                              FieldDescriptor::CppType type) {
559  switch (type) {
560    case FieldDescriptor::CPPTYPE_INT32:
561      return IntTypeName(options, "int32");
562    case FieldDescriptor::CPPTYPE_INT64:
563      return IntTypeName(options, "int64");
564    case FieldDescriptor::CPPTYPE_UINT32:
565      return IntTypeName(options, "uint32");
566    case FieldDescriptor::CPPTYPE_UINT64:
567      return IntTypeName(options, "uint64");
568    case FieldDescriptor::CPPTYPE_DOUBLE:
569      return "double";
570    case FieldDescriptor::CPPTYPE_FLOAT:
571      return "float";
572    case FieldDescriptor::CPPTYPE_BOOL:
573      return "bool";
574    case FieldDescriptor::CPPTYPE_ENUM:
575      return "int";
576    case FieldDescriptor::CPPTYPE_STRING:
577      return "std::string";
578    case FieldDescriptor::CPPTYPE_MESSAGE:
579      return "";
580
581      // No default because we want the compiler to complain if any new
582      // CppTypes are added.
583  }
584
585  GOOGLE_LOG(FATAL) << "Can't get here.";
586  return "";
587}
588
589const char* DeclaredTypeMethodName(FieldDescriptor::Type type) {
590  switch (type) {
591    case FieldDescriptor::TYPE_INT32:
592      return "Int32";
593    case FieldDescriptor::TYPE_INT64:
594      return "Int64";
595    case FieldDescriptor::TYPE_UINT32:
596      return "UInt32";
597    case FieldDescriptor::TYPE_UINT64:
598      return "UInt64";
599    case FieldDescriptor::TYPE_SINT32:
600      return "SInt32";
601    case FieldDescriptor::TYPE_SINT64:
602      return "SInt64";
603    case FieldDescriptor::TYPE_FIXED32:
604      return "Fixed32";
605    case FieldDescriptor::TYPE_FIXED64:
606      return "Fixed64";
607    case FieldDescriptor::TYPE_SFIXED32:
608      return "SFixed32";
609    case FieldDescriptor::TYPE_SFIXED64:
610      return "SFixed64";
611    case FieldDescriptor::TYPE_FLOAT:
612      return "Float";
613    case FieldDescriptor::TYPE_DOUBLE:
614      return "Double";
615
616    case FieldDescriptor::TYPE_BOOL:
617      return "Bool";
618    case FieldDescriptor::TYPE_ENUM:
619      return "Enum";
620
621    case FieldDescriptor::TYPE_STRING:
622      return "String";
623    case FieldDescriptor::TYPE_BYTES:
624      return "Bytes";
625    case FieldDescriptor::TYPE_GROUP:
626      return "Group";
627    case FieldDescriptor::TYPE_MESSAGE:
628      return "Message";
629
630      // No default because we want the compiler to complain if any new
631      // types are added.
632  }
633  GOOGLE_LOG(FATAL) << "Can't get here.";
634  return "";
635}
636
637std::string Int32ToString(int number) {
638  if (number == kint32min) {
639    // This needs to be special-cased, see explanation here:
640    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
641    return StrCat(number + 1, " - 1");
642  } else {
643    return StrCat(number);
644  }
645}
646
647std::string Int64ToString(const std::string& macro_prefix, int64 number) {
648  if (number == kint64min) {
649    // This needs to be special-cased, see explanation here:
650    // https://gcc.gnu.org/bugzilla/show_bug.cgi?id=52661
651    return StrCat(macro_prefix, "_LONGLONG(", number + 1, ") - 1");
652  }
653  return StrCat(macro_prefix, "_LONGLONG(", number, ")");
654}
655
656std::string UInt64ToString(const std::string& macro_prefix, uint64 number) {
657  return StrCat(macro_prefix, "_ULONGLONG(", number, ")");
658}
659
660std::string DefaultValue(const FieldDescriptor* field) {
661  switch (field->cpp_type()) {
662    case FieldDescriptor::CPPTYPE_INT64:
663      return Int64ToString("GG", field->default_value_int64());
664    case FieldDescriptor::CPPTYPE_UINT64:
665      return UInt64ToString("GG", field->default_value_uint64());
666    default:
667      return DefaultValue(Options(), field);
668  }
669}
670
671std::string DefaultValue(const Options& options, const FieldDescriptor* field) {
672  switch (field->cpp_type()) {
673    case FieldDescriptor::CPPTYPE_INT32:
674      return Int32ToString(field->default_value_int32());
675    case FieldDescriptor::CPPTYPE_UINT32:
676      return StrCat(field->default_value_uint32()) + "u";
677    case FieldDescriptor::CPPTYPE_INT64:
678      return Int64ToString("PROTOBUF", field->default_value_int64());
679    case FieldDescriptor::CPPTYPE_UINT64:
680      return UInt64ToString("PROTOBUF", field->default_value_uint64());
681    case FieldDescriptor::CPPTYPE_DOUBLE: {
682      double value = field->default_value_double();
683      if (value == std::numeric_limits<double>::infinity()) {
684        return "std::numeric_limits<double>::infinity()";
685      } else if (value == -std::numeric_limits<double>::infinity()) {
686        return "-std::numeric_limits<double>::infinity()";
687      } else if (value != value) {
688        return "std::numeric_limits<double>::quiet_NaN()";
689      } else {
690        return SimpleDtoa(value);
691      }
692    }
693    case FieldDescriptor::CPPTYPE_FLOAT: {
694      float value = field->default_value_float();
695      if (value == std::numeric_limits<float>::infinity()) {
696        return "std::numeric_limits<float>::infinity()";
697      } else if (value == -std::numeric_limits<float>::infinity()) {
698        return "-std::numeric_limits<float>::infinity()";
699      } else if (value != value) {
700        return "std::numeric_limits<float>::quiet_NaN()";
701      } else {
702        std::string float_value = SimpleFtoa(value);
703        // If floating point value contains a period (.) or an exponent
704        // (either E or e), then append suffix 'f' to make it a float
705        // literal.
706        if (float_value.find_first_of(".eE") != std::string::npos) {
707          float_value.push_back('f');
708        }
709        return float_value;
710      }
711    }
712    case FieldDescriptor::CPPTYPE_BOOL:
713      return field->default_value_bool() ? "true" : "false";
714    case FieldDescriptor::CPPTYPE_ENUM:
715      // Lazy:  Generate a static_cast because we don't have a helper function
716      //   that constructs the full name of an enum value.
717      return strings::Substitute(
718          "static_cast< $0 >($1)", ClassName(field->enum_type(), true),
719          Int32ToString(field->default_value_enum()->number()));
720    case FieldDescriptor::CPPTYPE_STRING:
721      return "\"" +
722             EscapeTrigraphs(CEscape(field->default_value_string())) +
723             "\"";
724    case FieldDescriptor::CPPTYPE_MESSAGE:
725      return "*" + FieldMessageTypeName(field, options) +
726             "::internal_default_instance()";
727  }
728  // Can't actually get here; make compiler happy.  (We could add a default
729  // case above but then we wouldn't get the nice compiler warning when a
730  // new type is added.)
731  GOOGLE_LOG(FATAL) << "Can't get here.";
732  return "";
733}
734
735// Convert a file name into a valid identifier.
736std::string FilenameIdentifier(const std::string& filename) {
737  std::string result;
738  for (int i = 0; i < filename.size(); i++) {
739    if (ascii_isalnum(filename[i])) {
740      result.push_back(filename[i]);
741    } else {
742      // Not alphanumeric.  To avoid any possibility of name conflicts we
743      // use the hex code for the character.
744      StrAppend(&result, "_", strings::Hex(static_cast<uint8>(filename[i])));
745    }
746  }
747  return result;
748}
749
750std::string UniqueName(const std::string& name, const std::string& filename,
751                       const Options& options) {
752  return name + "_" + FilenameIdentifier(filename);
753}
754
755// Return the qualified C++ name for a file level symbol.
756std::string QualifiedFileLevelSymbol(const FileDescriptor* file,
757                                     const std::string& name,
758                                     const Options& options) {
759  if (file->package().empty()) {
760    return StrCat("::", name);
761  }
762  return StrCat(Namespace(file, options), "::", name);
763}
764
765// Escape C++ trigraphs by escaping question marks to \?
766std::string EscapeTrigraphs(const std::string& to_escape) {
767  return StringReplace(to_escape, "?", "\\?", true);
768}
769
770// Escaped function name to eliminate naming conflict.
771std::string SafeFunctionName(const Descriptor* descriptor,
772                             const FieldDescriptor* field,
773                             const std::string& prefix) {
774  // Do not use FieldName() since it will escape keywords.
775  std::string name = field->name();
776  LowerString(&name);
777  std::string function_name = prefix + name;
778  if (descriptor->FindFieldByName(function_name)) {
779    // Single underscore will also make it conflicting with the private data
780    // member. We use double underscore to escape function names.
781    function_name.append("__");
782  } else if (kKeywords.count(name) > 0) {
783    // If the field name is a keyword, we append the underscore back to keep it
784    // consistent with other function names.
785    function_name.append("_");
786  }
787  return function_name;
788}
789
790bool IsStringInlined(const FieldDescriptor* descriptor,
791                     const Options& options) {
792  if (options.opensource_runtime) return false;
793
794  // TODO(ckennelly): Handle inlining for any.proto.
795  if (IsAnyMessage(descriptor->containing_type(), options)) return false;
796  if (descriptor->containing_type()->options().map_entry()) return false;
797
798  // We rely on has bits to distinguish field presence for release_$name$.  When
799  // there is no hasbit, we cannot use the address of the string instance when
800  // the field has been inlined.
801  if (!HasHasbit(descriptor)) return false;
802
803  if (options.access_info_map) {
804    if (descriptor->is_required()) return true;
805  }
806  return false;
807}
808
809static bool HasLazyFields(const Descriptor* descriptor,
810                          const Options& options) {
811  for (int field_idx = 0; field_idx < descriptor->field_count(); field_idx++) {
812    if (IsLazy(descriptor->field(field_idx), options)) {
813      return true;
814    }
815  }
816  for (int idx = 0; idx < descriptor->extension_count(); idx++) {
817    if (IsLazy(descriptor->extension(idx), options)) {
818      return true;
819    }
820  }
821  for (int idx = 0; idx < descriptor->nested_type_count(); idx++) {
822    if (HasLazyFields(descriptor->nested_type(idx), options)) {
823      return true;
824    }
825  }
826  return false;
827}
828
829// Does the given FileDescriptor use lazy fields?
830bool HasLazyFields(const FileDescriptor* file, const Options& options) {
831  for (int i = 0; i < file->message_type_count(); i++) {
832    const Descriptor* descriptor(file->message_type(i));
833    if (HasLazyFields(descriptor, options)) {
834      return true;
835    }
836  }
837  for (int field_idx = 0; field_idx < file->extension_count(); field_idx++) {
838    if (IsLazy(file->extension(field_idx), options)) {
839      return true;
840    }
841  }
842  return false;
843}
844
845static bool HasRepeatedFields(const Descriptor* descriptor) {
846  for (int i = 0; i < descriptor->field_count(); ++i) {
847    if (descriptor->field(i)->label() == FieldDescriptor::LABEL_REPEATED) {
848      return true;
849    }
850  }
851  for (int i = 0; i < descriptor->nested_type_count(); ++i) {
852    if (HasRepeatedFields(descriptor->nested_type(i))) return true;
853  }
854  return false;
855}
856
857bool HasRepeatedFields(const FileDescriptor* file) {
858  for (int i = 0; i < file->message_type_count(); ++i) {
859    if (HasRepeatedFields(file->message_type(i))) return true;
860  }
861  return false;
862}
863
864static bool IsStringPieceField(const FieldDescriptor* field,
865                               const Options& options) {
866  return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
867         EffectiveStringCType(field, options) == FieldOptions::STRING_PIECE;
868}
869
870static bool HasStringPieceFields(const Descriptor* descriptor,
871                                 const Options& options) {
872  for (int i = 0; i < descriptor->field_count(); ++i) {
873    if (IsStringPieceField(descriptor->field(i), options)) return true;
874  }
875  for (int i = 0; i < descriptor->nested_type_count(); ++i) {
876    if (HasStringPieceFields(descriptor->nested_type(i), options)) return true;
877  }
878  return false;
879}
880
881bool HasStringPieceFields(const FileDescriptor* file, const Options& options) {
882  for (int i = 0; i < file->message_type_count(); ++i) {
883    if (HasStringPieceFields(file->message_type(i), options)) return true;
884  }
885  return false;
886}
887
888static bool IsCordField(const FieldDescriptor* field, const Options& options) {
889  return field->cpp_type() == FieldDescriptor::CPPTYPE_STRING &&
890         EffectiveStringCType(field, options) == FieldOptions::CORD;
891}
892
893static bool HasCordFields(const Descriptor* descriptor,
894                          const Options& options) {
895  for (int i = 0; i < descriptor->field_count(); ++i) {
896    if (IsCordField(descriptor->field(i), options)) return true;
897  }
898  for (int i = 0; i < descriptor->nested_type_count(); ++i) {
899    if (HasCordFields(descriptor->nested_type(i), options)) return true;
900  }
901  return false;
902}
903
904bool HasCordFields(const FileDescriptor* file, const Options& options) {
905  for (int i = 0; i < file->message_type_count(); ++i) {
906    if (HasCordFields(file->message_type(i), options)) return true;
907  }
908  return false;
909}
910
911static bool HasExtensionsOrExtendableMessage(const Descriptor* descriptor) {
912  if (descriptor->extension_range_count() > 0) return true;
913  if (descriptor->extension_count() > 0) return true;
914  for (int i = 0; i < descriptor->nested_type_count(); ++i) {
915    if (HasExtensionsOrExtendableMessage(descriptor->nested_type(i))) {
916      return true;
917    }
918  }
919  return false;
920}
921
922bool HasExtensionsOrExtendableMessage(const FileDescriptor* file) {
923  if (file->extension_count() > 0) return true;
924  for (int i = 0; i < file->message_type_count(); ++i) {
925    if (HasExtensionsOrExtendableMessage(file->message_type(i))) return true;
926  }
927  return false;
928}
929
930static bool HasMapFields(const Descriptor* descriptor) {
931  for (int i = 0; i < descriptor->field_count(); ++i) {
932    if (descriptor->field(i)->is_map()) {
933      return true;
934    }
935  }
936  for (int i = 0; i < descriptor->nested_type_count(); ++i) {
937    if (HasMapFields(descriptor->nested_type(i))) return true;
938  }
939  return false;
940}
941
942bool HasMapFields(const FileDescriptor* file) {
943  for (int i = 0; i < file->message_type_count(); ++i) {
944    if (HasMapFields(file->message_type(i))) return true;
945  }
946  return false;
947}
948
949static bool HasEnumDefinitions(const Descriptor* message_type) {
950  if (message_type->enum_type_count() > 0) return true;
951  for (int i = 0; i < message_type->nested_type_count(); ++i) {
952    if (HasEnumDefinitions(message_type->nested_type(i))) return true;
953  }
954  return false;
955}
956
957bool HasEnumDefinitions(const FileDescriptor* file) {
958  if (file->enum_type_count() > 0) return true;
959  for (int i = 0; i < file->message_type_count(); ++i) {
960    if (HasEnumDefinitions(file->message_type(i))) return true;
961  }
962  return false;
963}
964
965bool IsStringOrMessage(const FieldDescriptor* field) {
966  switch (field->cpp_type()) {
967    case FieldDescriptor::CPPTYPE_INT32:
968    case FieldDescriptor::CPPTYPE_INT64:
969    case FieldDescriptor::CPPTYPE_UINT32:
970    case FieldDescriptor::CPPTYPE_UINT64:
971    case FieldDescriptor::CPPTYPE_DOUBLE:
972    case FieldDescriptor::CPPTYPE_FLOAT:
973    case FieldDescriptor::CPPTYPE_BOOL:
974    case FieldDescriptor::CPPTYPE_ENUM:
975      return false;
976    case FieldDescriptor::CPPTYPE_STRING:
977    case FieldDescriptor::CPPTYPE_MESSAGE:
978      return true;
979  }
980
981  GOOGLE_LOG(FATAL) << "Can't get here.";
982  return false;
983}
984
985FieldOptions::CType EffectiveStringCType(const FieldDescriptor* field,
986                                         const Options& options) {
987  GOOGLE_DCHECK(field->cpp_type() == FieldDescriptor::CPPTYPE_STRING);
988  if (options.opensource_runtime) {
989    // Open-source protobuf release only supports STRING ctype.
990    return FieldOptions::STRING;
991  } else {
992    // Google-internal supports all ctypes.
993    return field->options().ctype();
994  }
995}
996
997bool IsAnyMessage(const FileDescriptor* descriptor, const Options& options) {
998  return descriptor->name() == kAnyProtoFile;
999}
1000
1001bool IsAnyMessage(const Descriptor* descriptor, const Options& options) {
1002  return descriptor->name() == kAnyMessageName &&
1003         IsAnyMessage(descriptor->file(), options);
1004}
1005
1006bool IsWellKnownMessage(const FileDescriptor* file) {
1007  static const std::unordered_set<std::string> well_known_files{
1008      "google/protobuf/any.proto",
1009      "google/protobuf/api.proto",
1010      "google/protobuf/compiler/plugin.proto",
1011      "google/protobuf/descriptor.proto",
1012      "google/protobuf/duration.proto",
1013      "google/protobuf/empty.proto",
1014      "google/protobuf/field_mask.proto",
1015      "google/protobuf/source_context.proto",
1016      "google/protobuf/struct.proto",
1017      "google/protobuf/timestamp.proto",
1018      "google/protobuf/type.proto",
1019      "google/protobuf/wrappers.proto",
1020  };
1021  return well_known_files.find(file->name()) != well_known_files.end();
1022}
1023
// Reports whether UTF-8 enforcement applies to `field`. This implementation
// ignores both arguments and unconditionally returns true.
static bool FieldEnforceUtf8(const FieldDescriptor* field,
                             const Options& options) {
  return true;
}
1028
// Reports whether UTF-8 verification applies to `file`. This implementation
// ignores both arguments and unconditionally returns true.
static bool FileUtf8Verification(const FileDescriptor* file,
                                 const Options& options) {
  return true;
}
1033
1034// Which level of UTF-8 enforcemant is placed on this file.
1035Utf8CheckMode GetUtf8CheckMode(const FieldDescriptor* field,
1036                               const Options& options) {
1037  if (field->file()->syntax() == FileDescriptor::SYNTAX_PROTO3 &&
1038      FieldEnforceUtf8(field, options)) {
1039    return STRICT;
1040  } else if (GetOptimizeFor(field->file(), options) !=
1041                 FileOptions::LITE_RUNTIME &&
1042             FileUtf8Verification(field->file(), options)) {
1043    return VERIFY;
1044  } else {
1045    return NONE;
1046  }
1047}
1048
1049static void GenerateUtf8CheckCode(const FieldDescriptor* field,
1050                                  const Options& options, bool for_parse,
1051                                  const char* parameters,
1052                                  const char* strict_function,
1053                                  const char* verify_function,
1054                                  const Formatter& format) {
1055  switch (GetUtf8CheckMode(field, options)) {
1056    case STRICT: {
1057      if (for_parse) {
1058        format("DO_(");
1059      }
1060      format("::$proto_ns$::internal::WireFormatLite::$1$(\n", strict_function);
1061      format.Indent();
1062      format(parameters);
1063      if (for_parse) {
1064        format("::$proto_ns$::internal::WireFormatLite::PARSE,\n");
1065      } else {
1066        format("::$proto_ns$::internal::WireFormatLite::SERIALIZE,\n");
1067      }
1068      format("\"$1$\")", field->full_name());
1069      if (for_parse) {
1070        format(")");
1071      }
1072      format(";\n");
1073      format.Outdent();
1074      break;
1075    }
1076    case VERIFY: {
1077      format("::$proto_ns$::internal::WireFormat::$1$(\n", verify_function);
1078      format.Indent();
1079      format(parameters);
1080      if (for_parse) {
1081        format("::$proto_ns$::internal::WireFormat::PARSE,\n");
1082      } else {
1083        format("::$proto_ns$::internal::WireFormat::SERIALIZE,\n");
1084      }
1085      format("\"$1$\");\n", field->full_name());
1086      format.Outdent();
1087      break;
1088    }
1089    case NONE:
1090      break;
1091  }
1092}
1093
1094void GenerateUtf8CheckCodeForString(const FieldDescriptor* field,
1095                                    const Options& options, bool for_parse,
1096                                    const char* parameters,
1097                                    const Formatter& format) {
1098  GenerateUtf8CheckCode(field, options, for_parse, parameters,
1099                        "VerifyUtf8String", "VerifyUTF8StringNamedField",
1100                        format);
1101}
1102
1103void GenerateUtf8CheckCodeForCord(const FieldDescriptor* field,
1104                                  const Options& options, bool for_parse,
1105                                  const char* parameters,
1106                                  const Formatter& format) {
1107  GenerateUtf8CheckCode(field, options, for_parse, parameters, "VerifyUtf8Cord",
1108                        "VerifyUTF8CordNamedField", format);
1109}
1110
1111namespace {
1112
1113void Flatten(const Descriptor* descriptor,
1114             std::vector<const Descriptor*>* flatten) {
1115  for (int i = 0; i < descriptor->nested_type_count(); i++)
1116    Flatten(descriptor->nested_type(i), flatten);
1117  flatten->push_back(descriptor);
1118}
1119
1120}  // namespace
1121
1122void FlattenMessagesInFile(const FileDescriptor* file,
1123                           std::vector<const Descriptor*>* result) {
1124  for (int i = 0; i < file->message_type_count(); i++) {
1125    Flatten(file->message_type(i), result);
1126  }
1127}
1128
1129bool HasWeakFields(const Descriptor* descriptor, const Options& options) {
1130  for (int i = 0; i < descriptor->field_count(); i++) {
1131    if (IsWeak(descriptor->field(i), options)) return true;
1132  }
1133  return false;
1134}
1135
1136bool HasWeakFields(const FileDescriptor* file, const Options& options) {
1137  for (int i = 0; i < file->message_type_count(); ++i) {
1138    if (HasWeakFields(file->message_type(i), options)) return true;
1139  }
1140  return false;
1141}
1142
1143bool UsingImplicitWeakFields(const FileDescriptor* file,
1144                             const Options& options) {
1145  return options.lite_implicit_weak_fields &&
1146         GetOptimizeFor(file, options) == FileOptions::LITE_RUNTIME;
1147}
1148
1149bool IsImplicitWeakField(const FieldDescriptor* field, const Options& options,
1150                         MessageSCCAnalyzer* scc_analyzer) {
1151  return UsingImplicitWeakFields(field->file(), options) &&
1152         field->type() == FieldDescriptor::TYPE_MESSAGE &&
1153         !field->is_required() && !field->is_map() && !field->is_extension() &&
1154         !field->real_containing_oneof() &&
1155         !IsWellKnownMessage(field->message_type()->file()) &&
1156         field->message_type()->file()->name() !=
1157             "net/proto2/proto/descriptor.proto" &&
1158         // We do not support implicit weak fields between messages in the same
1159         // strongly-connected component.
1160         scc_analyzer->GetSCC(field->containing_type()) !=
1161             scc_analyzer->GetSCC(field->message_type());
1162}
1163
1164MessageAnalysis MessageSCCAnalyzer::GetSCCAnalysis(const SCC* scc) {
1165  if (analysis_cache_.count(scc)) return analysis_cache_[scc];
1166  MessageAnalysis result{};
1167  for (int i = 0; i < scc->descriptors.size(); i++) {
1168    const Descriptor* descriptor = scc->descriptors[i];
1169    if (descriptor->extension_range_count() > 0) {
1170      result.contains_extension = true;
1171      // Extensions are found by looking up default_instance and extension
1172      // number in a map. So you'd maybe expect here
1173      // result.constructor_requires_initialization = true;
1174      // However the extension registration mechanism already makes sure
1175      // the default will be initialized.
1176    }
1177    for (int i = 0; i < descriptor->field_count(); i++) {
1178      const FieldDescriptor* field = descriptor->field(i);
1179      if (field->is_required()) {
1180        result.contains_required = true;
1181      }
1182      switch (field->type()) {
1183        case FieldDescriptor::TYPE_STRING:
1184        case FieldDescriptor::TYPE_BYTES: {
1185          result.constructor_requires_initialization = true;
1186          if (field->options().ctype() == FieldOptions::CORD) {
1187            result.contains_cord = true;
1188          }
1189          break;
1190        }
1191        case FieldDescriptor::TYPE_GROUP:
1192        case FieldDescriptor::TYPE_MESSAGE: {
1193          result.constructor_requires_initialization = true;
1194          const SCC* child = analyzer_.GetSCC(field->message_type());
1195          if (child != scc) {
1196            MessageAnalysis analysis = GetSCCAnalysis(child);
1197            result.contains_cord |= analysis.contains_cord;
1198            result.contains_extension |= analysis.contains_extension;
1199            if (!ShouldIgnoreRequiredFieldCheck(field, options_)) {
1200              result.contains_required |= analysis.contains_required;
1201            }
1202          } else {
1203            // This field points back into the same SCC hence the messages
1204            // in the SCC are recursive. Note if SCC contains more than two
1205            // nodes it has to be recursive, however this test also works for
1206            // a single node that is recursive.
1207            result.is_recursive = true;
1208          }
1209          break;
1210        }
1211        default:
1212          break;
1213      }
1214    }
1215  }
1216  // We deliberately only insert the result here. After we contracted the SCC
1217  // in the graph, the graph should be a DAG. Hence we shouldn't need to mark
1218  // nodes visited as we can never return to them. By inserting them here
1219  // we will go in an infinite loop if the SCC is not correct.
1220  return analysis_cache_[scc] = result;
1221}
1222
1223void ListAllFields(const Descriptor* d,
1224                   std::vector<const FieldDescriptor*>* fields) {
1225  // Collect sub messages
1226  for (int i = 0; i < d->nested_type_count(); i++) {
1227    ListAllFields(d->nested_type(i), fields);
1228  }
1229  // Collect message level extensions.
1230  for (int i = 0; i < d->extension_count(); i++) {
1231    fields->push_back(d->extension(i));
1232  }
1233  // Add types of fields necessary
1234  for (int i = 0; i < d->field_count(); i++) {
1235    fields->push_back(d->field(i));
1236  }
1237}
1238
1239void ListAllFields(const FileDescriptor* d,
1240                   std::vector<const FieldDescriptor*>* fields) {
1241  // Collect file level message.
1242  for (int i = 0; i < d->message_type_count(); i++) {
1243    ListAllFields(d->message_type(i), fields);
1244  }
1245  // Collect message level extensions.
1246  for (int i = 0; i < d->extension_count(); i++) {
1247    fields->push_back(d->extension(i));
1248  }
1249}
1250
1251void ListAllTypesForServices(const FileDescriptor* fd,
1252                             std::vector<const Descriptor*>* types) {
1253  for (int i = 0; i < fd->service_count(); i++) {
1254    const ServiceDescriptor* sd = fd->service(i);
1255    for (int j = 0; j < sd->method_count(); j++) {
1256      const MethodDescriptor* method = sd->method(j);
1257      types->push_back(method->input_type());
1258      types->push_back(method->output_type());
1259    }
1260  }
1261}
1262
1263bool GetBootstrapBasename(const Options& options, const std::string& basename,
1264                          std::string* bootstrap_basename) {
1265  if (options.opensource_runtime) {
1266    return false;
1267  }
1268
1269  std::unordered_map<std::string, std::string> bootstrap_mapping{
1270      {"net/proto2/proto/descriptor",
1271       "net/proto2/internal/descriptor"},
1272      {"net/proto2/compiler/proto/plugin",
1273       "net/proto2/compiler/proto/plugin"},
1274      {"net/proto2/compiler/proto/profile",
1275       "net/proto2/compiler/proto/profile_bootstrap"},
1276  };
1277  auto iter = bootstrap_mapping.find(basename);
1278  if (iter == bootstrap_mapping.end()) {
1279    *bootstrap_basename = basename;
1280    return false;
1281  } else {
1282    *bootstrap_basename = iter->second;
1283    return true;
1284  }
1285}
1286
1287bool IsBootstrapProto(const Options& options, const FileDescriptor* file) {
1288  std::string my_name = StripProto(file->name());
1289  return GetBootstrapBasename(options, my_name, &my_name);
1290}
1291
1292bool MaybeBootstrap(const Options& options, GeneratorContext* generator_context,
1293                    bool bootstrap_flag, std::string* basename) {
1294  std::string bootstrap_basename;
1295  if (!GetBootstrapBasename(options, *basename, &bootstrap_basename)) {
1296    return false;
1297  }
1298
1299  if (bootstrap_flag) {
1300    // Adjust basename, but don't abort code generation.
1301    *basename = bootstrap_basename;
1302    return false;
1303  } else {
1304    std::string forward_to_basename = bootstrap_basename;
1305
1306    // Generate forwarding headers and empty .pb.cc.
1307    {
1308      std::unique_ptr<io::ZeroCopyOutputStream> output(
1309          generator_context->Open(*basename + ".pb.h"));
1310      io::Printer printer(output.get(), '$', nullptr);
1311      printer.Print(
1312          "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1313          "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n"
1314          "#include \"$forward_to_basename$.pb.h\"  // IWYU pragma: export\n"
1315          "#endif  // PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PB_H\n",
1316          "forward_to_basename", forward_to_basename, "filename_identifier",
1317          FilenameIdentifier(*basename));
1318
1319      if (!options.opensource_runtime) {
1320        // HACK HACK HACK, tech debt from the deeps of proto1 and SWIG
1321        // protocoltype is SWIG'ed and we need to forward
1322        if (*basename == "net/proto/protocoltype") {
1323          printer.Print(
1324              "#ifdef SWIG\n"
1325              "%include \"$forward_to_basename$.pb.h\"\n"
1326              "#endif  // SWIG\n",
1327              "forward_to_basename", forward_to_basename);
1328        }
1329      }
1330    }
1331
1332    {
1333      std::unique_ptr<io::ZeroCopyOutputStream> output(
1334          generator_context->Open(*basename + ".proto.h"));
1335      io::Printer printer(output.get(), '$', nullptr);
1336      printer.Print(
1337          "#ifndef PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1338          "#define PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n"
1339          "#include \"$forward_to_basename$.proto.h\"  // IWYU pragma: "
1340          "export\n"
1341          "#endif  // "
1342          "PROTOBUF_INCLUDED_$filename_identifier$_FORWARD_PROTO_H\n",
1343          "forward_to_basename", forward_to_basename, "filename_identifier",
1344          FilenameIdentifier(*basename));
1345    }
1346
1347    {
1348      std::unique_ptr<io::ZeroCopyOutputStream> output(
1349          generator_context->Open(*basename + ".pb.cc"));
1350      io::Printer printer(output.get(), '$', nullptr);
1351      printer.Print("\n");
1352    }
1353
1354    {
1355      std::unique_ptr<io::ZeroCopyOutputStream> output(
1356          generator_context->Open(*basename + ".pb.h.meta"));
1357    }
1358
1359    {
1360      std::unique_ptr<io::ZeroCopyOutputStream> output(
1361          generator_context->Open(*basename + ".proto.h.meta"));
1362    }
1363
1364    // Abort code generation.
1365    return true;
1366  }
1367}
1368
1369class ParseLoopGenerator {
1370 public:
  // Stores the has-bit count, the SCC analyzer pointer and a reference to
  // `options` (held by reference, not copied), and constructs the Formatter
  // member from `printer`.
  ParseLoopGenerator(int num_hasbits, const Options& options,
                     MessageSCCAnalyzer* scc_analyzer, io::Printer* printer)
      : scc_analyzer_(scc_analyzer),
        options_(options),
        format_(printer),
        num_hasbits_(num_hasbits) {}
1377
1378  void GenerateParserLoop(const Descriptor* descriptor) {
1379    format_.Set("classname", ClassName(descriptor));
1380    format_.Set("p_ns", "::" + ProtobufNamespace(options_));
1381    format_.Set("pi_ns",
1382                StrCat("::", ProtobufNamespace(options_), "::internal"));
1383    format_.Set("GOOGLE_PROTOBUF", MacroPrefix(options_));
1384    std::map<std::string, std::string> vars;
1385    SetCommonVars(options_, &vars);
1386    SetUnknkownFieldsVariable(descriptor, options_, &vars);
1387    format_.AddMap(vars);
1388
1389    std::vector<const FieldDescriptor*> ordered_fields;
1390    for (auto field : FieldRange(descriptor)) {
1391      if (IsFieldUsed(field, options_)) {
1392        ordered_fields.push_back(field);
1393      }
1394    }
1395    std::sort(ordered_fields.begin(), ordered_fields.end(),
1396              [](const FieldDescriptor* a, const FieldDescriptor* b) {
1397                return a->number() < b->number();
1398              });
1399
1400    format_(
1401        "const char* $classname$::_InternalParse(const char* ptr, "
1402        "$pi_ns$::ParseContext* ctx) {\n"
1403        "#define CHK_(x) if (PROTOBUF_PREDICT_FALSE(!(x))) goto failure\n");
1404    format_.Indent();
1405    int hasbits_size = 0;
1406    if (num_hasbits_ > 0) {
1407      hasbits_size = (num_hasbits_ + 31) / 32;
1408    }
1409    // For now only optimize small hasbits.
1410    if (hasbits_size != 1) hasbits_size = 0;
1411    if (hasbits_size) {
1412      format_("_Internal::HasBits has_bits{};\n");
1413      format_.Set("has_bits", "has_bits");
1414    } else {
1415      format_.Set("has_bits", "_has_bits_");
1416    }
1417
1418    if (descriptor->file()->options().cc_enable_arenas()) {
1419      format_("$p_ns$::Arena* arena = GetArena(); (void)arena;\n");
1420    }
1421    GenerateParseLoop(descriptor, ordered_fields);
1422    format_.Outdent();
1423    format_("success:\n");
1424    if (hasbits_size) format_("  _has_bits_.Or(has_bits);\n");
1425
1426    format_(
1427        "  return ptr;\n"
1428        "failure:\n"
1429        "  ptr = nullptr;\n"
1430        "  goto success;\n"
1431        "#undef CHK_\n"
1432        "}\n");
1433  }
1434
1435 private:
1436  MessageSCCAnalyzer* scc_analyzer_;
1437  const Options& options_;
1438  Formatter format_;
1439  int num_hasbits_;
1440
1441  using WireFormat = internal::WireFormat;
1442  using WireFormatLite = internal::WireFormatLite;
1443
1444  void GenerateArenaString(const FieldDescriptor* field) {
1445    if (HasHasbit(field)) {
1446      format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1447    }
1448    std::string default_string =
1449        field->default_value_string().empty()
1450            ? "::" + ProtobufNamespace(options_) +
1451                  "::internal::GetEmptyStringAlreadyInited()"
1452            : QualifiedClassName(field->containing_type(), options_) +
1453                  "::" + MakeDefaultName(field) + ".get()";
1454    format_(
1455        "if (arena != nullptr) {\n"
1456        "  ptr = ctx->ReadArenaString(ptr, &$1$_, arena);\n"
1457        "} else {\n"
1458        "  ptr = "
1459        "$pi_ns$::InlineGreedyStringParser($1$_.MutableNoArenaNoDefault(&$2$"
1460        "), ptr, ctx);"
1461        "\n}\n"
1462        "const std::string* str = &$1$_.Get(); (void)str;\n",
1463        FieldName(field), default_string);
1464  }
1465
1466  void GenerateStrings(const FieldDescriptor* field, bool check_utf8) {
1467    FieldOptions::CType ctype = FieldOptions::STRING;
1468    if (!options_.opensource_runtime) {
1469      // Open source doesn't support other ctypes;
1470      ctype = field->options().ctype();
1471    }
1472    if (field->file()->options().cc_enable_arenas() && !field->is_repeated() &&
1473        !options_.opensource_runtime &&
1474        GetOptimizeFor(field->file(), options_) != FileOptions::LITE_RUNTIME &&
1475        // For now only use arena string for strings with empty defaults.
1476        field->default_value_string().empty() &&
1477        !IsStringInlined(field, options_) && !field->real_containing_oneof() &&
1478        ctype == FieldOptions::STRING) {
1479      GenerateArenaString(field);
1480    } else {
1481      std::string name;
1482      switch (ctype) {
1483        case FieldOptions::STRING:
1484          name = "GreedyStringParser";
1485          break;
1486        case FieldOptions::CORD:
1487          name = "CordParser";
1488          break;
1489        case FieldOptions::STRING_PIECE:
1490          name = "StringPieceParser";
1491          break;
1492      }
1493      format_(
1494          "auto str = $1$$2$_$3$();\n"
1495          "ptr = $pi_ns$::Inline$4$(str, ptr, ctx);\n",
1496          HasInternalAccessors(ctype) ? "_internal_" : "",
1497          field->is_repeated() && !field->is_packable() ? "add" : "mutable",
1498          FieldName(field), name);
1499    }
1500    if (!check_utf8) return;  // return if this is a bytes field
1501    auto level = GetUtf8CheckMode(field, options_);
1502    switch (level) {
1503      case NONE:
1504        return;
1505      case VERIFY:
1506        format_("#ifndef NDEBUG\n");
1507        break;
1508      case STRICT:
1509        format_("CHK_(");
1510        break;
1511    }
1512    std::string field_name;
1513    field_name = "nullptr";
1514    if (HasDescriptorMethods(field->file(), options_)) {
1515      field_name = StrCat("\"", field->full_name(), "\"");
1516    }
1517    format_("$pi_ns$::VerifyUTF8(str, $1$)", field_name);
1518    switch (level) {
1519      case NONE:
1520        return;
1521      case VERIFY:
1522        format_(
1523            ";\n"
1524            "#endif  // !NDEBUG\n");
1525        break;
1526      case STRICT:
1527        format_(");\n");
1528        break;
1529    }
1530  }
1531
  // Emits the parse code for `field` when it arrives with the
  // length-delimited wire type. The emitted statement(s) assign the advanced
  // position to `ptr`. Cases handled:
  //   * packable fields: a Packed<Type>Parser call (with an enum validator
  //     for enums that do not preserve unknown values),
  //   * string / bytes: delegated to GenerateStrings(),
  //   * messages: map, lazy, implicit-weak, weak and plain sub-messages each
  //     get their own ctx->ParseMessage(...) form.
  // Any other field type is a fatal error.
  void GenerateLengthDelim(const FieldDescriptor* field) {
    if (field->is_packable()) {
      std::string enum_validator;
      if (field->type() == FieldDescriptor::TYPE_ENUM &&
          !HasPreservingUnknownEnumSemantics(field)) {
        // Extra trailing arguments passed to the packed parser: the enum's
        // _IsValid function, the metadata object and the field number.
        enum_validator =
            StrCat(", ", QualifiedClassName(field->enum_type(), options_),
                         "_IsValid, &_internal_metadata_, ", field->number());
        format_(
            "ptr = "
            "$pi_ns$::Packed$1$Parser<$unknown_fields_type$>(_internal_mutable_"
            "$2$(), ptr, "
            "ctx$3$);\n",
            DeclaredTypeMethodName(field->type()), FieldName(field),
            enum_validator);
      } else {
        // enum_validator is still empty here, so $3$ expands to nothing.
        format_(
            "ptr = $pi_ns$::Packed$1$Parser(_internal_mutable_$2$(), ptr, "
            "ctx$3$);\n",
            DeclaredTypeMethodName(field->type()), FieldName(field),
            enum_validator);
      }
    } else {
      auto field_type = field->type();
      switch (field_type) {
        case FieldDescriptor::TYPE_STRING:
          GenerateStrings(field, true /* utf8 */);
          break;
        case FieldDescriptor::TYPE_BYTES:
          GenerateStrings(field, false /* utf8 */);
          break;
        case FieldDescriptor::TYPE_MESSAGE: {
          if (field->is_map()) {
            // The map entry message must have a field named "value".
            const FieldDescriptor* val =
                field->message_type()->FindFieldByName("value");
            GOOGLE_CHECK(val);
            if (val->type() == FieldDescriptor::TYPE_ENUM &&
                !HasPreservingUnknownEnumSemantics(field)) {
              // Enum-valued map: parse through a wrapper that is given the
              // enum's _IsValid function, the field number and the metadata.
              format_(
                  "auto object = "
                  "::$proto_ns$::internal::InitEnumParseWrapper<$unknown_"
                  "fields_type$>("
                  "&$1$_, $2$_IsValid, $3$, &_internal_metadata_);\n"
                  "ptr = ctx->ParseMessage(&object, ptr);\n",
                  FieldName(field), QualifiedClassName(val->enum_type()),
                  field->number());
            } else {
              format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
                      FieldName(field));
            }
          } else if (IsLazy(field, options_)) {
            if (field->real_containing_oneof()) {
              // Lazy field inside a oneof: the emitted code clears the oneof
              // and creates the LazyField when this member is not the one
              // currently set, then parses into it.
              format_(
                  "if (!_internal_has_$1$()) {\n"
                  "  clear_$2$();\n"
                  "  $2$_.$1$_ = ::$proto_ns$::Arena::CreateMessage<\n"
                  "      $pi_ns$::LazyField>(GetArena());\n"
                  "  set_has_$1$();\n"
                  "}\n"
                  "ptr = ctx->ParseMessage($2$_.$1$_, ptr);\n",
                  FieldName(field), field->containing_oneof()->name());
            } else if (HasHasbit(field)) {
              format_(
                  "_Internal::set_has_$1$(&$has_bits$);\n"
                  "ptr = ctx->ParseMessage(&$1$_, ptr);\n",
                  FieldName(field));
            } else {
              format_("ptr = ctx->ParseMessage(&$1$_, ptr);\n",
                      FieldName(field));
            }
          } else if (IsImplicitWeakField(field, options_, scc_analyzer_)) {
            if (!field->is_repeated()) {
              format_(
                  "ptr = ctx->ParseMessage(_Internal::mutable_$1$(this), "
                  "ptr);\n",
                  FieldName(field));
            } else {
              // Repeated implicit-weak field: the new element is added via
              // AddWeak() with the message type's default instance pointer.
              format_(
                  "ptr = ctx->ParseMessage($1$_.AddWeak(reinterpret_cast<const "
                  "::$proto_ns$::MessageLite*>($2$::_$3$_default_instance_ptr_)"
                  "), ptr);\n",
                  FieldName(field), Namespace(field->message_type(), options_),
                  ClassName(field->message_type()));
            }
          } else if (IsWeak(field, options_)) {
            // Explicit weak field: parsed into the message obtained from
            // _weak_field_map_ for this field number.
            format_(
                "ptr = ctx->ParseMessage(_weak_field_map_.MutableMessage($1$,"
                " _$classname$_default_instance_.$2$_), ptr);\n",
                field->number(), FieldName(field));
          } else {
            // Plain sub-message: add a new element for repeated fields,
            // otherwise obtain the mutable singular message.
            format_("ptr = ctx->ParseMessage(_internal_$1$_$2$(), ptr);\n",
                    field->is_repeated() ? "add" : "mutable", FieldName(field));
          }
          break;
        }
        default:
          // No other non-packable field type is handled for this wire type.
          GOOGLE_LOG(FATAL) << "Illegal combination for length delimited wiretype "
                     << " filed type is " << field->type();
      }
    }
  }
1633
1634  // Convert a 1 or 2 byte varint into the equivalent value upon a direct load.
1635  static uint32 SmallVarintValue(uint32 x) {
1636    GOOGLE_DCHECK(x < 128 * 128);
1637    if (x >= 128) x += (x & 0xFF80) + 128;
1638    return x;
1639  }
1640
1641  static bool ShouldRepeat(const FieldDescriptor* descriptor,
1642                           internal::WireFormatLite::WireType wiretype) {
1643    constexpr int kMaxTwoByteFieldNumber = 16 * 128;
1644    return descriptor->number() < kMaxTwoByteFieldNumber &&
1645           descriptor->is_repeated() &&
1646           (!descriptor->is_packable() ||
1647            wiretype != internal::WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1648  }
1649
1650  void GenerateFieldBody(internal::WireFormatLite::WireType wiretype,
1651                         const FieldDescriptor* field) {
1652    uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1653    switch (wiretype) {
1654      case WireFormatLite::WIRETYPE_VARINT: {
1655        std::string type = PrimitiveTypeName(options_, field->cpp_type());
1656        std::string prefix = field->is_repeated() ? "add" : "set";
1657        if (field->type() == FieldDescriptor::TYPE_ENUM) {
1658          format_(
1659              "$uint64$ val = $pi_ns$::ReadVarint64(&ptr);\n"
1660              "CHK_(ptr);\n");
1661          if (!HasPreservingUnknownEnumSemantics(field)) {
1662            format_("if (PROTOBUF_PREDICT_TRUE($1$_IsValid(val))) {\n",
1663                    QualifiedClassName(field->enum_type(), options_));
1664            format_.Indent();
1665          }
1666          format_("_internal_$1$_$2$(static_cast<$3$>(val));\n", prefix,
1667                  FieldName(field),
1668                  QualifiedClassName(field->enum_type(), options_));
1669          if (!HasPreservingUnknownEnumSemantics(field)) {
1670            format_.Outdent();
1671            format_(
1672                "} else {\n"
1673                "  $pi_ns$::WriteVarint($1$, val, mutable_unknown_fields());\n"
1674                "}\n",
1675                field->number());
1676          }
1677        } else {
1678          std::string size = (field->type() == FieldDescriptor::TYPE_SINT32 ||
1679                              field->type() == FieldDescriptor::TYPE_UINT32)
1680                                 ? "32"
1681                                 : "64";
1682          std::string zigzag;
1683          if ((field->type() == FieldDescriptor::TYPE_SINT32 ||
1684               field->type() == FieldDescriptor::TYPE_SINT64)) {
1685            zigzag = "ZigZag";
1686          }
1687          if (field->is_repeated() || field->real_containing_oneof()) {
1688            std::string prefix = field->is_repeated() ? "add" : "set";
1689            format_(
1690                "_internal_$1$_$2$($pi_ns$::ReadVarint$3$$4$(&ptr));\n"
1691                "CHK_(ptr);\n",
1692                prefix, FieldName(field), zigzag, size);
1693          } else {
1694            if (HasHasbit(field)) {
1695              format_("_Internal::set_has_$1$(&$has_bits$);\n",
1696                      FieldName(field));
1697            }
1698            format_(
1699                "$1$_ = $pi_ns$::ReadVarint$2$$3$(&ptr);\n"
1700                "CHK_(ptr);\n",
1701                FieldName(field), zigzag, size);
1702          }
1703        }
1704        break;
1705      }
1706      case WireFormatLite::WIRETYPE_FIXED32:
1707      case WireFormatLite::WIRETYPE_FIXED64: {
1708        std::string type = PrimitiveTypeName(options_, field->cpp_type());
1709        if (field->is_repeated() || field->real_containing_oneof()) {
1710          std::string prefix = field->is_repeated() ? "add" : "set";
1711          format_(
1712              "_internal_$1$_$2$($pi_ns$::UnalignedLoad<$3$>(ptr));\n"
1713              "ptr += sizeof($3$);\n",
1714              prefix, FieldName(field), type);
1715        } else {
1716          if (HasHasbit(field)) {
1717            format_("_Internal::set_has_$1$(&$has_bits$);\n", FieldName(field));
1718          }
1719          format_(
1720              "$1$_ = $pi_ns$::UnalignedLoad<$2$>(ptr);\n"
1721              "ptr += sizeof($2$);\n",
1722              FieldName(field), type);
1723        }
1724        break;
1725      }
1726      case WireFormatLite::WIRETYPE_LENGTH_DELIMITED: {
1727        GenerateLengthDelim(field);
1728        format_("CHK_(ptr);\n");
1729        break;
1730      }
1731      case WireFormatLite::WIRETYPE_START_GROUP: {
1732        format_(
1733            "ptr = ctx->ParseGroup(_internal_$1$_$2$(), ptr, $3$);\n"
1734            "CHK_(ptr);\n",
1735            field->is_repeated() ? "add" : "mutable", FieldName(field), tag);
1736        break;
1737      }
1738      case WireFormatLite::WIRETYPE_END_GROUP: {
1739        GOOGLE_LOG(FATAL) << "Can't have end group field\n";
1740        break;
1741      }
1742    }  // switch (wire_type)
1743  }
1744
1745  // Returns the tag for this field and in case of repeated packable fields,
1746  // sets a fallback tag in fallback_tag_ptr.
1747  static uint32 ExpectedTag(const FieldDescriptor* field,
1748                            uint32* fallback_tag_ptr) {
1749    uint32 expected_tag;
1750    if (field->is_packable()) {
1751      auto expected_wiretype = WireFormat::WireTypeForFieldType(field->type());
1752      expected_tag =
1753          WireFormatLite::MakeTag(field->number(), expected_wiretype);
1754      GOOGLE_CHECK(expected_wiretype != WireFormatLite::WIRETYPE_LENGTH_DELIMITED);
1755      auto fallback_wiretype = WireFormatLite::WIRETYPE_LENGTH_DELIMITED;
1756      uint32 fallback_tag =
1757          WireFormatLite::MakeTag(field->number(), fallback_wiretype);
1758
1759      if (field->is_packed()) std::swap(expected_tag, fallback_tag);
1760      *fallback_tag_ptr = fallback_tag;
1761    } else {
1762      auto expected_wiretype = WireFormat::WireTypeForField(field);
1763      expected_tag =
1764          WireFormatLite::MakeTag(field->number(), expected_wiretype);
1765    }
1766    return expected_tag;
1767  }
1768
1769  void GenerateParseLoop(
1770      const Descriptor* descriptor,
1771      const std::vector<const FieldDescriptor*>& ordered_fields) {
1772    format_(
1773        "while (!ctx->Done(&ptr)) {\n"
1774        "  $uint32$ tag;\n"
1775        "  ptr = $pi_ns$::ReadTag(ptr, &tag);\n"
1776        "  CHK_(ptr);\n");
1777    if (!ordered_fields.empty()) format_("  switch (tag >> 3) {\n");
1778
1779    format_.Indent();
1780    format_.Indent();
1781
1782    for (const auto* field : ordered_fields) {
1783      PrintFieldComment(format_, field);
1784      format_("case $1$:\n", field->number());
1785      format_.Indent();
1786      uint32 fallback_tag = 0;
1787      uint32 expected_tag = ExpectedTag(field, &fallback_tag);
1788      format_(
1789          "if (PROTOBUF_PREDICT_TRUE(static_cast<$uint8$>(tag) == $1$)) {\n",
1790          expected_tag & 0xFF);
1791      format_.Indent();
1792      auto wiretype = WireFormatLite::GetTagWireType(expected_tag);
1793      uint32 tag = WireFormatLite::MakeTag(field->number(), wiretype);
1794      int tag_size = io::CodedOutputStream::VarintSize32(tag);
1795      bool is_repeat = ShouldRepeat(field, wiretype);
1796      if (is_repeat) {
1797        format_(
1798            "ptr -= $1$;\n"
1799            "do {\n"
1800            "  ptr += $1$;\n",
1801            tag_size);
1802        format_.Indent();
1803      }
1804      GenerateFieldBody(wiretype, field);
1805      if (is_repeat) {
1806        format_.Outdent();
1807        format_(
1808            "  if (!ctx->DataAvailable(ptr)) break;\n"
1809            "} while ($pi_ns$::ExpectTag<$1$>(ptr));\n",
1810            tag);
1811      }
1812      format_.Outdent();
1813      if (fallback_tag) {
1814        format_("} else if (static_cast<$uint8$>(tag) == $1$) {\n",
1815                fallback_tag & 0xFF);
1816        format_.Indent();
1817        GenerateFieldBody(WireFormatLite::GetTagWireType(fallback_tag), field);
1818        format_.Outdent();
1819      }
1820      format_.Outdent();
1821      format_(
1822          "  } else goto handle_unusual;\n"
1823          "  continue;\n");
1824    }  // for loop over ordered fields
1825
1826    // Default case
1827    if (!ordered_fields.empty()) format_("default: {\n");
1828    if (!ordered_fields.empty()) format_("handle_unusual:\n");
1829    format_(
1830        "  if ((tag & 7) == 4 || tag == 0) {\n"
1831        "    ctx->SetLastTag(tag);\n"
1832        "    goto success;\n"
1833        "  }\n");
1834    if (IsMapEntryMessage(descriptor)) {
1835      format_("  continue;\n");
1836    } else {
1837      if (descriptor->extension_range_count() > 0) {
1838        format_("if (");
1839        for (int i = 0; i < descriptor->extension_range_count(); i++) {
1840          const Descriptor::ExtensionRange* range =
1841              descriptor->extension_range(i);
1842          if (i > 0) format_(" ||\n    ");
1843
1844          uint32 start_tag = WireFormatLite::MakeTag(
1845              range->start, static_cast<WireFormatLite::WireType>(0));
1846          uint32 end_tag = WireFormatLite::MakeTag(
1847              range->end, static_cast<WireFormatLite::WireType>(0));
1848
1849          if (range->end > FieldDescriptor::kMaxNumber) {
1850            format_("($1$u <= tag)", start_tag);
1851          } else {
1852            format_("($1$u <= tag && tag < $2$u)", start_tag, end_tag);
1853          }
1854        }
1855        format_(") {\n");
1856        format_(
1857            "  ptr = _extensions_.ParseField(tag, ptr,\n"
1858            "      internal_default_instance(), &_internal_metadata_, ctx);\n"
1859            "  CHK_(ptr != nullptr);\n"
1860            "  continue;\n"
1861            "}\n");
1862      }
1863      format_(
1864          "  ptr = UnknownFieldParse(tag,\n"
1865          "      _internal_metadata_.mutable_unknown_fields<$unknown_"
1866          "fields_type$>(),\n"
1867          "      ptr, ctx);\n"
1868          "  CHK_(ptr != nullptr);\n"
1869          "  continue;\n");
1870    }
1871    if (!ordered_fields.empty()) format_("}\n");  // default case
1872    format_.Outdent();
1873    format_.Outdent();
1874    if (!ordered_fields.empty()) format_("  }  // switch\n");
1875    format_("}  // while\n");
1876  }
1877};
1878
1879void GenerateParserLoop(const Descriptor* descriptor, int num_hasbits,
1880                        const Options& options,
1881                        MessageSCCAnalyzer* scc_analyzer,
1882                        io::Printer* printer) {
1883  ParseLoopGenerator generator(num_hasbits, options, scc_analyzer, printer);
1884  generator.GenerateParserLoop(descriptor);
1885}
1886
1887static bool HasExtensionFromFile(const Message& msg, const FileDescriptor* file,
1888                                 const Options& options,
1889                                 bool* has_opt_codesize_extension) {
1890  std::vector<const FieldDescriptor*> fields;
1891  auto reflection = msg.GetReflection();
1892  reflection->ListFields(msg, &fields);
1893  for (auto field : fields) {
1894    const auto* field_msg = field->message_type();
1895    if (field_msg == nullptr) {
1896      // It so happens that enums Is_Valid are still generated so enums work.
1897      // Only messages have potential problems.
1898      continue;
1899    }
1900    // If this option has an extension set AND that extension is defined in the
1901    // same file we have bootstrap problem.
1902    if (field->is_extension()) {
1903      const auto* msg_extension_file = field->message_type()->file();
1904      if (msg_extension_file == file) return true;
1905      if (has_opt_codesize_extension &&
1906          GetOptimizeFor(msg_extension_file, options) ==
1907              FileOptions::CODE_SIZE) {
1908        *has_opt_codesize_extension = true;
1909      }
1910    }
1911    // Recurse in this field to see if there is a problem in there
1912    if (field->is_repeated()) {
1913      for (int i = 0; i < reflection->FieldSize(msg, field); i++) {
1914        if (HasExtensionFromFile(reflection->GetRepeatedMessage(msg, field, i),
1915                                 file, options, has_opt_codesize_extension)) {
1916          return true;
1917        }
1918      }
1919    } else {
1920      if (HasExtensionFromFile(reflection->GetMessage(msg, field), file,
1921                               options, has_opt_codesize_extension)) {
1922        return true;
1923      }
1924    }
1925  }
1926  return false;
1927}
1928
1929static bool HasBootstrapProblem(const FileDescriptor* file,
1930                                const Options& options,
1931                                bool* has_opt_codesize_extension) {
1932  static auto& cache = *new std::unordered_map<const FileDescriptor*, bool>;
1933  auto it = cache.find(file);
1934  if (it != cache.end()) return it->second;
1935  // In order to build the data structures for the reflective parse, it needs
1936  // to parse the serialized descriptor describing all the messages defined in
1937  // this file. Obviously this presents a bootstrap problem for descriptor
1938  // messages.
1939  if (file->name() == "net/proto2/proto/descriptor.proto" ||
1940      file->name() == "google/protobuf/descriptor.proto") {
1941    return true;
1942  }
1943  // Unfortunately we're not done yet. The descriptor option messages allow
1944  // for extensions. So we need to be able to parse these extensions in order
1945  // to parse the file descriptor for a file that has custom options. This is a
1946  // problem when these custom options extensions are defined in the same file.
1947  FileDescriptorProto linkedin_fd_proto;
1948  const DescriptorPool* pool = file->pool();
1949  const Descriptor* fd_proto_descriptor =
1950      pool->FindMessageTypeByName(linkedin_fd_proto.GetTypeName());
1951  // Not all pools have descriptor.proto in them. In these cases there for sure
1952  // are no custom options.
1953  if (fd_proto_descriptor == nullptr) return false;
1954
1955  // It's easier to inspect file as a proto, because we can use reflection on
1956  // the proto to iterate over all content.
1957  file->CopyTo(&linkedin_fd_proto);
1958
1959  // linkedin_fd_proto is a generated proto linked in the proto compiler. As
1960  // such it doesn't know the extensions that are potentially present in the
1961  // descriptor pool constructed from the protos that are being compiled. These
1962  // custom options are therefore in the unknown fields.
1963  // By building the corresponding FileDescriptorProto in the pool constructed
1964  // by the protos that are being compiled, ie. file's pool, the unknown fields
1965  // are converted to extensions.
1966  DynamicMessageFactory factory(pool);
1967  Message* fd_proto = factory.GetPrototype(fd_proto_descriptor)->New();
1968  fd_proto->ParseFromString(linkedin_fd_proto.SerializeAsString());
1969
1970  bool& res = cache[file];
1971  res = HasExtensionFromFile(*fd_proto, file, options,
1972                             has_opt_codesize_extension);
1973  delete fd_proto;
1974  return res;
1975}
1976
1977FileOptions_OptimizeMode GetOptimizeFor(const FileDescriptor* file,
1978                                        const Options& options,
1979                                        bool* has_opt_codesize_extension) {
1980  if (has_opt_codesize_extension) *has_opt_codesize_extension = false;
1981  switch (options.enforce_mode) {
1982    case EnforceOptimizeMode::kSpeed:
1983      return FileOptions::SPEED;
1984    case EnforceOptimizeMode::kLiteRuntime:
1985      return FileOptions::LITE_RUNTIME;
1986    case EnforceOptimizeMode::kCodeSize:
1987      if (file->options().optimize_for() == FileOptions::LITE_RUNTIME) {
1988        return FileOptions::LITE_RUNTIME;
1989      }
1990      if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1991        return FileOptions::SPEED;
1992      }
1993      return FileOptions::CODE_SIZE;
1994    case EnforceOptimizeMode::kNoEnforcement:
1995      if (file->options().optimize_for() == FileOptions::CODE_SIZE) {
1996        if (HasBootstrapProblem(file, options, has_opt_codesize_extension)) {
1997          GOOGLE_LOG(WARNING) << "Proto states optimize_for = CODE_SIZE, but we "
1998                          "cannot honor that because it contains custom option "
1999                          "extensions defined in the same proto.";
2000          return FileOptions::SPEED;
2001        }
2002      }
2003      return file->options().optimize_for();
2004  }
2005
2006  GOOGLE_LOG(FATAL) << "Unknown optimization enforcement requested.";
2007  // The phony return below serves to silence a warning from GCC 8.
2008  return FileOptions::SPEED;
2009}
2010
2011}  // namespace cpp
2012}  // namespace compiler
2013}  // namespace protobuf
2014}  // namespace google
2015