1/* Amalgamated source file */
2#include "upb.h"
3/*
4* This is where we define macros used across upb.
5*
6* All of these macros are undef'd in port_undef.inc to avoid leaking them to
7* users.
8*
9* The correct usage is:
10*
11*   #include "upb/foobar.h"
12*   #include "upb/baz.h"
13*
14*   // MUST be last included header.
15*   #include "upb/port_def.inc"
16*
17*   // Code for this file.
18*   // <...>
19*
20*   // Can be omitted for .c files, required for .h.
21*   #include "upb/port_undef.inc"
22*
23* This file is private and must not be included by users!
24*/
25#include <stdint.h>
26
/* UPB_SIZE(size32, size64): expands to size32 when uintptr_t is 32 bits wide
 * and to size64 on every other target. */
#if UINTPTR_MAX != 0xffffffff
#define UPB_SIZE(size32, size64) size64
#else
#define UPB_SIZE(size32, size64) size32
#endif
32
/* If we always read/write as a consistent type to each address, this shouldn't
 * violate aliasing.
 *
 * UPB_PTR_AT(msg, ofs, type): a `type *` pointing `ofs` bytes past `msg`. */
#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs)))

/* UPB_READ_ONEOF: yields the `fieldtype` value stored at `offset` when the int
 * stored at `case_offset` equals `case_val`, and `default` otherwise.
 *
 * The whole expansion and the value arguments are parenthesized so the macro
 * can be embedded in a larger expression (e.g. `1 + UPB_READ_ONEOF(...)`)
 * without the surrounding operators binding into the conditional. */
#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \
  (*UPB_PTR_AT(msg, case_offset, int) == (case_val)                           \
       ? *UPB_PTR_AT(msg, offset, fieldtype)                                  \
       : (default))

/* UPB_WRITE_ONEOF: stores `case_val` into the int at `case_offset`, then
 * stores `value` into the `fieldtype` slot at `offset`.
 *
 * NOTE: this expands to two complete statements (including the trailing
 * semicolon), so it must not be used as the unbraced body of an if/else or
 * loop.  The shape is kept for compatibility with existing call sites. */
#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \
  *UPB_PTR_AT(msg, case_offset, int) = (case_val);                           \
  *UPB_PTR_AT(msg, offset, fieldtype) = (value);
46
/* Value stored in desctype_to_mapsize for the STRING and BYTES entries. */
#define UPB_MAPTYPE_STRING 0

/* UPB_INLINE: inline if possible, emit standalone code if required. */
#if defined(__cplusplus)
#define UPB_INLINE inline
#elif defined(__GNUC__) || defined(__clang__)
#define UPB_INLINE static __inline__
#else
#define UPB_INLINE static
#endif

/* Branch hints: UPB_LIKELY(x) / UPB_UNLIKELY(x) evaluate to x; on GCC/Clang
 * they go through __builtin_expect. */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_LIKELY(x) (x)
#define UPB_UNLIKELY(x) (x)
#else
#define UPB_LIKELY(x) __builtin_expect((x),1)
#define UPB_UNLIKELY(x) __builtin_expect((x),0)
#endif

/* UPB_BIG_ENDIAN is defined automatically when the compiler reports a
 * big-endian byte order.  Define it manually if you're on big endian and your
 * compiler doesn't provide these preprocessor symbols. */
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
#define UPB_BIG_ENDIAN
#endif

/* Function attributes; they expand to nothing on compilers that do not define
 * __GNUC__. */
#if defined(__GNUC__)
#define UPB_FORCEINLINE __inline__ __attribute__((always_inline))
#define UPB_NOINLINE __attribute__((noinline))
#define UPB_NORETURN __attribute__((__noreturn__))
#else  /* !defined(__GNUC__) */
#define UPB_FORCEINLINE
#define UPB_NOINLINE
#define UPB_NORETURN
#endif
83
/* Select implementations of [v]snprintf and va_copy.  None of them is part of
 * C89, so the choice depends on the language standard and compiler in use. */
#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L
/* C99/C++11 versions. */
#include <stdarg.h> /* va_copy lives here; <stdio.h> need not provide it. */
#include <stdio.h>
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined(_MSC_VER)
/* Microsoft C/C++ versions. */
#include <stdarg.h>
#include <stdio.h>
#if _MSC_VER < 1900
/* These toolchains use the msvc_* replacements declared here; they are defined
 * elsewhere. */
int msvc_snprintf(char* s, size_t n, const char* format, ...);
int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg);
#define UPB_MSVC_VSNPRINTF
#define _upb_snprintf msvc_snprintf
#define _upb_vsnprintf msvc_vsnprintf
#else
#define _upb_snprintf snprintf
#define _upb_vsnprintf vsnprintf
#endif
#define _upb_va_copy(a, b) va_copy(a, b)
#elif defined __GNUC__
/* A few hacky workarounds for functions not in C89.
 * For internal use only!
 * TODO(haberman): fix these by including our own implementations, or finding
 * another workaround.
 */
#define _upb_snprintf __builtin_snprintf
#define _upb_vsnprintf __builtin_vsnprintf
#define _upb_va_copy(a, b) __va_copy(a, b)
#else
#error Need implementations of [v]snprintf and va_copy
#endif
117
/* When compiled as C++, require a C++11-capable compiler. */
#if defined(__cplusplus)
#if !(__cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \
      (defined(_MSC_VER) && _MSC_VER >= 1900))
#error upb requires C++11 for C++ support
#endif
#endif

/* Larger / smaller of two values.  The selected argument is evaluated twice. */
#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y))
#define UPB_MIN(x, y) ((x) < (y) ? (x) : (y))

/* Marks a variable as intentionally unused. */
#define UPB_UNUSED(var) (void)var
131
/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true.
 * In debug mode it is a regular assert().
 *
 * Both release variants are wrapped in do/while so the macro behaves as a
 * single statement (no dangling-else hazard).  The fallback for compilers
 * without __builtin_unreachable() names the expression without evaluating it,
 * which prevents "unused variable" warnings. */
#ifdef NDEBUG
#ifdef __GNUC__
#define UPB_ASSUME(expr) \
  do {                   \
    if (!(expr)) __builtin_unreachable(); \
  } while (0)
#else
#define UPB_ASSUME(expr) do {} while (0 && (expr))
#endif
#else
#define UPB_ASSUME(expr) assert(expr)
#endif
143
/* UPB_ASSERT(): a regular assert() in debug builds.  In release mode the
 * expression is named but never evaluated, which prevents "unused variable"
 * warnings. */
#ifndef NDEBUG
#define UPB_ASSERT(expr) assert(expr)
#else
#define UPB_ASSERT(expr) do {} while (false && (expr))
#endif

/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only
 * exist in debug mode.  This turns into regular assert. */
#define UPB_ASSERT_DEBUGVAR(expr) assert(expr)

/* UPB_UNREACHABLE(): marks a code path that must never execute.  Always
 * asserts; on GCC/Clang it additionally calls __builtin_unreachable(). */
#if !defined(__GNUC__) && !defined(__clang__)
#define UPB_UNREACHABLE() do { assert(0); } while(0)
#else
#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0)
#endif
161
/* UPB_INFINITY: floating-point positive infinity.  Uses INFINITY when
 * <math.h> defines it, otherwise the expression 1.0 / 0.0. */
#include <math.h>
#ifndef INFINITY
#define UPB_INFINITY (1.0 / 0.0)
#else
#define UPB_INFINITY INFINITY
#endif
169
170#include <setjmp.h>
171#include <string.h>
172
173
174
175/* Maps descriptor type -> upb field type.  */
176static const uint8_t desctype_to_fieldtype[] = {
177    -1,               /* invalid descriptor type */
178    UPB_TYPE_DOUBLE,  /* DOUBLE */
179    UPB_TYPE_FLOAT,   /* FLOAT */
180    UPB_TYPE_INT64,   /* INT64 */
181    UPB_TYPE_UINT64,  /* UINT64 */
182    UPB_TYPE_INT32,   /* INT32 */
183    UPB_TYPE_UINT64,  /* FIXED64 */
184    UPB_TYPE_UINT32,  /* FIXED32 */
185    UPB_TYPE_BOOL,    /* BOOL */
186    UPB_TYPE_STRING,  /* STRING */
187    UPB_TYPE_MESSAGE, /* GROUP */
188    UPB_TYPE_MESSAGE, /* MESSAGE */
189    UPB_TYPE_BYTES,   /* BYTES */
190    UPB_TYPE_UINT32,  /* UINT32 */
191    UPB_TYPE_ENUM,    /* ENUM */
192    UPB_TYPE_INT32,   /* SFIXED32 */
193    UPB_TYPE_INT64,   /* SFIXED64 */
194    UPB_TYPE_INT32,   /* SINT32 */
195    UPB_TYPE_INT64,   /* SINT64 */
196};
197
198/* Maps descriptor type -> upb map size.  */
199static const uint8_t desctype_to_mapsize[] = {
200    -1,                 /* invalid descriptor type */
201    8,                  /* DOUBLE */
202    4,                  /* FLOAT */
203    8,                  /* INT64 */
204    8,                  /* UINT64 */
205    4,                  /* INT32 */
206    8,                  /* FIXED64 */
207    4,                  /* FIXED32 */
208    1,                  /* BOOL */
209    UPB_MAPTYPE_STRING, /* STRING */
210    sizeof(void *),     /* GROUP */
211    sizeof(void *),     /* MESSAGE */
212    UPB_MAPTYPE_STRING, /* BYTES */
213    4,                  /* UINT32 */
214    4,                  /* ENUM */
215    4,                  /* SFIXED32 */
216    8,                  /* SFIXED64 */
217    4,                  /* SINT32 */
218    8,                  /* SINT64 */
219};
220
221static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) |
222                                   (1 << UPB_DTYPE_FIXED32) |
223                                   (1 << UPB_DTYPE_SFIXED32);
224
225static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) |
226                                   (1 << UPB_DTYPE_FIXED64) |
227                                   (1 << UPB_DTYPE_SFIXED64);
228
/* Op: an action to be performed for a wire-type/field-type combination.
 *
 *   OP_SCALAR_LG2(n): scalar whose size is (1 << n) bytes      -> n
 *   OP_STRING / OP_SUBMSG: string/bytes and submessage/group   -> 4, 5
 *   OP_FIXPCK_LG2(n): packed fixed-width, (1 << n)-byte elems  -> n + 4
 *   OP_VARPCK_LG2(n): packed varint, (1 << n)-byte elems       -> n + 8
 *
 * The argument is parenthesized so that expression arguments (for example
 * `1 << 1`) are not re-associated with the addition. */
#define OP_SCALAR_LG2(n) (n)
#define OP_FIXPCK_LG2(n) ((n) + 4)
#define OP_VARPCK_LG2(n) ((n) + 8)
#define OP_STRING 4
#define OP_SUBMSG 5
235
236static const int8_t varint_ops[19] = {
237    -1,               /* field not found */
238    -1,               /* DOUBLE */
239    -1,               /* FLOAT */
240    OP_SCALAR_LG2(3), /* INT64 */
241    OP_SCALAR_LG2(3), /* UINT64 */
242    OP_SCALAR_LG2(2), /* INT32 */
243    -1,               /* FIXED64 */
244    -1,               /* FIXED32 */
245    OP_SCALAR_LG2(0), /* BOOL */
246    -1,               /* STRING */
247    -1,               /* GROUP */
248    -1,               /* MESSAGE */
249    -1,               /* BYTES */
250    OP_SCALAR_LG2(2), /* UINT32 */
251    OP_SCALAR_LG2(2), /* ENUM */
252    -1,               /* SFIXED32 */
253    -1,               /* SFIXED64 */
254    OP_SCALAR_LG2(2), /* SINT32 */
255    OP_SCALAR_LG2(3), /* SINT64 */
256};
257
258static const int8_t delim_ops[37] = {
259    /* For non-repeated field type. */
260    -1,        /* field not found */
261    -1,        /* DOUBLE */
262    -1,        /* FLOAT */
263    -1,        /* INT64 */
264    -1,        /* UINT64 */
265    -1,        /* INT32 */
266    -1,        /* FIXED64 */
267    -1,        /* FIXED32 */
268    -1,        /* BOOL */
269    OP_STRING, /* STRING */
270    -1,        /* GROUP */
271    OP_SUBMSG, /* MESSAGE */
272    OP_STRING, /* BYTES */
273    -1,        /* UINT32 */
274    -1,        /* ENUM */
275    -1,        /* SFIXED32 */
276    -1,        /* SFIXED64 */
277    -1,        /* SINT32 */
278    -1,        /* SINT64 */
279    /* For repeated field type. */
280    OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */
281    OP_FIXPCK_LG2(2), /* REPEATED FLOAT */
282    OP_VARPCK_LG2(3), /* REPEATED INT64 */
283    OP_VARPCK_LG2(3), /* REPEATED UINT64 */
284    OP_VARPCK_LG2(2), /* REPEATED INT32 */
285    OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */
286    OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */
287    OP_VARPCK_LG2(0), /* REPEATED BOOL */
288    OP_STRING,        /* REPEATED STRING */
289    OP_SUBMSG,        /* REPEATED GROUP */
290    OP_SUBMSG,        /* REPEATED MESSAGE */
291    OP_STRING,        /* REPEATED BYTES */
292    OP_VARPCK_LG2(2), /* REPEATED UINT32 */
293    OP_VARPCK_LG2(2), /* REPEATED ENUM */
294    OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */
295    OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */
296    OP_VARPCK_LG2(2), /* REPEATED SINT32 */
297    OP_VARPCK_LG2(3), /* REPEATED SINT64 */
298};
299
300/* Data pertaining to the parse. */
301typedef struct {
302  const char *limit;       /* End of delimited region or end of buffer. */
303  upb_arena *arena;
304  int depth;
305  uint32_t end_group; /* Set to field number of END_GROUP tag, if any. */
306  jmp_buf err;
307} upb_decstate;
308
309typedef union {
310  bool bool_val;
311  int32_t int32_val;
312  int64_t int64_val;
313  uint32_t uint32_val;
314  uint64_t uint64_val;
315  upb_strview str_val;
316} wireval;
317
318static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
319                              const upb_msglayout *layout);
320
321UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); }
322
323static bool decode_reserve(upb_decstate *d, upb_array *arr, int elem) {
324  bool need_realloc = arr->size - arr->len < elem;
325  if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) {
326    decode_err(d);
327  }
328  return need_realloc;
329}
330
331UPB_NOINLINE
332static const char *decode_longvarint64(upb_decstate *d, const char *ptr,
333                                       const char *limit, uint64_t *val) {
334  uint8_t byte;
335  int bitpos = 0;
336  uint64_t out = 0;
337
338  do {
339    if (bitpos >= 70 || ptr == limit) decode_err(d);
340    byte = *ptr;
341    out |= (uint64_t)(byte & 0x7F) << bitpos;
342    ptr++;
343    bitpos += 7;
344  } while (byte & 0x80);
345
346  *val = out;
347  return ptr;
348}
349
350UPB_FORCEINLINE
351static const char *decode_varint64(upb_decstate *d, const char *ptr,
352                                   const char *limit, uint64_t *val) {
353  if (UPB_LIKELY(ptr < limit && (*ptr & 0x80) == 0)) {
354    *val = (uint8_t)*ptr;
355    return ptr + 1;
356  } else {
357    return decode_longvarint64(d, ptr, limit, val);
358  }
359}
360
361static const char *decode_varint32(upb_decstate *d, const char *ptr,
362                                   const char *limit, uint32_t *val) {
363  uint64_t u64;
364  ptr = decode_varint64(d, ptr, limit, &u64);
365  if (u64 > UINT32_MAX) decode_err(d);
366  *val = (uint32_t)u64;
367  return ptr;
368}
369
370static void decode_munge(int type, wireval *val) {
371  switch (type) {
372    case UPB_DESCRIPTOR_TYPE_BOOL:
373      val->bool_val = val->uint64_val != 0;
374      break;
375    case UPB_DESCRIPTOR_TYPE_SINT32: {
376      uint32_t n = val->uint32_val;
377      val->int32_val = (n >> 1) ^ -(int32_t)(n & 1);
378      break;
379    }
380    case UPB_DESCRIPTOR_TYPE_SINT64: {
381      uint64_t n = val->uint64_val;
382      val->int64_val = (n >> 1) ^ -(int64_t)(n & 1);
383      break;
384    }
385  }
386}
387
388static const upb_msglayout_field *upb_find_field(const upb_msglayout *l,
389                                                 uint32_t field_number) {
390  static upb_msglayout_field none = {0};
391
392  /* Lots of optimization opportunities here. */
393  int i;
394  if (l == NULL) return &none;
395  for (i = 0; i < l->field_count; i++) {
396    if (l->fields[i].number == field_number) {
397      return &l->fields[i];
398    }
399  }
400
401  return &none; /* Unknown field. */
402}
403
404static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout,
405                                 const upb_msglayout_field *field) {
406  const upb_msglayout *subl = layout->submsgs[field->submsg_index];
407  return _upb_msg_new(subl, d->arena);
408}
409
410static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg,
411                            const upb_msglayout *layout,
412                            const upb_msglayout_field *field, upb_strview val) {
413  const upb_msglayout *subl = layout->submsgs[field->submsg_index];
414  const char *saved_limit = d->limit;
415  if (--d->depth < 0) decode_err(d);
416  d->limit = val.data + val.size;
417  decode_msg(d, val.data, submsg, subl);
418  d->limit = saved_limit;
419  if (d->end_group != 0) decode_err(d);
420  d->depth++;
421}
422
423static const char *decode_group(upb_decstate *d, const char *ptr,
424                                upb_msg *submsg, const upb_msglayout *subl,
425                                uint32_t number) {
426  if (--d->depth < 0) decode_err(d);
427  ptr = decode_msg(d, ptr, submsg, subl);
428  if (d->end_group != number) decode_err(d);
429  d->end_group = 0;
430  d->depth++;
431  return ptr;
432}
433
434static const char *decode_togroup(upb_decstate *d, const char *ptr,
435                                  upb_msg *submsg, const upb_msglayout *layout,
436                                  const upb_msglayout_field *field) {
437  const upb_msglayout *subl = layout->submsgs[field->submsg_index];
438  return decode_group(d, ptr, submsg, subl, field->number);
439}
440
441static const char *decode_toarray(upb_decstate *d, const char *ptr,
442                                  upb_msg *msg, const upb_msglayout *layout,
443                                  const upb_msglayout_field *field, wireval val,
444                                  int op) {
445  upb_array **arrp = UPB_PTR_AT(msg, field->offset, void);
446  upb_array *arr = *arrp;
447  void *mem;
448
449  if (!arr) {
450    upb_fieldtype_t type = desctype_to_fieldtype[field->descriptortype];
451    arr = _upb_array_new(d->arena, type);
452    if (!arr) decode_err(d);
453    *arrp = arr;
454  }
455
456  decode_reserve(d, arr, 1);
457
458  switch (op) {
459    case OP_SCALAR_LG2(0):
460    case OP_SCALAR_LG2(2):
461    case OP_SCALAR_LG2(3):
462      /* Append scalar value. */
463      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void);
464      arr->len++;
465      memcpy(mem, &val, 1 << op);
466      return ptr;
467    case OP_STRING:
468      /* Append string. */
469      mem =
470          UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void);
471      arr->len++;
472      memcpy(mem, &val, sizeof(upb_strview));
473      return ptr;
474    case OP_SUBMSG: {
475      /* Append submessage / group. */
476      upb_msg *submsg = decode_newsubmsg(d, layout, field);
477      *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) =
478          submsg;
479      arr->len++;
480      if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) {
481        ptr = decode_togroup(d, ptr, submsg, layout, field);
482      } else {
483        decode_tosubmsg(d, submsg, layout, field, val.str_val);
484      }
485      return ptr;
486    }
487    case OP_FIXPCK_LG2(2):
488    case OP_FIXPCK_LG2(3): {
489      /* Fixed packed. */
490      int lg2 = op - OP_FIXPCK_LG2(0);
491      int mask = (1 << lg2) - 1;
492      int count = val.str_val.size >> lg2;
493      if ((val.str_val.size & mask) != 0) {
494        decode_err(d); /* Length isn't a round multiple of elem size. */
495      }
496      decode_reserve(d, arr, count);
497      mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
498      arr->len += count;
499      memcpy(mem, val.str_val.data, count << op);
500      return ptr;
501    }
502    case OP_VARPCK_LG2(0):
503    case OP_VARPCK_LG2(2):
504    case OP_VARPCK_LG2(3): {
505      /* Varint packed. */
506      int lg2 = op - OP_VARPCK_LG2(0);
507      int scale = 1 << lg2;
508      const char *ptr = val.str_val.data;
509      const char *end = ptr + val.str_val.size;
510      char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
511      while (ptr < end) {
512        wireval elem;
513        ptr = decode_varint64(d, ptr, end, &elem.uint64_val);
514        decode_munge(field->descriptortype, &elem);
515        if (decode_reserve(d, arr, 1)) {
516          out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void);
517        }
518        arr->len++;
519        memcpy(out, &elem, scale);
520        out += scale;
521      }
522      if (ptr != end) decode_err(d);
523      return ptr;
524    }
525    default:
526      UPB_UNREACHABLE();
527  }
528}
529
530static void decode_tomap(upb_decstate *d, upb_msg *msg,
531                         const upb_msglayout *layout,
532                         const upb_msglayout_field *field, wireval val) {
533  upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *);
534  upb_map *map = *map_p;
535  upb_map_entry ent;
536  const upb_msglayout *entry = layout->submsgs[field->submsg_index];
537
538  if (!map) {
539    /* Lazily create map. */
540    const upb_msglayout *entry = layout->submsgs[field->submsg_index];
541    const upb_msglayout_field *key_field = &entry->fields[0];
542    const upb_msglayout_field *val_field = &entry->fields[1];
543    char key_size = desctype_to_mapsize[key_field->descriptortype];
544    char val_size = desctype_to_mapsize[val_field->descriptortype];
545    UPB_ASSERT(key_field->offset == 0);
546    UPB_ASSERT(val_field->offset == sizeof(upb_strview));
547    map = _upb_map_new(d->arena, key_size, val_size);
548    *map_p = map;
549  }
550
551  /* Parse map entry. */
552  memset(&ent, 0, sizeof(ent));
553
554  if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
555      entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) {
556    /* Create proactively to handle the case where it doesn't appear. */
557    ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena);
558  }
559
560  decode_tosubmsg(d, &ent.k, layout, field, val.str_val);
561
562  /* Insert into map. */
563  _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, d->arena);
564}
565
566static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg,
567                                const upb_msglayout *layout,
568                                const upb_msglayout_field *field, wireval val,
569                                int op) {
570  void *mem = UPB_PTR_AT(msg, field->offset, void);
571  int type = field->descriptortype;
572
573  /* Set presence if necessary. */
574  if (field->presence < 0) {
575    /* Oneof case */
576    *UPB_PTR_AT(msg, -field->presence, int32_t) = field->number;
577  } else if (field->presence > 0) {
578    /* Hasbit */
579    uint32_t hasbit = field->presence;
580    *UPB_PTR_AT(msg, hasbit / 8, uint8_t) |= (1 << (hasbit % 8));
581  }
582
583  /* Store into message. */
584  switch (op) {
585    case OP_SUBMSG: {
586      upb_msg **submsgp = mem;
587      upb_msg *submsg = *submsgp;
588      if (!submsg) {
589        submsg = decode_newsubmsg(d, layout, field);
590        *submsgp = submsg;
591      }
592      if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) {
593        ptr = decode_togroup(d, ptr, submsg, layout, field);
594      } else {
595        decode_tosubmsg(d, submsg, layout, field, val.str_val);
596      }
597      break;
598    }
599    case OP_STRING:
600      memcpy(mem, &val, sizeof(upb_strview));
601      break;
602    case OP_SCALAR_LG2(3):
603      memcpy(mem, &val, 8);
604      break;
605    case OP_SCALAR_LG2(2):
606      memcpy(mem, &val, 4);
607      break;
608    case OP_SCALAR_LG2(0):
609      memcpy(mem, &val, 1);
610      break;
611    default:
612      UPB_UNREACHABLE();
613  }
614
615  return ptr;
616}
617
618static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg,
619                              const upb_msglayout *layout) {
620  while (ptr < d->limit) {
621    uint32_t tag;
622    const upb_msglayout_field *field;
623    int field_number;
624    int wire_type;
625    const char *field_start = ptr;
626    wireval val;
627    int op;
628
629    ptr = decode_varint32(d, ptr, d->limit, &tag);
630    field_number = tag >> 3;
631    wire_type = tag & 7;
632
633    field = upb_find_field(layout, field_number);
634
635    switch (wire_type) {
636      case UPB_WIRE_TYPE_VARINT:
637        ptr = decode_varint64(d, ptr, d->limit, &val.uint64_val);
638        op = varint_ops[field->descriptortype];
639        decode_munge(field->descriptortype, &val);
640        break;
641      case UPB_WIRE_TYPE_32BIT:
642        if (d->limit - ptr < 4) decode_err(d);
643        memcpy(&val, ptr, 4);
644        ptr += 4;
645        op = OP_SCALAR_LG2(2);
646        if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown;
647        break;
648      case UPB_WIRE_TYPE_64BIT:
649        if (d->limit - ptr < 8) decode_err(d);
650        memcpy(&val, ptr, 8);
651        ptr += 8;
652        op = OP_SCALAR_LG2(3);
653        if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown;
654        break;
655      case UPB_WIRE_TYPE_DELIMITED: {
656        uint32_t size;
657        int ndx = field->descriptortype;
658        if (_upb_isrepeated(field)) ndx += 18;
659        ptr = decode_varint32(d, ptr, d->limit, &size);
660        if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) {
661          decode_err(d); /* Length overflow. */
662        }
663        val.str_val.data = ptr;
664        val.str_val.size = size;
665        ptr += size;
666        op = delim_ops[ndx];
667        break;
668      }
669      case UPB_WIRE_TYPE_START_GROUP:
670        val.int32_val = field_number;
671        op = OP_SUBMSG;
672        if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown;
673        break;
674      case UPB_WIRE_TYPE_END_GROUP:
675        d->end_group = field_number;
676        return ptr;
677      default:
678        decode_err(d);
679    }
680
681    if (op >= 0) {
682      /* Parse, using op for dispatch. */
683      switch (field->label) {
684        case UPB_LABEL_REPEATED:
685        case _UPB_LABEL_PACKED:
686          ptr = decode_toarray(d, ptr, msg, layout, field, val, op);
687          break;
688        case _UPB_LABEL_MAP:
689          decode_tomap(d, msg, layout, field, val);
690          break;
691        default:
692          ptr = decode_tomsg(d, ptr, msg, layout, field, val, op);
693          break;
694      }
695    } else {
696    unknown:
697      /* Skip unknown field. */
698      if (field_number == 0) decode_err(d);
699      if (wire_type == UPB_WIRE_TYPE_START_GROUP) {
700        ptr = decode_group(d, ptr, NULL, NULL, field_number);
701      }
702      if (msg) {
703        if (!_upb_msg_addunknown(msg, field_start, ptr - field_start,
704                                 d->arena)) {
705          decode_err(d);
706        }
707      }
708    }
709  }
710
711  if (ptr != d->limit) decode_err(d);
712  return ptr;
713}
714
715bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l,
716                upb_arena *arena) {
717  upb_decstate state;
718  state.limit = buf + size;
719  state.arena = arena;
720  state.depth = 64;
721  state.end_group = 0;
722
723  if (setjmp(state.err)) return false;
724
725  if (size == 0) return true;
726  decode_msg(&state, buf, msg, l);
727
728  return state.end_group == 0;
729}
730
/* The OP_* helpers are private to the decoder; drop them here. */
#undef OP_SUBMSG
#undef OP_STRING
#undef OP_VARPCK_LG2
#undef OP_FIXPCK_LG2
#undef OP_SCALAR_LG2
736/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */
737
738
739#include <string.h>
740
741
742
/* Size reserved for one varint by upb_put_varint(). */
#define UPB_PB_VARINT_MAX_LEN 10
/* CHK(x): returns false from the enclosing function when x is false. */
#define CHK(x)                 \
  do {                         \
    if (!(x)) {                \
      return false;            \
    }                          \
  } while (0)
745
/* Writes `val` to `buf` as a varint: seven bits per byte, least-significant
 * group first, with the top bit set on every byte except the last.  Returns
 * the number of bytes written (at least 1, at most 10). */
static size_t upb_encode_varint(uint64_t val, char *buf) {
  size_t n = 0;
  do {
    uint8_t low7 = val & 0x7fU;
    val >>= 7;
    if (val != 0) low7 |= 0x80U; /* More groups follow. */
    buf[n++] = low7;
  } while (val != 0);
  return n;
}
758
/* Zig-zag encoding: 0, -1, 1, -2, ... map to 0, 1, 2, 3, ...
 *
 * The sign mask is built with a comparison rather than by right-shifting the
 * signed input, because right-shifting a negative value is
 * implementation-defined in C. */
static uint32_t upb_zzencode_32(int32_t n) {
  return ((uint32_t)n << 1) ^ (n < 0 ? UINT32_MAX : 0);
}

static uint64_t upb_zzencode_64(int64_t n) {
  return ((uint64_t)n << 1) ^ (n < 0 ? UINT64_MAX : 0);
}
761
762typedef struct {
763  upb_alloc *alloc;
764  char *buf, *ptr, *limit;
765} upb_encstate;
766
/* Returns the smallest power of two that is >= `bytes`, with a minimum of 128.
 *
 * If no such power of two fits in size_t, returns `bytes` unchanged instead
 * of doubling past SIZE_MAX (which would wrap to zero and never terminate);
 * the result is then still large enough, just not a power of two. */
static size_t upb_roundup_pow2(size_t bytes) {
  size_t ret = 128;
  while (ret < bytes) {
    if (ret > SIZE_MAX / 2) {
      return bytes; /* Doubling would overflow. */
    }
    ret *= 2;
  }
  return ret;
}
774
775static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) {
776  size_t old_size = e->limit - e->buf;
777  size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr));
778  char *new_buf = upb_realloc(e->alloc, e->buf, old_size, new_size);
779  CHK(new_buf);
780
781  /* We want previous data at the end, realloc() put it at the beginning. */
782  if (old_size > 0) {
783    memmove(new_buf + new_size - old_size, e->buf, old_size);
784  }
785
786  e->ptr = new_buf + new_size - (e->limit - e->ptr);
787  e->limit = new_buf + new_size;
788  e->buf = new_buf;
789  return true;
790}
791
792/* Call to ensure that at least "bytes" bytes are available for writing at
793 * e->ptr.  Returns false if the bytes could not be allocated. */
794static bool upb_encode_reserve(upb_encstate *e, size_t bytes) {
795  CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) ||
796      upb_encode_growbuffer(e, bytes));
797
798  e->ptr -= bytes;
799  return true;
800}
801
802/* Writes the given bytes to the buffer, handling reserve/advance. */
803static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) {
804  if (len == 0) return true;
805  CHK(upb_encode_reserve(e, len));
806  memcpy(e->ptr, data, len);
807  return true;
808}
809
810static bool upb_put_fixed64(upb_encstate *e, uint64_t val) {
811  /* TODO(haberman): byte-swap for big endian. */
812  return upb_put_bytes(e, &val, sizeof(uint64_t));
813}
814
815static bool upb_put_fixed32(upb_encstate *e, uint32_t val) {
816  /* TODO(haberman): byte-swap for big endian. */
817  return upb_put_bytes(e, &val, sizeof(uint32_t));
818}
819
820static bool upb_put_varint(upb_encstate *e, uint64_t val) {
821  size_t len;
822  char *start;
823  CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN));
824  len = upb_encode_varint(val, e->ptr);
825  start = e->ptr + UPB_PB_VARINT_MAX_LEN - len;
826  memmove(start, e->ptr, len);
827  e->ptr = start;
828  return true;
829}
830
831static bool upb_put_double(upb_encstate *e, double d) {
832  uint64_t u64;
833  UPB_ASSERT(sizeof(double) == sizeof(uint64_t));
834  memcpy(&u64, &d, sizeof(uint64_t));
835  return upb_put_fixed64(e, u64);
836}
837
838static bool upb_put_float(upb_encstate *e, float d) {
839  uint32_t u32;
840  UPB_ASSERT(sizeof(float) == sizeof(uint32_t));
841  memcpy(&u32, &d, sizeof(uint32_t));
842  return upb_put_fixed32(e, u32);
843}
844
845static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) {
846  uint32_t ret;
847  memcpy(&ret, msg - f->presence, sizeof(ret));
848  return ret;
849}
850
851static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) {
852  uint32_t hasbit = f->presence;
853  UPB_ASSERT(f->presence > 0);
854  return (*UPB_PTR_AT(msg, hasbit / 8, uint8_t)) & (1 << (hasbit % 8));
855}
856
857static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) {
858  return upb_put_varint(e, (field_number << 3) | wire_type);
859}
860
861static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr,
862                               size_t elem_size, uint32_t tag) {
863  size_t bytes = arr->len * elem_size;
864  const char* data = _upb_array_constptr(arr);
865  const char* ptr = data + bytes - elem_size;
866  if (tag) {
867    while (true) {
868      CHK(upb_put_bytes(e, ptr, elem_size) && upb_put_varint(e, tag));
869      if (ptr == data) break;
870      ptr -= elem_size;
871    }
872    return true;
873  } else {
874    return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes);
875  }
876}
877
878bool upb_encode_message(upb_encstate *e, const char *msg,
879                        const upb_msglayout *m, size_t *size);
880
881static bool upb_encode_scalarfield(upb_encstate *e, const void *_field_mem,
882                                   const upb_msglayout *m,
883                                   const upb_msglayout_field *f,
884                                   bool skip_zero_value) {
885  const char *field_mem = _field_mem;
886#define CASE(ctype, type, wire_type, encodeval) do { \
887  ctype val = *(ctype*)field_mem; \
888  if (skip_zero_value && val == 0) { \
889    return true; \
890  } \
891  return upb_put_ ## type(e, encodeval) && \
892      upb_put_tag(e, f->number, wire_type); \
893} while(0)
894
895  switch (f->descriptortype) {
896    case UPB_DESCRIPTOR_TYPE_DOUBLE:
897      CASE(double, double, UPB_WIRE_TYPE_64BIT, val);
898    case UPB_DESCRIPTOR_TYPE_FLOAT:
899      CASE(float, float, UPB_WIRE_TYPE_32BIT, val);
900    case UPB_DESCRIPTOR_TYPE_INT64:
901    case UPB_DESCRIPTOR_TYPE_UINT64:
902      CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val);
903    case UPB_DESCRIPTOR_TYPE_UINT32:
904      CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val);
905    case UPB_DESCRIPTOR_TYPE_INT32:
906    case UPB_DESCRIPTOR_TYPE_ENUM:
907      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val);
908    case UPB_DESCRIPTOR_TYPE_SFIXED64:
909    case UPB_DESCRIPTOR_TYPE_FIXED64:
910      CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val);
911    case UPB_DESCRIPTOR_TYPE_FIXED32:
912    case UPB_DESCRIPTOR_TYPE_SFIXED32:
913      CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val);
914    case UPB_DESCRIPTOR_TYPE_BOOL:
915      CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val);
916    case UPB_DESCRIPTOR_TYPE_SINT32:
917      CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val));
918    case UPB_DESCRIPTOR_TYPE_SINT64:
919      CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val));
920    case UPB_DESCRIPTOR_TYPE_STRING:
921    case UPB_DESCRIPTOR_TYPE_BYTES: {
922      upb_strview view = *(upb_strview*)field_mem;
923      if (skip_zero_value && view.size == 0) {
924        return true;
925      }
926      return upb_put_bytes(e, view.data, view.size) &&
927          upb_put_varint(e, view.size) &&
928          upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
929    }
930    case UPB_DESCRIPTOR_TYPE_GROUP: {
931      size_t size;
932      void *submsg = *(void **)field_mem;
933      const upb_msglayout *subm = m->submsgs[f->submsg_index];
934      if (submsg == NULL) {
935        return true;
936      }
937      return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
938          upb_encode_message(e, submsg, subm, &size) &&
939          upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP);
940    }
941    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
942      size_t size;
943      void *submsg = *(void **)field_mem;
944      const upb_msglayout *subm = m->submsgs[f->submsg_index];
945      if (submsg == NULL) {
946        return true;
947      }
948      return upb_encode_message(e, submsg, subm, &size) &&
949          upb_put_varint(e, size) &&
950          upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED);
951    }
952  }
953#undef CASE
954  UPB_UNREACHABLE();
955}
956
/* Encodes the repeated field `f` whose upb_array pointer is stored at
 * `field_mem`.
 *
 * A NULL or zero-length array encodes nothing.  Every loop below starts at
 * start + arr->len and walks down to `start`, so elements are put from last
 * to first, and for each element the payload is put before its tag.
 *
 * e:         encoder state.
 * field_mem: address of the field's `upb_array *` slot.
 * m:         layout of the containing message (used to find sub-layouts).
 * f:         layout entry of the field being encoded.
 *
 * Returns true on success.  CHK() is defined earlier in the file (not visible
 * here); it is presumably what makes this function return false when a put
 * fails -- confirm against its definition. */
static bool upb_encode_array(upb_encstate *e, const char *field_mem,
                             const upb_msglayout *m,
                             const upb_msglayout_field *f) {
  const upb_array *arr = *(const upb_array**)field_mem;
  bool packed = f->label == _UPB_LABEL_PACKED;

  if (arr == NULL || arr->len == 0) {
    return true;
  }

/* Body of one `case` for a varint-encoded element type.  `encode` is an
 * expression in terms of `ptr` producing the value to put.  When the field is
 * not packed, `tag` is non-zero and is put after every element; when packed,
 * `tag` is 0 and the byte count accumulated since `pre_len` is put once after
 * the loop.  The macro ends the case with `break`; the trailing empty
 * do/while only exists to swallow the caller's semicolon. */
#define VARINT_CASE(ctype, encode)                                       \
  {                                                                      \
    const ctype *start = _upb_array_constptr(arr);                       \
    const ctype *ptr = start + arr->len;                                 \
    size_t pre_len = e->limit - e->ptr;                                  \
    uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \
    do {                                                                 \
      ptr--;                                                             \
      CHK(upb_put_varint(e, encode));                                    \
      if (tag) CHK(upb_put_varint(e, tag));                              \
    } while (ptr != start);                                              \
    if (!tag) CHK(upb_put_varint(e, e->limit - e->ptr - pre_len));       \
  }                                                                      \
  break;                                                                 \
  do {                                                                   \
    ;                                                                    \
  } while (0)

/* Tag value handed to upb_put_fixedarray(): 0 for packed fields, otherwise
 * the field number combined with the given wire type. */
#define TAG(wire_type) (packed ? 0 : (f->number << 3 | wire_type))

  switch (f->descriptortype) {
    /* Fixed-width element types are delegated to upb_put_fixedarray(). */
    case UPB_DESCRIPTOR_TYPE_DOUBLE:
      CHK(upb_put_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT)));
      break;
    case UPB_DESCRIPTOR_TYPE_FLOAT:
      CHK(upb_put_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT)));
      break;
    case UPB_DESCRIPTOR_TYPE_SFIXED64:
    case UPB_DESCRIPTOR_TYPE_FIXED64:
      CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT)));
      break;
    case UPB_DESCRIPTOR_TYPE_FIXED32:
    case UPB_DESCRIPTOR_TYPE_SFIXED32:
      CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT)));
      break;
    /* Varint element types; each VARINT_CASE ends with its own `break`. */
    case UPB_DESCRIPTOR_TYPE_INT64:
    case UPB_DESCRIPTOR_TYPE_UINT64:
      VARINT_CASE(uint64_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_UINT32:
      VARINT_CASE(uint32_t, *ptr);
    case UPB_DESCRIPTOR_TYPE_INT32:
    case UPB_DESCRIPTOR_TYPE_ENUM:
      VARINT_CASE(int32_t, (int64_t)*ptr);
    case UPB_DESCRIPTOR_TYPE_BOOL:
      VARINT_CASE(bool, *ptr);
    case UPB_DESCRIPTOR_TYPE_SINT32:
      VARINT_CASE(int32_t, upb_zzencode_32(*ptr));
    case UPB_DESCRIPTOR_TYPE_SINT64:
      VARINT_CASE(int64_t, upb_zzencode_64(*ptr));
    /* The remaining types return directly and never reach the packed-tag
     * code after the switch. */
    case UPB_DESCRIPTOR_TYPE_STRING:
    case UPB_DESCRIPTOR_TYPE_BYTES: {
      const upb_strview *start = _upb_array_constptr(arr);
      const upb_strview *ptr = start + arr->len;
      do {
        ptr--;
        CHK(upb_put_bytes(e, ptr->data, ptr->size) &&
            upb_put_varint(e, ptr->size) &&
            upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
      } while (ptr != start);
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_GROUP: {
      const void *const*start = _upb_array_constptr(arr);
      const void *const*ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      do {
        size_t size;
        ptr--;
        /* END_GROUP is put first and START_GROUP last; `size` is not used. */
        CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) &&
            upb_encode_message(e, *ptr, subm, &size) &&
            upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP));
      } while (ptr != start);
      return true;
    }
    case UPB_DESCRIPTOR_TYPE_MESSAGE: {
      const void *const*start = _upb_array_constptr(arr);
      const void *const*ptr = start + arr->len;
      const upb_msglayout *subm = m->submsgs[f->submsg_index];
      do {
        size_t size;
        ptr--;
        /* Sub-message bytes, then their size as a varint, then the tag. */
        CHK(upb_encode_message(e, *ptr, subm, &size) &&
            upb_put_varint(e, size) &&
            upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
      } while (ptr != start);
      return true;
    }
  }
#undef VARINT_CASE

  /* Only the fixed-width and varint cases break out of the switch; for a
   * packed field they still need the single delimited tag. */
  if (packed) {
    CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
  }
  return true;
}
1062
1063static bool upb_encode_map(upb_encstate *e, const char *field_mem,
1064                           const upb_msglayout *m,
1065                           const upb_msglayout_field *f) {
1066  const upb_map *map = *(const upb_map**)field_mem;
1067  const upb_msglayout *entry = m->submsgs[f->submsg_index];
1068  const upb_msglayout_field *key_field = &entry->fields[0];
1069  const upb_msglayout_field *val_field = &entry->fields[1];
1070  upb_strtable_iter i;
1071  if (map == NULL) {
1072    return true;
1073  }
1074
1075  upb_strtable_begin(&i, &map->table);
1076  for(; !upb_strtable_done(&i); upb_strtable_next(&i)) {
1077    size_t pre_len = e->limit - e->ptr;
1078    size_t size;
1079    upb_strview key = upb_strtable_iter_key(&i);
1080    const upb_value val = upb_strtable_iter_value(&i);
1081    const void *keyp =
1082        map->key_size == UPB_MAPTYPE_STRING ? (void *)&key : key.data;
1083    const void *valp =
1084        map->val_size == UPB_MAPTYPE_STRING ? upb_value_getptr(val) : &val;
1085
1086    CHK(upb_encode_scalarfield(e, valp, entry, val_field, false));
1087    CHK(upb_encode_scalarfield(e, keyp, entry, key_field, false));
1088    size = (e->limit - e->ptr) - pre_len;
1089    CHK(upb_put_varint(e, size));
1090    CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED));
1091  }
1092
1093  return true;
1094}
1095
1096
1097bool upb_encode_message(upb_encstate *e, const char *msg,
1098                        const upb_msglayout *m, size_t *size) {
1099  int i;
1100  size_t pre_len = e->limit - e->ptr;
1101  const char *unknown;
1102  size_t unknown_size;
1103
1104  unknown = upb_msg_getunknown(msg, &unknown_size);
1105
1106  if (unknown) {
1107    upb_put_bytes(e, unknown, unknown_size);
1108  }
1109
1110  for (i = m->field_count - 1; i >= 0; i--) {
1111    const upb_msglayout_field *f = &m->fields[i];
1112
1113    if (_upb_isrepeated(f)) {
1114      CHK(upb_encode_array(e, msg + f->offset, m, f));
1115    } else if (f->label == _UPB_LABEL_MAP) {
1116      CHK(upb_encode_map(e, msg + f->offset, m, f));
1117    } else {
1118      bool skip_empty = false;
1119      if (f->presence == 0) {
1120        /* Proto3 presence. */
1121        skip_empty = true;
1122      } else if (f->presence > 0) {
1123        /* Proto2 presence: hasbit. */
1124        if (!upb_readhasbit(msg, f)) {
1125          continue;
1126        }
1127      } else {
1128        /* Field is in a oneof. */
1129        if (upb_readcase(msg, f) != f->number) {
1130          continue;
1131        }
1132      }
1133      CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty));
1134    }
1135  }
1136
1137  *size = (e->limit - e->ptr) - pre_len;
1138  return true;
1139}
1140
1141char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena,
1142                 size_t *size) {
1143  upb_encstate e;
1144  e.alloc = upb_arena_alloc(arena);
1145  e.buf = NULL;
1146  e.limit = NULL;
1147  e.ptr = NULL;
1148
1149  if (!upb_encode_message(&e, msg, m, size)) {
1150    *size = 0;
1151    return NULL;
1152  }
1153
1154  *size = e.limit - e.ptr;
1155
1156  if (*size == 0) {
1157    static char ch;
1158    return &ch;
1159  } else {
1160    UPB_ASSERT(e.ptr);
1161    return e.ptr;
1162  }
1163}
1164
1165#undef CHK
1166
1167
1168
1169
1170/** upb_msg *******************************************************************/
1171
/* Indexed by a upb_fieldtype_t value; each entry is the base-2 logarithm of
 * the element size in bytes used when that type is stored in a upb_array
 * (_upb_array_append_fallback() copies 1 << entry bytes per element).
 * Pointer-sized and strview-sized entries depend on the platform through
 * UPB_SIZE(size32, size64).  Index 0 carries no type label. */
static const char _upb_fieldtype_to_sizelg2[12] = {
  0,
  0,  /* UPB_TYPE_BOOL */
  2,  /* UPB_TYPE_FLOAT */
  2,  /* UPB_TYPE_INT32 */
  2,  /* UPB_TYPE_UINT32 */
  2,  /* UPB_TYPE_ENUM */
  UPB_SIZE(2, 3),  /* UPB_TYPE_MESSAGE */
  3,  /* UPB_TYPE_DOUBLE */
  3,  /* UPB_TYPE_INT64 */
  3,  /* UPB_TYPE_UINT64 */
  UPB_SIZE(3, 4),  /* UPB_TYPE_STRING */
  UPB_SIZE(3, 4),  /* UPB_TYPE_BYTES */
};
1186
/* Combines a data pointer with the log2 element size by OR-ing the size into
 * the pointer's integer value.  The size must not exceed 4. */
static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) {
  uintptr_t bits = (uintptr_t)ptr;
  UPB_ASSERT(elem_size_lg2 <= 4);
  return bits | elem_size_lg2;
}
1191
1192static int upb_msg_internalsize(const upb_msglayout *l) {
1193  return sizeof(upb_msg_internal) - l->extendable * sizeof(void *);
1194}
1195
1196static size_t upb_msg_sizeof(const upb_msglayout *l) {
1197  return l->size + upb_msg_internalsize(l);
1198}
1199
1200static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) {
1201  return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
1202}
1203
1204static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) {
1205  return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal);
1206}
1207
1208static upb_msg_internal_withext *upb_msg_getinternalwithext(
1209    upb_msg *msg, const upb_msglayout *l) {
1210  UPB_ASSERT(l->extendable);
1211  return UPB_PTR_AT(msg, -sizeof(upb_msg_internal_withext),
1212                    upb_msg_internal_withext);
1213}
1214
1215upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) {
1216  void *mem = upb_arena_malloc(a, upb_msg_sizeof(l));
1217  upb_msg_internal *in;
1218  upb_msg *msg;
1219
1220  if (!mem) {
1221    return NULL;
1222  }
1223
1224  msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg);
1225
1226  /* Initialize normal members. */
1227  memset(msg, 0, l->size);
1228
1229  /* Initialize internal members. */
1230  in = upb_msg_getinternal(msg);
1231  in->unknown = NULL;
1232  in->unknown_len = 0;
1233  in->unknown_size = 0;
1234
1235  if (l->extendable) {
1236    upb_msg_getinternalwithext(msg, l)->extdict = NULL;
1237  }
1238
1239  return msg;
1240}
1241
1242bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len,
1243                         upb_arena *arena) {
1244  upb_msg_internal *in = upb_msg_getinternal(msg);
1245  if (len > in->unknown_size - in->unknown_len) {
1246    upb_alloc *alloc = upb_arena_alloc(arena);
1247    size_t need = in->unknown_size + len;
1248    size_t newsize = UPB_MAX(in->unknown_size * 2, need);
1249    void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize);
1250    if (!mem) return false;
1251    in->unknown = mem;
1252    in->unknown_size = newsize;
1253  }
1254  memcpy(in->unknown + in->unknown_len, data, len);
1255  in->unknown_len += len;
1256  return true;
1257}
1258
1259const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) {
1260  const upb_msg_internal *in = upb_msg_getinternal_const(msg);
1261  *len = in->unknown_len;
1262  return in->unknown;
1263}
1264
1265/** upb_array *****************************************************************/
1266
1267upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type) {
1268  upb_array *arr = upb_arena_malloc(a, sizeof(upb_array));
1269
1270  if (!arr) {
1271    return NULL;
1272  }
1273
1274  arr->data = tag_arrptr(NULL, _upb_fieldtype_to_sizelg2[type]);
1275  arr->len = 0;
1276  arr->size = 0;
1277
1278  return arr;
1279}
1280
1281bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) {
1282  size_t new_size = UPB_MAX(arr->size, 4);
1283  int elem_size_lg2 = arr->data & 7;
1284  size_t old_bytes = arr->size << elem_size_lg2;
1285  size_t new_bytes;
1286  void* ptr = _upb_array_ptr(arr);
1287
1288  /* Log2 ceiling of size. */
1289  while (new_size < min_size) new_size *= 2;
1290
1291  new_bytes = new_size << elem_size_lg2;
1292  ptr = upb_arena_realloc(arena, ptr, old_bytes, new_bytes);
1293
1294  if (!ptr) {
1295    return false;
1296  }
1297
1298  arr->data = tag_arrptr(ptr, elem_size_lg2);
1299  arr->size = new_size;
1300  return true;
1301}
1302
1303static upb_array *getorcreate_array(upb_array **arr_ptr, upb_fieldtype_t type,
1304                                    upb_arena *arena) {
1305  upb_array *arr = *arr_ptr;
1306  if (!arr) {
1307    arr = _upb_array_new(arena, type);
1308    if (!arr) return NULL;
1309    *arr_ptr = arr;
1310  }
1311  return arr;
1312}
1313
1314static bool resize_array(upb_array *arr, size_t size, upb_arena *arena) {
1315  if (size > arr->size && !_upb_array_realloc(arr, size, arena)) {
1316    return false;
1317  }
1318
1319  arr->len = size;
1320  return true;
1321}
1322
1323void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size,
1324                                 upb_fieldtype_t type, upb_arena *arena) {
1325  upb_array *arr = getorcreate_array(arr_ptr, type, arena);
1326  return arr && resize_array(arr, size, arena) ? _upb_array_ptr(arr) : NULL;
1327}
1328
1329bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value,
1330                                upb_fieldtype_t type, upb_arena *arena) {
1331  upb_array *arr = getorcreate_array(arr_ptr, type, arena);
1332  size_t elem = arr->len;
1333  int lg2 = _upb_fieldtype_to_sizelg2[type];
1334  char *data;
1335
1336  if (!arr || !resize_array(arr, elem + 1, arena)) return false;
1337
1338  data = _upb_array_ptr(arr);
1339  memcpy(data + (elem << lg2), value, 1 << lg2);
1340  return true;
1341}
1342
1343/** upb_map *******************************************************************/
1344
1345upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) {
1346  upb_map *map = upb_arena_malloc(a, sizeof(upb_map));
1347
1348  if (!map) {
1349    return NULL;
1350  }
1351
1352  upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a));
1353  map->key_size = key_size;
1354  map->val_size = value_size;
1355
1356  return map;
1357}
1358/*
1359** upb_table Implementation
1360**
1361** Implementation is heavily inspired by Lua's ltable.c.
1362*/
1363
1364
1365#include <string.h>
1366
1367
/* Cap applied by log2ceil() to the log2 values it returns; 2^16 = 64k. */
#define UPB_MAXARRSIZE 16  /* 64k. */

/* From Chromium.  Number of elements in a true array `x`.  The divisor
 * evaluates to 0 when sizeof(x) is not a multiple of the element size, so
 * such misuse turns into a division by zero. */
#define ARRAY_SIZE(x) \
    ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x])))))

/* isfull() reports a table as full once (count + 1) / size exceeds this. */
static const double MAX_LOAD = 0.85;

/* The minimum utilization of the array part of a mixed hash/array table.  This
 * is a speed/memory-usage tradeoff (though it's not straightforward because of
 * cache effects).  The lower this is, the more memory we'll use. */
static const double MIN_DENSITY = 0.1;
1380
/* Returns true when `v` has at most one bit set.  Note that 0 is reported as
 * a power of two. */
bool is_pow2(uint64_t v) {
  if (v == 0) return true;
  return (v & (v - 1)) == 0;
}
1382
1383int log2ceil(uint64_t v) {
1384  int ret = 0;
1385  bool pow2 = is_pow2(v);
1386  while (v >>= 1) ret++;
1387  ret = pow2 ? ret : ret + 1;  /* Ceiling. */
1388  return UPB_MIN(UPB_MAXARRSIZE, ret);
1389}
1390
1391char *upb_strdup(const char *s, upb_alloc *a) {
1392  return upb_strdup2(s, strlen(s), a);
1393}
1394
1395char *upb_strdup2(const char *s, size_t len, upb_alloc *a) {
1396  size_t n;
1397  char *p;
1398
1399  /* Prevent overflow errors. */
1400  if (len == SIZE_MAX) return NULL;
1401  /* Always null-terminate, even if binary data; but don't rely on the input to
1402   * have a null-terminating byte since it may be a raw binary buffer. */
1403  n = len + 1;
1404  p = upb_malloc(a, n);
1405  if (p) {
1406    memcpy(p, s, len);
1407    p[len] = 0;
1408  }
1409  return p;
1410}
1411
/* A type to represent the lookup key of either a strtable or an inttable.
 * Integer lookups fill in `num` (see intkey()); string lookups fill in `str`
 * with a data pointer and an explicit length (see strkey2()). */
typedef union {
  uintptr_t num;
  struct {
    const char *str;
    size_t len;
  } str;
} lookupkey_t;
1420
1421static lookupkey_t strkey2(const char *str, size_t len) {
1422  lookupkey_t k;
1423  k.str.str = str;
1424  k.str.len = len;
1425  return k;
1426}
1427
1428static lookupkey_t intkey(uintptr_t key) {
1429  lookupkey_t k;
1430  k.num = key;
1431  return k;
1432}
1433
/* Computes the hash of a key as stored in the table. */
typedef uint32_t hashfunc_t(upb_tabkey key);
/* Compares a stored table key (k1) against a lookup key (k2). */
typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2);
1436
1437/* Base table (shared code) ***************************************************/
1438
1439/* For when we need to cast away const. */
1440static upb_tabent *mutable_entries(upb_table *t) {
1441  return (upb_tabent*)t->entries;
1442}
1443
1444static bool isfull(upb_table *t) {
1445  if (upb_table_size(t) == 0) {
1446    return true;
1447  } else {
1448    return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD;
1449  }
1450}
1451
1452static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) {
1453  size_t bytes;
1454
1455  t->count = 0;
1456  t->size_lg2 = size_lg2;
1457  t->mask = upb_table_size(t) ? upb_table_size(t) - 1 : 0;
1458  bytes = upb_table_size(t) * sizeof(upb_tabent);
1459  if (bytes > 0) {
1460    t->entries = upb_malloc(a, bytes);
1461    if (!t->entries) return false;
1462    memset(mutable_entries(t), 0, bytes);
1463  } else {
1464    t->entries = NULL;
1465  }
1466  return true;
1467}
1468
1469static void uninit(upb_table *t, upb_alloc *a) {
1470  upb_free(a, mutable_entries(t));
1471}
1472
1473static upb_tabent *emptyent(upb_table *t) {
1474  upb_tabent *e = mutable_entries(t) + upb_table_size(t);
1475  while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); }
1476}
1477
1478static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) {
1479  return (upb_tabent*)upb_getentry(t, hash);
1480}
1481
1482static const upb_tabent *findentry(const upb_table *t, lookupkey_t key,
1483                                   uint32_t hash, eqlfunc_t *eql) {
1484  const upb_tabent *e;
1485
1486  if (t->size_lg2 == 0) return NULL;
1487  e = upb_getentry(t, hash);
1488  if (upb_tabent_isempty(e)) return NULL;
1489  while (1) {
1490    if (eql(e->key, key)) return e;
1491    if ((e = e->next) == NULL) return NULL;
1492  }
1493}
1494
1495static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key,
1496                                     uint32_t hash, eqlfunc_t *eql) {
1497  return (upb_tabent*)findentry(t, key, hash, eql);
1498}
1499
1500static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v,
1501                   uint32_t hash, eqlfunc_t *eql) {
1502  const upb_tabent *e = findentry(t, key, hash, eql);
1503  if (e) {
1504    if (v) {
1505      _upb_value_setval(v, e->val.val);
1506    }
1507    return true;
1508  } else {
1509    return false;
1510  }
1511}
1512
/* Inserts (tabkey, val) into `t`.  The given key must not already exist in
 * the table, and the table must contain a free slot whenever the key's main
 * position is occupied (emptyent() is used to find one).
 *
 * t:        table to insert into; t->count is incremented.
 * key:      lookup form of the key, used only by the assertions.
 * tabkey:   the key as it will be stored in the entry.
 * val:      value to store.
 * hash:     hash of the key; selects the main position.
 * hashfunc: hashes a stored key; used to find the main position of an entry
 *           that currently occupies ours.
 * eql:      key comparison, used only by the assertions. */
static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey,
                   upb_value val, uint32_t hash,
                   hashfunc_t *hashfunc, eqlfunc_t *eql) {
  upb_tabent *mainpos_e;
  upb_tabent *our_e;

  UPB_ASSERT(findentry(t, key, hash, eql) == NULL);

  t->count++;
  mainpos_e = getentry_mutable(t, hash);
  our_e = mainpos_e;

  if (upb_tabent_isempty(mainpos_e)) {
    /* Our main position is empty; use it. */
    our_e->next = NULL;
  } else {
    /* Collision. */
    upb_tabent *new_e = emptyent(t);
    /* Head of collider's chain. */
    upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key));
    if (chain == mainpos_e) {
      /* Existing ent is in its main position (it has the same hash as us, and
       * is the head of our chain).  Insert to new ent and append to this chain. */
      new_e->next = mainpos_e->next;
      mainpos_e->next = new_e;
      our_e = new_e;
    } else {
      /* Existing ent is not in its main position (it is a node in some other
       * chain).  This implies that no existing ent in the table has our hash.
       * Evict it (updating its chain) and use its ent for head of our chain. */
      *new_e = *mainpos_e;  /* copies next. */
      /* Find the evicted entry's predecessor so it can be re-linked. */
      while (chain->next != mainpos_e) {
        chain = (upb_tabent*)chain->next;
        UPB_ASSERT(chain);
      }
      chain->next = new_e;
      our_e = mainpos_e;
      our_e->next = NULL;
    }
  }
  /* Only key and value are written here; `next` was set up above. */
  our_e->key = tabkey;
  our_e->val.val = val.val;
  UPB_ASSERT(findentry(t, key, hash, eql) == our_e);
}
1558
1559static bool rm(upb_table *t, lookupkey_t key, upb_value *val,
1560               upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) {
1561  upb_tabent *chain = getentry_mutable(t, hash);
1562  if (upb_tabent_isempty(chain)) return false;
1563  if (eql(chain->key, key)) {
1564    /* Element to remove is at the head of its chain. */
1565    t->count--;
1566    if (val) _upb_value_setval(val, chain->val.val);
1567    if (removed) *removed = chain->key;
1568    if (chain->next) {
1569      upb_tabent *move = (upb_tabent*)chain->next;
1570      *chain = *move;
1571      move->key = 0;  /* Make the slot empty. */
1572    } else {
1573      chain->key = 0;  /* Make the slot empty. */
1574    }
1575    return true;
1576  } else {
1577    /* Element to remove is either in a non-head position or not in the
1578     * table. */
1579    while (chain->next && !eql(chain->next->key, key)) {
1580      chain = (upb_tabent*)chain->next;
1581    }
1582    if (chain->next) {
1583      /* Found element to remove. */
1584      upb_tabent *rm = (upb_tabent*)chain->next;
1585      t->count--;
1586      if (val) _upb_value_setval(val, chain->next->val.val);
1587      if (removed) *removed = rm->key;
1588      rm->key = 0;  /* Make the slot empty. */
1589      chain->next = rm->next;
1590      return true;
1591    } else {
1592      /* Element to remove is not in the table. */
1593      return false;
1594    }
1595  }
1596}
1597
1598static size_t next(const upb_table *t, size_t i) {
1599  do {
1600    if (++i >= upb_table_size(t))
1601      return SIZE_MAX;
1602  } while(upb_tabent_isempty(&t->entries[i]));
1603
1604  return i;
1605}
1606
1607static size_t begin(const upb_table *t) {
1608  return next(t, -1);
1609}
1610
1611
1612/* upb_strtable ***************************************************************/
1613
1614/* A simple "subclass" of upb_table that only adds a hash function for strings. */
1615
1616static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) {
1617  uint32_t len = (uint32_t) k2.str.len;
1618  char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1);
1619  if (str == NULL) return 0;
1620  memcpy(str, &len, sizeof(uint32_t));
1621  memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len);
1622  str[sizeof(uint32_t) + k2.str.len] = '\0';
1623  return (uintptr_t)str;
1624}
1625
1626static uint32_t strhash(upb_tabkey key) {
1627  uint32_t len;
1628  char *str = upb_tabstr(key, &len);
1629  return upb_murmur_hash2(str, len, 0);
1630}
1631
1632static bool streql(upb_tabkey k1, lookupkey_t k2) {
1633  uint32_t len;
1634  char *str = upb_tabstr(k1, &len);
1635  return len == k2.str.len && memcmp(str, k2.str.str, len) == 0;
1636}
1637
1638bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) {
1639  return init(&t->t, 2, a);
1640}
1641
1642void upb_strtable_clear(upb_strtable *t) {
1643  size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent);
1644  t->t.count = 0;
1645  memset((char*)t->t.entries, 0, bytes);
1646}
1647
1648void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) {
1649  size_t i;
1650  for (i = 0; i < upb_table_size(&t->t); i++)
1651    upb_free(a, (void*)t->t.entries[i].key);
1652  uninit(&t->t, a);
1653}
1654
1655bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) {
1656  upb_strtable new_table;
1657  upb_strtable_iter i;
1658
1659  if (!init(&new_table.t, size_lg2, a))
1660    return false;
1661  upb_strtable_begin(&i, t);
1662  for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) {
1663    upb_strview key = upb_strtable_iter_key(&i);
1664    upb_strtable_insert3(
1665        &new_table, key.data, key.size,
1666        upb_strtable_iter_value(&i), a);
1667  }
1668  upb_strtable_uninit2(t, a);
1669  *t = new_table;
1670  return true;
1671}
1672
1673bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len,
1674                          upb_value v, upb_alloc *a) {
1675  lookupkey_t key;
1676  upb_tabkey tabkey;
1677  uint32_t hash;
1678
1679  if (isfull(&t->t)) {
1680    /* Need to resize.  New table of double the size, add old elements to it. */
1681    if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) {
1682      return false;
1683    }
1684  }
1685
1686  key = strkey2(k, len);
1687  tabkey = strcopy(key, a);
1688  if (tabkey == 0) return false;
1689
1690  hash = upb_murmur_hash2(key.str.str, key.str.len, 0);
1691  insert(&t->t, key, tabkey, v, hash, &strhash, &streql);
1692  return true;
1693}
1694
1695bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len,
1696                          upb_value *v) {
1697  uint32_t hash = upb_murmur_hash2(key, len, 0);
1698  return lookup(&t->t, strkey2(key, len), v, hash, &streql);
1699}
1700
1701bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len,
1702                         upb_value *val, upb_alloc *alloc) {
1703  uint32_t hash = upb_murmur_hash2(key, len, 0);
1704  upb_tabkey tabkey;
1705  if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) {
1706    if (alloc) {
1707      /* Arena-based allocs don't need to free and won't pass this. */
1708      upb_free(alloc, (void*)tabkey);
1709    }
1710    return true;
1711  } else {
1712    return false;
1713  }
1714}
1715
1716/* Iteration */
1717
1718void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) {
1719  i->t = t;
1720  i->index = begin(&t->t);
1721}
1722
1723void upb_strtable_next(upb_strtable_iter *i) {
1724  i->index = next(&i->t->t, i->index);
1725}
1726
1727bool upb_strtable_done(const upb_strtable_iter *i) {
1728  if (!i->t) return true;
1729  return i->index >= upb_table_size(&i->t->t) ||
1730         upb_tabent_isempty(str_tabent(i));
1731}
1732
1733upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) {
1734  upb_strview key;
1735  uint32_t len;
1736  UPB_ASSERT(!upb_strtable_done(i));
1737  key.data = upb_tabstr(str_tabent(i)->key, &len);
1738  key.size = len;
1739  return key;
1740}
1741
1742upb_value upb_strtable_iter_value(const upb_strtable_iter *i) {
1743  UPB_ASSERT(!upb_strtable_done(i));
1744  return _upb_value_val(str_tabent(i)->val.val);
1745}
1746
1747void upb_strtable_iter_setdone(upb_strtable_iter *i) {
1748  i->t = NULL;
1749  i->index = SIZE_MAX;
1750}
1751
1752bool upb_strtable_iter_isequal(const upb_strtable_iter *i1,
1753                               const upb_strtable_iter *i2) {
1754  if (upb_strtable_done(i1) && upb_strtable_done(i2))
1755    return true;
1756  return i1->t == i2->t && i1->index == i2->index;
1757}
1758
1759
1760/* upb_inttable ***************************************************************/
1761
1762/* For inttables we use a hybrid structure where small keys are kept in an
1763 * array and large keys are put in the hash table. */
1764
1765static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); }
1766
1767static bool inteql(upb_tabkey k1, lookupkey_t k2) {
1768  return k1 == k2.num;
1769}
1770
1771static upb_tabval *mutable_array(upb_inttable *t) {
1772  return (upb_tabval*)t->array;
1773}
1774
1775static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) {
1776  if (key < t->array_size) {
1777    return upb_arrhas(t->array[key]) ? &(mutable_array(t)[key]) : NULL;
1778  } else {
1779    upb_tabent *e =
1780        findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql);
1781    return e ? &e->val : NULL;
1782  }
1783}
1784
1785static const upb_tabval *inttable_val_const(const upb_inttable *t,
1786                                            uintptr_t key) {
1787  return inttable_val((upb_inttable*)t, key);
1788}
1789
1790size_t upb_inttable_count(const upb_inttable *t) {
1791  return t->t.count + t->array_count;
1792}
1793
1794static void check(upb_inttable *t) {
1795  UPB_UNUSED(t);
1796#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG)
1797  {
1798    /* This check is very expensive (makes inserts/deletes O(N)). */
1799    size_t count = 0;
1800    upb_inttable_iter i;
1801    upb_inttable_begin(&i, t);
1802    for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) {
1803      UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL));
1804    }
1805    UPB_ASSERT(count == upb_inttable_count(t));
1806  }
1807#endif
1808}
1809
1810bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2,
1811                            upb_alloc *a) {
1812  size_t array_bytes;
1813
1814  if (!init(&t->t, hsize_lg2, a)) return false;
1815  /* Always make the array part at least 1 long, so that we know key 0
1816   * won't be in the hash part, which simplifies things. */
1817  t->array_size = UPB_MAX(1, asize);
1818  t->array_count = 0;
1819  array_bytes = t->array_size * sizeof(upb_value);
1820  t->array = upb_malloc(a, array_bytes);
1821  if (!t->array) {
1822    uninit(&t->t, a);
1823    return false;
1824  }
1825  memset(mutable_array(t), 0xff, array_bytes);
1826  check(t);
1827  return true;
1828}
1829
1830bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
1831  return upb_inttable_sizedinit(t, 0, 4, a);
1832}
1833
1834void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
1835  uninit(&t->t, a);
1836  upb_free(a, mutable_array(t));
1837}
1838
1839bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
1840                          upb_alloc *a) {
1841  upb_tabval tabval;
1842  tabval.val = val.val;
1843  UPB_ASSERT(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */
1844
1845  if (key < t->array_size) {
1846    UPB_ASSERT(!upb_arrhas(t->array[key]));
1847    t->array_count++;
1848    mutable_array(t)[key].val = val.val;
1849  } else {
1850    if (isfull(&t->t)) {
1851      /* Need to resize the hash part, but we re-use the array part. */
1852      size_t i;
1853      upb_table new_table;
1854
1855      if (!init(&new_table, t->t.size_lg2 + 1, a)) {
1856        return false;
1857      }
1858
1859      for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) {
1860        const upb_tabent *e = &t->t.entries[i];
1861        uint32_t hash;
1862        upb_value v;
1863
1864        _upb_value_setval(&v, e->val.val);
1865        hash = upb_inthash(e->key);
1866        insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql);
1867      }
1868
1869      UPB_ASSERT(t->t.count == new_table.count);
1870
1871      uninit(&t->t, a);
1872      t->t = new_table;
1873    }
1874    insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql);
1875  }
1876  check(t);
1877  return true;
1878}
1879
1880bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) {
1881  const upb_tabval *table_v = inttable_val_const(t, key);
1882  if (!table_v) return false;
1883  if (v) _upb_value_setval(v, table_v->val);
1884  return true;
1885}
1886
1887bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) {
1888  upb_tabval *table_v = inttable_val(t, key);
1889  if (!table_v) return false;
1890  table_v->val = val.val;
1891  return true;
1892}
1893
1894bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) {
1895  bool success;
1896  if (key < t->array_size) {
1897    if (upb_arrhas(t->array[key])) {
1898      upb_tabval empty = UPB_TABVALUE_EMPTY_INIT;
1899      t->array_count--;
1900      if (val) {
1901        _upb_value_setval(val, t->array[key].val);
1902      }
1903      mutable_array(t)[key] = empty;
1904      success = true;
1905    } else {
1906      success = false;
1907    }
1908  } else {
1909    success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql);
1910  }
1911  check(t);
1912  return success;
1913}
1914
1915bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) {
1916  return upb_inttable_insert2(t, upb_inttable_count(t), val, a);
1917}
1918
1919upb_value upb_inttable_pop(upb_inttable *t) {
1920  upb_value val;
1921  bool ok = upb_inttable_remove(t, upb_inttable_count(t) - 1, &val);
1922  UPB_ASSERT(ok);
1923  return val;
1924}
1925
1926bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
1927                             upb_alloc *a) {
1928  return upb_inttable_insert2(t, (uintptr_t)key, val, a);
1929}
1930
1931bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
1932                            upb_value *v) {
1933  return upb_inttable_lookup(t, (uintptr_t)key, v);
1934}
1935
1936bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
1937  return upb_inttable_remove(t, (uintptr_t)key, val);
1938}
1939
/* Rebuilds |t| into a freshly sized table and replaces |t| with it.
 *
 * Steps:
 *   1. Histogram all keys by log2ceil(key), remembering the largest key seen
 *      in each bucket.
 *   2. Walk the buckets from the largest downward to pick how many keys go in
 *      the array part (see the MIN_DENSITY test below).
 *   3. Create a new table with that array size and a hash part sized for the
 *      remaining keys, re-insert every entry, then free the old table.
 *
 * NOTE(review): the return values of upb_inttable_sizedinit() and
 * upb_inttable_insert2() are not checked here, and this function has no way
 * to report failure -- confirm how allocation failure is meant to be handled. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  /* Pass 1: build the histogram.  NOTE(review): this indexes counts[]/max[]
   * with log2ceil(key), so it relies on log2ceil() never returning more than
   * UPB_MAXARRSIZE -- confirm against log2ceil's definition. */
  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      /* Enough keys remain to justify an array of this size. */
      break;
    }

    /* Keys in this bucket are excluded from the array-part count. */
    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ? (hash_count / MAX_LOAD) + 1 : 0;
    int hashsize_lg2 = log2ceil(hash_size);

    upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a);
    upb_inttable_begin(&i, t);
    for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
      uintptr_t k = upb_inttable_iter_key(&i);
      upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a);
    }
    /* The sizes chosen above must not have changed during re-insertion
     * (i.e. the hash part was never grown). */
    UPB_ASSERT(new_t.array_size == arr_size);
    UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2);
  }
  /* Release the old storage and adopt the rebuilt table. */
  upb_inttable_uninit2(t, a);
  *t = new_t;
}
1996
1997/* Iteration. */
1998
1999static const upb_tabent *int_tabent(const upb_inttable_iter *i) {
2000  UPB_ASSERT(!i->array_part);
2001  return &i->t->t.entries[i->index];
2002}
2003
2004static upb_tabval int_arrent(const upb_inttable_iter *i) {
2005  UPB_ASSERT(i->array_part);
2006  return i->t->array[i->index];
2007}
2008
2009void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) {
2010  i->t = t;
2011  i->index = -1;
2012  i->array_part = true;
2013  upb_inttable_next(i);
2014}
2015
2016void upb_inttable_next(upb_inttable_iter *iter) {
2017  const upb_inttable *t = iter->t;
2018  if (iter->array_part) {
2019    while (++iter->index < t->array_size) {
2020      if (upb_arrhas(int_arrent(iter))) {
2021        return;
2022      }
2023    }
2024    iter->array_part = false;
2025    iter->index = begin(&t->t);
2026  } else {
2027    iter->index = next(&t->t, iter->index);
2028  }
2029}
2030
2031bool upb_inttable_done(const upb_inttable_iter *i) {
2032  if (!i->t) return true;
2033  if (i->array_part) {
2034    return i->index >= i->t->array_size ||
2035           !upb_arrhas(int_arrent(i));
2036  } else {
2037    return i->index >= upb_table_size(&i->t->t) ||
2038           upb_tabent_isempty(int_tabent(i));
2039  }
2040}
2041
2042uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) {
2043  UPB_ASSERT(!upb_inttable_done(i));
2044  return i->array_part ? i->index : int_tabent(i)->key;
2045}
2046
2047upb_value upb_inttable_iter_value(const upb_inttable_iter *i) {
2048  UPB_ASSERT(!upb_inttable_done(i));
2049  return _upb_value_val(
2050      i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val);
2051}
2052
2053void upb_inttable_iter_setdone(upb_inttable_iter *i) {
2054  i->t = NULL;
2055  i->index = SIZE_MAX;
2056  i->array_part = false;
2057}
2058
2059bool upb_inttable_iter_isequal(const upb_inttable_iter *i1,
2060                                          const upb_inttable_iter *i2) {
2061  if (upb_inttable_done(i1) && upb_inttable_done(i2))
2062    return true;
2063  return i1->t == i2->t && i1->index == i2->index &&
2064         i1->array_part == i2->array_part;
2065}
2066
2067#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__)
2068/* -----------------------------------------------------------------------------
2069 * MurmurHash2, by Austin Appleby (released as public domain).
2070 * Reformatted and C99-ified by Joshua Haberman.
2071 * Note - This code makes a few assumptions about how your machine behaves -
2072 *   1. We can read a 4-byte value from any address without crashing
 *   2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t)
2074 * And it has a few limitations -
2075 *   1. It will not work incrementally.
2076 *   2. It will not produce the same results on little-endian and big-endian
2077 *      machines. */
/* Hashes |len| bytes starting at |key|, mixing in |seed|.  Input is consumed
 * four bytes at a time (copied out with memcpy), followed by a 0-3 byte tail
 * and a final avalanche step. */
uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) {
  /* Mixing multiplier and shift distance; generated offline. */
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  const uint8_t *bytes = (const uint8_t *)key;
  size_t remaining = len;

  /* Start the hash from a 'random' value derived from seed and length. */
  uint32_t h = (uint32_t)(seed ^ len);

  /* Body: fold in one 32-bit word per iteration. */
  for (; remaining >= 4; bytes += 4, remaining -= 4) {
    uint32_t word;
    memcpy(&word, bytes, sizeof(word));

    word *= m;
    word ^= word >> r;
    word *= m;

    h = (h * m) ^ word;
  }

  /* Tail: fold in the last 1-3 bytes, highest offset first. */
  if (remaining == 3) {
    h ^= bytes[2] << 16;
  }
  if (remaining >= 2) {
    h ^= bytes[1] << 8;
  }
  if (remaining >= 1) {
    h ^= bytes[0];
    h *= m;
  }

  /* Final mixes so the last few bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}
2119
2120#else /* !UPB_UNALIGNED_READS_OK */
2121
2122/* -----------------------------------------------------------------------------
2123 * MurmurHashAligned2, by Austin Appleby
2124 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
2125 * on certain platforms.
2126 * Performance will be lower than MurmurHash2 */
2127
/* One mixing round: scrambles word k and folds it into hash h.  Uses the
 * local variable r (shift distance) from the enclosing function. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

/* Hashes |len| bytes at |key| with |seed|.
 *
 * 32-bit words are only loaded through the uint32_t pointer at addresses
 * that are multiples of 4.  When |key| is misaligned (and at least 4 bytes
 * long), the leading bytes are read one at a time and every mixed word is
 * then stitched together from two neighbouring aligned words using shifts.
 *
 * NOTE(review): the shift-based stitching appears to assume little-endian
 * byte order -- confirm before relying on hash values across platforms. */
uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = (uint32_t)(seed ^ len);
  /* Offset of |data| within its 4-byte word (0 means already aligned). */
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Read the (4 - align) bytes that precede the next aligned address.
     * The cases fall through intentionally. */
    switch(align) {
      case 1: t |= data[2] << 16;  /* fallthrough */
      case 2: t |= data[1] << 8;   /* fallthrough */
      case 3: t |= data[0];
    }

    /* Move those bytes to the top of t so the ">> sr" below brings them
     * back down to the low end. */
    t <<= (8 * align);

    /* data now sits on a 4-byte boundary. */
    data += 4-align;
    len -= 4-align;

    /* sl: how far bytes of the newly loaded word are shifted up;
     * sr: how far the bytes carried over in t are shifted down. */
    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      /* Aligned 32-bit load.  NOTE(review): this cast drops const and reads
       * the byte buffer through a uint32_t lvalue -- check against the
       * project's strict-aliasing assumptions. */
      d = *(uint32_t *)data;
      /* Combine the carried-over bytes with the low bytes of the new word. */
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      /* Carry the whole word; its upper bytes are used next iteration. */
      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      /* Enough bytes remain to complete one more full word. */
      uint32_t k;

      /* Read |align| more bytes one at a time (intentional fallthrough). */
      switch(align) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      switch(len) {
        case 3: h ^= data[2] << 16;  /* fallthrough */
        case 2: h ^= data[1] << 8;   /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      };
    } else {
      /* Fewer than |align| bytes remain: merge them with the carried-over
       * bytes and fold the result in as the tail (intentional fallthrough,
       * including into case 0). */
      switch(len) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];        /* fallthrough */
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* Either |data| is already 4-byte aligned, or len < 4 so the loop below
     * does not execute and only the byte-wise tail runs. */
    while(len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail bytes */

    switch(len) {
      case 3: h ^= data[2] << 16;  /* fallthrough */
      case 2: h ^= data[1] << 8;   /* fallthrough */
      case 1: h ^= data[0]; h *= m;
    };

    /* Final avalanche. */
    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  }
}
#undef MIX
2243
2244#endif /* UPB_UNALIGNED_READS_OK */
2245
2246
2247#include <errno.h>
2248#include <stdarg.h>
2249#include <stddef.h>
2250#include <stdint.h>
2251#include <stdio.h>
2252#include <stdlib.h>
2253#include <string.h>
2254
2255
2256/* upb_status *****************************************************************/
2257
2258void upb_status_clear(upb_status *status) {
2259  if (!status) return;
2260  status->ok = true;
2261  status->msg[0] = '\0';
2262}
2263
2264bool upb_ok(const upb_status *status) { return status->ok; }
2265
2266const char *upb_status_errmsg(const upb_status *status) { return status->msg; }
2267
2268void upb_status_seterrmsg(upb_status *status, const char *msg) {
2269  if (!status) return;
2270  status->ok = false;
2271  strncpy(status->msg, msg, UPB_STATUS_MAX_MESSAGE - 1);
2272  status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
2273}
2274
2275void upb_status_seterrf(upb_status *status, const char *fmt, ...) {
2276  va_list args;
2277  va_start(args, fmt);
2278  upb_status_vseterrf(status, fmt, args);
2279  va_end(args);
2280}
2281
2282void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) {
2283  if (!status) return;
2284  status->ok = false;
2285  _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args);
2286  status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0';
2287}
2288
2289/* upb_alloc ******************************************************************/
2290
2291static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize,
2292                                  size_t size) {
2293  UPB_UNUSED(alloc);
2294  UPB_UNUSED(oldsize);
2295  if (size == 0) {
2296    free(ptr);
2297    return NULL;
2298  } else {
2299    return realloc(ptr, size);
2300  }
2301}
2302
2303upb_alloc upb_alloc_global = {&upb_global_allocfunc};
2304
2305/* upb_arena ******************************************************************/
2306
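/* Be conservative and choose 16 in case anyone is using SSE.
 * NOTE(review): the alignment constant this comment describes is not defined
 * in this section; presumably it is the one behind _upb_arena_alignup(). */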
2308
/* Arena state.  The arena object itself is placed at the end of its first
 * memory block (see upb_arena_init). */
struct upb_arena {
  /* Allocation callback plus the [ptr, end) window of free space in the
   * current block. */
  _upb_arena_head head;
  /* Start of the usable area of the current block; head.ptr - start is the
   * number of bytes handed out from that block so far. */
  char *start;

  /* Allocator to allocate arena blocks.  We are responsible for freeing these
   * when we are destroyed. */
  upb_alloc *block_alloc;

  /* Bytes consumed in earlier blocks; updated each time a further block is
   * added. */
  size_t bytes_allocated;
  /* Minimum payload size of the next block to allocate. */
  size_t next_block_size;
  /* Upper bound applied when next_block_size is doubled. */
  size_t max_block_size;

  /* Linked list of blocks, newest first.  Points to a mem_block. */
  void *block_head;

  /* Cleanup entries, most recently added first.  Points to a cleanup_ent. */
  void *cleanup_head;
};
2327
/* Header stored at the start of every arena block. */
typedef struct mem_block {
  /* Next (older) block in the arena's list, or NULL. */
  struct mem_block *next;
  /* True if upb_arena_free() must release this block through the arena's
   * block allocator; false for caller-provided memory. */
  bool owned;
  /* Data follows. */
} mem_block;
2333
/* One registered cleanup callback; entries form a singly linked list that is
 * walked by upb_arena_free(). */
typedef struct cleanup_ent {
  /* Next entry in the list, or NULL. */
  struct cleanup_ent *next;
  /* Function to call when the arena is freed. */
  upb_cleanup_func *cleanup;
  /* Argument passed to |cleanup|. */
  void *ud;
} cleanup_ent;
2339
2340static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size,
2341                               bool owned) {
2342  mem_block *block = ptr;
2343
2344  if (a->block_head) {
2345    a->bytes_allocated += a->head.ptr - a->start;
2346  }
2347
2348  block->next = a->block_head;
2349  block->owned = owned;
2350
2351  a->block_head = block;
2352  a->start = (char*)block + _upb_arena_alignup(sizeof(mem_block));
2353  a->head.ptr = a->start;
2354  a->head.end = (char*)block + size;
2355
2356  /* TODO(haberman): ASAN poison. */
2357}
2358
2359static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) {
2360  size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block);
2361  mem_block *block = upb_malloc(a->block_alloc, block_size);
2362
2363  if (!block) {
2364    return NULL;
2365  }
2366
2367  upb_arena_addblock(a, block, block_size, true);
2368  a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size);
2369
2370  return block;
2371}
2372
2373void *_upb_arena_slowmalloc(upb_arena *a, size_t size) {
2374  mem_block *block = upb_arena_allocblock(a, size);
2375  if (!block) return NULL;  /* Out of memory. */
2376  return upb_arena_malloc(a, size);
2377}
2378
2379static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize,
2380                               size_t size) {
2381  upb_arena *a = (upb_arena*)alloc;  /* upb_alloc is initial member. */
2382  void *ret;
2383
2384  if (size == 0) {
2385    return NULL;  /* We are an arena, don't need individual frees. */
2386  }
2387
2388  ret = upb_arena_malloc(a, size);
2389  if (!ret) return NULL;
2390
2391  /* TODO(haberman): special-case if this is a realloc of the last alloc? */
2392
2393  if (oldsize > 0) {
2394    memcpy(ret, ptr, oldsize);  /* Preserve existing data. */
2395  }
2396
2397  /* TODO(haberman): ASAN unpoison. */
2398  return ret;
2399}
2400
2401/* Public Arena API ***********************************************************/
2402
2403#define upb_alignof(type) offsetof (struct { char c; type member; }, member)
2404
2405upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) {
2406  const size_t first_block_overhead = sizeof(upb_arena) + sizeof(mem_block);
2407  upb_arena *a;
2408  bool owned = false;
2409
2410  /* Round block size down to alignof(*a) since we will allocate the arena
2411   * itself at the end. */
2412  n &= ~(upb_alignof(upb_arena) - 1);
2413
2414  if (n < first_block_overhead) {
2415    /* We need to malloc the initial block. */
2416    n = first_block_overhead + 256;
2417    owned = true;
2418    if (!alloc || !(mem = upb_malloc(alloc, n))) {
2419      return NULL;
2420    }
2421  }
2422
2423  a = (void*)((char*)mem + n - sizeof(*a));
2424  n -= sizeof(*a);
2425
2426  a->head.alloc.func = &upb_arena_doalloc;
2427  a->head.ptr = NULL;
2428  a->head.end = NULL;
2429  a->start = NULL;
2430  a->block_alloc = &upb_alloc_global;
2431  a->bytes_allocated = 0;
2432  a->next_block_size = 256;
2433  a->max_block_size = 16384;
2434  a->cleanup_head = NULL;
2435  a->block_head = NULL;
2436  a->block_alloc = alloc;
2437
2438  upb_arena_addblock(a, mem, n, owned);
2439
2440  return a;
2441}
2442
2443#undef upb_alignof
2444
2445void upb_arena_free(upb_arena *a) {
2446  cleanup_ent *ent = a->cleanup_head;
2447  mem_block *block = a->block_head;
2448
2449  while (ent) {
2450    ent->cleanup(ent->ud);
2451    ent = ent->next;
2452  }
2453
2454  /* Must do this after running cleanup functions, because this will delete
2455   * the memory we store our cleanup entries in! */
2456  while (block) {
2457    /* Load first since we are deleting block. */
2458    mem_block *next = block->next;
2459
2460    if (block->owned) {
2461      upb_free(a->block_alloc, block);
2462    }
2463
2464    block = next;
2465  }
2466}
2467
2468bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
2469  cleanup_ent *ent = upb_malloc(&a->head.alloc, sizeof(cleanup_ent));
2470  if (!ent) {
2471    return false;  /* Out of memory. */
2472  }
2473
2474  ent->cleanup = func;
2475  ent->ud = ud;
2476  ent->next = a->cleanup_head;
2477  a->cleanup_head = ent;
2478
2479  return true;
2480}
2481
2482size_t upb_arena_bytesallocated(const upb_arena *a) {
2483  return a->bytes_allocated + (a->head.ptr - a->start);
2484}
2485/* This file was generated by upbc (the upb compiler) from the input
2486 * file:
2487 *
2488 *     google/protobuf/descriptor.proto
2489 *
2490 * Do not edit -- your changes will be discarded when the file is
2491 * regenerated. */
2492
2493#include <stddef.h>
2494
2495
/* Generated layout tables for google_protobuf_FileDescriptorSet: the
 * sub-message layouts it refers to, its single field entry, and the exported
 * msginit record that ties them together.
 * NOTE(review): the meaning of the individual columns is defined by
 * upb_msglayout_field / upb_msglayout, which are declared outside this
 * section. */
static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
  &google_protobuf_FileDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
  &google_protobuf_FileDescriptorSet_submsgs[0],
  &google_protobuf_FileDescriptorSet__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2509
/* Generated layout tables for google_protobuf_FileDescriptorProto: six
 * sub-message layouts, twelve field entries, and the exported msginit record.
 * UPB_SIZE(a, b) selects a on 32-bit targets and b on 64-bit targets. */
static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
  &google_protobuf_DescriptorProto_msginit,
  &google_protobuf_EnumDescriptorProto_msginit,
  &google_protobuf_FieldDescriptorProto_msginit,
  &google_protobuf_FileOptions_msginit,
  &google_protobuf_ServiceDescriptorProto_msginit,
  &google_protobuf_SourceCodeInfo_msginit,
};

static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 2, 0, 9, 1},
  {3, UPB_SIZE(36, 72), 0, 0, 9, 3},
  {4, UPB_SIZE(40, 80), 0, 0, 11, 3},
  {5, UPB_SIZE(44, 88), 0, 1, 11, 3},
  {6, UPB_SIZE(48, 96), 0, 4, 11, 3},
  {7, UPB_SIZE(52, 104), 0, 2, 11, 3},
  {8, UPB_SIZE(28, 56), 4, 3, 11, 1},
  {9, UPB_SIZE(32, 64), 5, 5, 11, 1},
  {10, UPB_SIZE(56, 112), 0, 0, 5, 3},
  {11, UPB_SIZE(60, 120), 0, 0, 5, 3},
  {12, UPB_SIZE(20, 40), 3, 0, 9, 1},
};

const upb_msglayout google_protobuf_FileDescriptorProto_msginit = {
  &google_protobuf_FileDescriptorProto_submsgs[0],
  &google_protobuf_FileDescriptorProto__fields[0],
  UPB_SIZE(64, 128), 12, false,
};
2539
/* Generated layout tables for google_protobuf_DescriptorProto: sub-message
 * layouts, ten field entries, and the exported msginit record.
 * Note that the submsgs array is declared with 8 slots but lists 7
 * initializers, so the last slot is implicitly NULL. */
static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = {
  &google_protobuf_DescriptorProto_msginit,
  &google_protobuf_DescriptorProto_ExtensionRange_msginit,
  &google_protobuf_DescriptorProto_ReservedRange_msginit,
  &google_protobuf_EnumDescriptorProto_msginit,
  &google_protobuf_FieldDescriptorProto_msginit,
  &google_protobuf_MessageOptions_msginit,
  &google_protobuf_OneofDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 4, 11, 3},
  {3, UPB_SIZE(20, 40), 0, 0, 11, 3},
  {4, UPB_SIZE(24, 48), 0, 3, 11, 3},
  {5, UPB_SIZE(28, 56), 0, 1, 11, 3},
  {6, UPB_SIZE(32, 64), 0, 4, 11, 3},
  {7, UPB_SIZE(12, 24), 2, 5, 11, 1},
  {8, UPB_SIZE(36, 72), 0, 6, 11, 3},
  {9, UPB_SIZE(40, 80), 0, 2, 11, 3},
  {10, UPB_SIZE(44, 88), 0, 0, 9, 3},
};

const upb_msglayout google_protobuf_DescriptorProto_msginit = {
  &google_protobuf_DescriptorProto_submsgs[0],
  &google_protobuf_DescriptorProto__fields[0],
  UPB_SIZE(48, 96), 10, false,
};
2568
/* Generated layout tables for google_protobuf_DescriptorProto_ExtensionRange:
 * one sub-message layout, three field entries, and the exported msginit
 * record. */
static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = {
  &google_protobuf_ExtensionRangeOptions_msginit,
};

static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
  {3, UPB_SIZE(12, 16), 3, 0, 11, 1},
};

const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = {
  &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0],
  &google_protobuf_DescriptorProto_ExtensionRange__fields[0],
  UPB_SIZE(16, 24), 3, false,
};
2584
/* Generated layout tables for google_protobuf_DescriptorProto_ReservedRange:
 * two field entries and the exported msginit record.  There is no
 * sub-message table, so the first msginit member is NULL. */
static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = {
  NULL,
  &google_protobuf_DescriptorProto_ReservedRange__fields[0],
  UPB_SIZE(12, 12), 2, false,
};
2595
/* Generated layout tables for google_protobuf_ExtensionRangeOptions: one
 * sub-message layout, one field entry, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = {
  {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = {
  &google_protobuf_ExtensionRangeOptions_submsgs[0],
  &google_protobuf_ExtensionRangeOptions__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2609
/* Generated layout tables for google_protobuf_FieldDescriptorProto: one
 * sub-message layout, eleven field entries, and the exported msginit
 * record. */
static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = {
  &google_protobuf_FieldOptions_msginit,
};

static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = {
  {1, UPB_SIZE(36, 40), 6, 0, 9, 1},
  {2, UPB_SIZE(44, 56), 7, 0, 9, 1},
  {3, UPB_SIZE(24, 24), 3, 0, 5, 1},
  {4, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {5, UPB_SIZE(16, 16), 2, 0, 14, 1},
  {6, UPB_SIZE(52, 72), 8, 0, 9, 1},
  {7, UPB_SIZE(60, 88), 9, 0, 9, 1},
  {8, UPB_SIZE(76, 120), 11, 0, 11, 1},
  {9, UPB_SIZE(28, 28), 4, 0, 5, 1},
  {10, UPB_SIZE(68, 104), 10, 0, 9, 1},
  {17, UPB_SIZE(32, 32), 5, 0, 8, 1},
};

const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = {
  &google_protobuf_FieldDescriptorProto_submsgs[0],
  &google_protobuf_FieldDescriptorProto__fields[0],
  UPB_SIZE(80, 128), 11, false,
};
2633
/* Generated layout tables for google_protobuf_OneofDescriptorProto: one
 * sub-message layout, two field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = {
  &google_protobuf_OneofOptions_msginit,
};

static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 2, 0, 11, 1},
};

const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = {
  &google_protobuf_OneofDescriptorProto_submsgs[0],
  &google_protobuf_OneofDescriptorProto__fields[0],
  UPB_SIZE(16, 32), 2, false,
};
2648
/* Generated layout tables for google_protobuf_EnumDescriptorProto: three
 * sub-message layouts, five field entries, and the exported msginit
 * record. */
static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = {
  &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit,
  &google_protobuf_EnumOptions_msginit,
  &google_protobuf_EnumValueDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 2, 11, 3},
  {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
  {4, UPB_SIZE(20, 40), 0, 0, 11, 3},
  {5, UPB_SIZE(24, 48), 0, 0, 9, 3},
};

const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = {
  &google_protobuf_EnumDescriptorProto_submsgs[0],
  &google_protobuf_EnumDescriptorProto__fields[0],
  UPB_SIZE(32, 64), 5, false,
};
2668
/* Generated layout tables for
 * google_protobuf_EnumDescriptorProto_EnumReservedRange: two field entries
 * and the exported msginit record.  There is no sub-message table, so the
 * first msginit member is NULL. */
static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = {
  {1, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {2, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = {
  NULL,
  &google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0],
  UPB_SIZE(12, 12), 2, false,
};
2679
/* Generated layout tables for google_protobuf_EnumValueDescriptorProto: one
 * sub-message layout, three field entries, and the exported msginit
 * record. */
static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = {
  &google_protobuf_EnumValueOptions_msginit,
};

static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = {
  {1, UPB_SIZE(8, 8), 2, 0, 9, 1},
  {2, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {3, UPB_SIZE(16, 24), 3, 0, 11, 1},
};

const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = {
  &google_protobuf_EnumValueDescriptorProto_submsgs[0],
  &google_protobuf_EnumValueDescriptorProto__fields[0],
  UPB_SIZE(24, 32), 3, false,
};
2695
/* Generated layout tables for google_protobuf_ServiceDescriptorProto: two
 * sub-message layouts, three field entries, and the exported msginit
 * record. */
static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = {
  &google_protobuf_MethodDescriptorProto_msginit,
  &google_protobuf_ServiceOptions_msginit,
};

static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(16, 32), 0, 0, 11, 3},
  {3, UPB_SIZE(12, 24), 2, 1, 11, 1},
};

const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = {
  &google_protobuf_ServiceDescriptorProto_submsgs[0],
  &google_protobuf_ServiceDescriptorProto__fields[0],
  UPB_SIZE(24, 48), 3, false,
};
2712
/* Generated layout tables for google_protobuf_MethodDescriptorProto: one
 * sub-message layout, six field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = {
  &google_protobuf_MethodOptions_msginit,
};

static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = {
  {1, UPB_SIZE(4, 8), 3, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 4, 0, 9, 1},
  {3, UPB_SIZE(20, 40), 5, 0, 9, 1},
  {4, UPB_SIZE(28, 56), 6, 0, 11, 1},
  {5, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {6, UPB_SIZE(2, 2), 2, 0, 8, 1},
};

const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = {
  &google_protobuf_MethodDescriptorProto_submsgs[0],
  &google_protobuf_MethodDescriptorProto__fields[0],
  UPB_SIZE(32, 64), 6, false,
};
2731
/* Generated layout tables for google_protobuf_FileOptions: one sub-message
 * layout, twenty-one field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = {
  {1, UPB_SIZE(28, 32), 11, 0, 9, 1},
  {8, UPB_SIZE(36, 48), 12, 0, 9, 1},
  {9, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {10, UPB_SIZE(16, 16), 2, 0, 8, 1},
  {11, UPB_SIZE(44, 64), 13, 0, 9, 1},
  {16, UPB_SIZE(17, 17), 3, 0, 8, 1},
  {17, UPB_SIZE(18, 18), 4, 0, 8, 1},
  {18, UPB_SIZE(19, 19), 5, 0, 8, 1},
  {20, UPB_SIZE(20, 20), 6, 0, 8, 1},
  {23, UPB_SIZE(21, 21), 7, 0, 8, 1},
  {27, UPB_SIZE(22, 22), 8, 0, 8, 1},
  {31, UPB_SIZE(23, 23), 9, 0, 8, 1},
  {36, UPB_SIZE(52, 80), 14, 0, 9, 1},
  {37, UPB_SIZE(60, 96), 15, 0, 9, 1},
  {39, UPB_SIZE(68, 112), 16, 0, 9, 1},
  {40, UPB_SIZE(76, 128), 17, 0, 9, 1},
  {41, UPB_SIZE(84, 144), 18, 0, 9, 1},
  {42, UPB_SIZE(24, 24), 10, 0, 8, 1},
  {44, UPB_SIZE(92, 160), 19, 0, 9, 1},
  {45, UPB_SIZE(100, 176), 20, 0, 9, 1},
  {999, UPB_SIZE(108, 192), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FileOptions_msginit = {
  &google_protobuf_FileOptions_submsgs[0],
  &google_protobuf_FileOptions__fields[0],
  UPB_SIZE(112, 208), 21, false,
};
2765
/* Generated layout tables for google_protobuf_MessageOptions: one
 * sub-message layout, five field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = {
  {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {2, UPB_SIZE(2, 2), 2, 0, 8, 1},
  {3, UPB_SIZE(3, 3), 3, 0, 8, 1},
  {7, UPB_SIZE(4, 4), 4, 0, 8, 1},
  {999, UPB_SIZE(8, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_MessageOptions_msginit = {
  &google_protobuf_MessageOptions_submsgs[0],
  &google_protobuf_MessageOptions__fields[0],
  UPB_SIZE(12, 16), 5, false,
};
2783
/* Generated layout tables for google_protobuf_FieldOptions: one sub-message
 * layout, seven field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = {
  {1, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {2, UPB_SIZE(24, 24), 3, 0, 8, 1},
  {3, UPB_SIZE(25, 25), 4, 0, 8, 1},
  {5, UPB_SIZE(26, 26), 5, 0, 8, 1},
  {6, UPB_SIZE(16, 16), 2, 0, 14, 1},
  {10, UPB_SIZE(27, 27), 6, 0, 8, 1},
  {999, UPB_SIZE(28, 32), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FieldOptions_msginit = {
  &google_protobuf_FieldOptions_submsgs[0],
  &google_protobuf_FieldOptions__fields[0],
  UPB_SIZE(32, 40), 7, false,
};
2803
/* Generated layout tables for google_protobuf_OneofOptions: one sub-message
 * layout, one field entry, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = {
  {999, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_OneofOptions_msginit = {
  &google_protobuf_OneofOptions_submsgs[0],
  &google_protobuf_OneofOptions__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2817
/* Generated layout tables for google_protobuf_EnumOptions: one sub-message
 * layout, three field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = {
  {2, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {3, UPB_SIZE(2, 2), 2, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_EnumOptions_msginit = {
  &google_protobuf_EnumOptions_submsgs[0],
  &google_protobuf_EnumOptions__fields[0],
  UPB_SIZE(8, 16), 3, false,
};
2833
/* Generated layout tables for google_protobuf_EnumValueOptions: one
 * sub-message layout, two field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = {
  {1, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_EnumValueOptions_msginit = {
  &google_protobuf_EnumValueOptions_submsgs[0],
  &google_protobuf_EnumValueOptions__fields[0],
  UPB_SIZE(8, 16), 2, false,
};
2848
/* Generated layout tables for google_protobuf_ServiceOptions: one
 * sub-message layout, two field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = {
  {33, UPB_SIZE(1, 1), 1, 0, 8, 1},
  {999, UPB_SIZE(4, 8), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_ServiceOptions_msginit = {
  &google_protobuf_ServiceOptions_submsgs[0],
  &google_protobuf_ServiceOptions__fields[0],
  UPB_SIZE(8, 16), 2, false,
};
2863
/* Generated layout tables for google_protobuf_MethodOptions: one sub-message
 * layout, three field entries, and the exported msginit record. */
static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = {
  &google_protobuf_UninterpretedOption_msginit,
};

static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = {
  {33, UPB_SIZE(16, 16), 2, 0, 8, 1},
  {34, UPB_SIZE(8, 8), 1, 0, 14, 1},
  {999, UPB_SIZE(20, 24), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_MethodOptions_msginit = {
  &google_protobuf_MethodOptions_submsgs[0],
  &google_protobuf_MethodOptions__fields[0],
  UPB_SIZE(24, 32), 3, false,
};
2879
/* Sub-message layouts referenced by google.protobuf.UninterpretedOption; the
 * only entry is the nested NamePart layout. */
static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = {
  &google_protobuf_UninterpretedOption_NamePart_msginit,
};

/* One row per UninterpretedOption field.
 * NOTE(review): columns presumably {field number, offset, presence, submsg
 * index, descriptor type, label}; confirm against upb_msglayout_field.
 * Several offsets differ between the 32-bit and 64-bit variants, hence the
 * UPB_SIZE(a, b) pairs. */
static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = {
  {2, UPB_SIZE(56, 80), 0, 0, 11, 3},
  {3, UPB_SIZE(32, 32), 4, 0, 9, 1},
  {4, UPB_SIZE(8, 8), 1, 0, 4, 1},
  {5, UPB_SIZE(16, 16), 2, 0, 3, 1},
  {6, UPB_SIZE(24, 24), 3, 0, 1, 1},
  {7, UPB_SIZE(40, 48), 5, 0, 12, 1},
  {8, UPB_SIZE(48, 64), 6, 0, 9, 1},
};

/* Layout descriptor for UninterpretedOption: sub-message table, field table,
 * a UPB_SIZE(...) value (presumably the message size -- confirm) and the
 * field count (7, matching the array above). */
const upb_msglayout google_protobuf_UninterpretedOption_msginit = {
  &google_protobuf_UninterpretedOption_submsgs[0],
  &google_protobuf_UninterpretedOption__fields[0],
  UPB_SIZE(64, 96), 7, false,
};
2899
/* One row per UninterpretedOption.NamePart field.
 * NOTE(review): columns presumably {field number, offset, presence, submsg
 * index, descriptor type, label}; confirm against upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = {
  {1, UPB_SIZE(4, 8), 2, 0, 9, 2},
  {2, UPB_SIZE(1, 1), 1, 0, 8, 2},
};

/* Layout descriptor for NamePart.  The sub-message table pointer is NULL;
 * the field count is 2, matching the array above. */
const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = {
  NULL,
  &google_protobuf_UninterpretedOption_NamePart__fields[0],
  UPB_SIZE(16, 32), 2, false,
};
2910
/* Sub-message layouts referenced by google.protobuf.SourceCodeInfo; the only
 * entry is the nested Location layout. */
static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = {
  &google_protobuf_SourceCodeInfo_Location_msginit,
};

/* The single SourceCodeInfo field row.
 * NOTE(review): columns presumably {field number, offset, presence, submsg
 * index, descriptor type, label}; confirm against upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

/* Layout descriptor for SourceCodeInfo: sub-message table, field table,
 * a UPB_SIZE(...) value (presumably the message size -- confirm) and the
 * field count (1). */
const upb_msglayout google_protobuf_SourceCodeInfo_msginit = {
  &google_protobuf_SourceCodeInfo_submsgs[0],
  &google_protobuf_SourceCodeInfo__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2924
/* One row per SourceCodeInfo.Location field.  The first two rows use
 * _UPB_LABEL_PACKED in the last column instead of a numeric label.
 * NOTE(review): columns presumably {field number, offset, presence, submsg
 * index, descriptor type, label}; confirm against upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = {
  {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED},
  {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED},
  {3, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {4, UPB_SIZE(12, 24), 2, 0, 9, 1},
  {6, UPB_SIZE(28, 56), 0, 0, 9, 3},
};

/* Layout descriptor for Location.  The sub-message table pointer is NULL;
 * the field count is 5, matching the array above. */
const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = {
  NULL,
  &google_protobuf_SourceCodeInfo_Location__fields[0],
  UPB_SIZE(32, 64), 5, false,
};
2938
/* Sub-message layouts referenced by google.protobuf.GeneratedCodeInfo; the
 * only entry is the nested Annotation layout. */
static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = {
  &google_protobuf_GeneratedCodeInfo_Annotation_msginit,
};

/* The single GeneratedCodeInfo field row.
 * NOTE(review): columns presumably {field number, offset, presence, submsg
 * index, descriptor type, label}; confirm against upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

/* Layout descriptor for GeneratedCodeInfo: sub-message table, field table,
 * a UPB_SIZE(...) value (presumably the message size -- confirm) and the
 * field count (1). */
const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = {
  &google_protobuf_GeneratedCodeInfo_submsgs[0],
  &google_protobuf_GeneratedCodeInfo__fields[0],
  UPB_SIZE(4, 8), 1, false,
};
2952
/* One row per GeneratedCodeInfo.Annotation field.  The first row uses
 * _UPB_LABEL_PACKED in the last column instead of a numeric label.
 * NOTE(review): columns presumably {field number, offset, presence, submsg
 * index, descriptor type, label}; confirm against upb_msglayout_field. */
static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = {
  {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED},
  {2, UPB_SIZE(12, 16), 3, 0, 9, 1},
  {3, UPB_SIZE(4, 4), 1, 0, 5, 1},
  {4, UPB_SIZE(8, 8), 2, 0, 5, 1},
};

/* Layout descriptor for Annotation.  The sub-message table pointer is NULL;
 * the field count is 4, matching the array above. */
const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = {
  NULL,
  &google_protobuf_GeneratedCodeInfo_Annotation__fields[0],
  UPB_SIZE(24, 48), 4, false,
};
2965
2966
2967
2968
2969#include <ctype.h>
2970#include <errno.h>
2971#include <stdlib.h>
2972#include <string.h>
2973
2974
/* Length-prefixed string whose bytes are stored inline after the header.
 * Instances are created by newstr(), which over-allocates so that |len| data
 * bytes plus a NUL terminator fit starting at |str|. */
typedef struct {
  size_t len;  /* Number of data bytes, excluding the NUL terminator. */
  char str[1];  /* Null-terminated string data follows. */
} str_t;
2979
2980static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) {
2981  str_t *ret = upb_malloc(alloc, sizeof(*ret) + len);
2982  if (!ret) return NULL;
2983  ret->len = len;
2984  memcpy(ret->str, data, len);
2985  ret->str[len] = '\0';
2986  return ret;
2987}
2988
/* Definition of a single message field (or extension). */
struct upb_fielddef {
  const upb_filedef *file;    /* Defining file; its syntax is consulted by
                               * upb_fielddef_haspresence(). */
  const upb_msgdef *msgdef;   /* Containing message. */
  const char *full_name;
  const char *json_name;
  /* Default value.  Which member is meaningful depends on the field type;
   * the upb_fielddef_default*() accessors each read one member. */
  union {
    int64_t sint;
    uint64_t uint;
    double dbl;
    float flt;
    bool boolean;
    str_t *str;
  } defaultval;
  const upb_oneofdef *oneof;  /* Containing oneof, or NULL if none. */
  /* Sub-definition for message-typed / enum-typed fields.
   * NOTE(review): |unresolved| presumably holds the descriptor until the
   * referenced type has been resolved; confirm against the builder code. */
  union {
    const upb_msgdef *msgdef;
    const upb_enumdef *enumdef;
    const google_protobuf_FieldDescriptorProto *unresolved;
  } sub;
  uint32_t number_;
  uint16_t index_;            /* Assigned by assign_msg_indices(). */
  uint16_t layout_index;      /* Index into msgdef->layout->fields. */
  uint32_t selector_base;  /* Used to index into a upb::Handlers table. */
  bool is_extension_;
  bool lazy_;
  bool packed_;
  bool proto3_optional_;      /* Read by upb_oneofdef_issynthetic(). */
  upb_descriptortype_t type_;
  upb_label_t label_;
};
3019
/* Definition of a message type. */
struct upb_msgdef {
  const upb_msglayout *layout;
  const upb_filedef *file;
  const char *full_name;
  /* Both counters below are computed by assign_msg_indices(). */
  uint32_t selector_count;
  uint32_t submsg_field_count;

  /* Tables for looking up fields by number and name. */
  upb_inttable itof;
  /* Values in |ntof| are tagged pointers (see pack_def/unpack_def): the table
   * is queried for fields, oneofs and JSON field names. */
  upb_strtable ntof;

  const upb_fielddef *fields;
  const upb_oneofdef *oneofs;
  int field_count;
  int oneof_count;
  int real_oneof_count;  /* Set by check_oneofs(): count of oneofs that
                          * precede the first synthetic one. */

  /* Is this a map-entry message? */
  bool map_entry;
  upb_wellknowntype_t well_known_type;

  /* TODO(haberman): proper extension ranges (there can be multiple). */
};
3043
/* Definition of an enum type. */
struct upb_enumdef {
  const upb_filedef *file;
  const char *full_name;
  upb_strtable ntoi;   /* Value name -> number (see upb_enumdef_ntoi). */
  upb_inttable iton;   /* Number -> value name (see upb_enumdef_iton). */
  int32_t defaultval;  /* Returned by upb_enumdef_default(). */
};
3051
/* Definition of a oneof within a message. */
struct upb_oneofdef {
  const upb_msgdef *parent;  /* Containing message. */
  const char *full_name;
  uint32_t index;            /* Position in parent->oneofs; written by
                              * check_oneofs(). */
  upb_strtable ntof;         /* Member field name -> field. */
  upb_inttable itof;         /* Member field number -> field. */
};
3059
/* Definition of a .proto file: its identifying strings, syntax, and the
 * arrays of definitions it owns, each paired with a count below. */
struct upb_filedef {
  const char *name;
  const char *package;
  const char *phpprefix;
  const char *phpnamespace;
  upb_syntax_t syntax;

  /* Arrays; lengths are the matching *_count members. */
  const upb_filedef **deps;
  const upb_msgdef *msgs;
  const upb_enumdef *enums;
  const upb_fielddef *exts;

  int dep_count;
  int msg_count;
  int enum_count;
  int ext_count;
};
3077
/* Symbol table: the arena used for allocations (see make_layout) plus lookup
 * tables for symbols and files. */
struct upb_symtab {
  upb_arena *arena;
  upb_strtable syms;  /* full_name -> packed def ptr */
  upb_strtable files;  /* file_name -> upb_filedef* */
};
3083
/* Inside a symtab we store tagged pointers to specific def types.
 *
 * The tag occupies the low two bits of the pointer (see pack_def and
 * unpack_def).  Tag values 1 and 2 are deliberately reused: their meaning
 * depends on which table the value lives in. */
typedef enum {
  UPB_DEFTYPE_FIELD = 0,

  /* Only inside symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Only inside message table. */
  UPB_DEFTYPE_ONEOF = 1,
  UPB_DEFTYPE_FIELD_JSONNAME = 2
} upb_deftype_t;
3096
3097static const void *unpack_def(upb_value v, upb_deftype_t type) {
3098  uintptr_t num = (uintptr_t)upb_value_getconstptr(v);
3099  return (num & 3) == type ? (const void*)(num & ~3) : NULL;
3100}
3101
3102static upb_value pack_def(const void *ptr, upb_deftype_t type) {
3103  uintptr_t num = (uintptr_t)ptr | type;
3104  return upb_value_constptr((const void*)num);
3105}
3106
/* Character classification helpers.  isalpha() and friends from <ctype.h>
 * depend on the locale, which we don't want, so these compare against fixed
 * character ranges instead. */

/* True when low <= c <= high. */
static bool upb_isbetween(char c, char low, char high) {
  if (c < low) return false;
  return c <= high;
}

/* True for 'a'..'z', 'A'..'Z' and '_'. */
static bool upb_isletter(char c) {
  if (c == '_') return true;
  return upb_isbetween(c, 'a', 'z') || upb_isbetween(c, 'A', 'Z');
}

/* True for anything upb_isletter() accepts, plus '0'..'9'. */
static bool upb_isalphanum(char c) {
  return upb_isbetween(c, '0', '9') || upb_isletter(c);
}
3119
3120static bool upb_isident(upb_strview name, bool full, upb_status *s) {
3121  const char *str = name.data;
3122  size_t len = name.size;
3123  bool start = true;
3124  size_t i;
3125  for (i = 0; i < len; i++) {
3126    char c = str[i];
3127    if (c == '.') {
3128      if (start || !full) {
3129        upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str);
3130        return false;
3131      }
3132      start = true;
3133    } else if (start) {
3134      if (!upb_isletter(c)) {
3135        upb_status_seterrf(
3136            s, "invalid name: path components must start with a letter (%s)",
3137            str);
3138        return false;
3139      }
3140      start = false;
3141    } else {
3142      if (!upb_isalphanum(c)) {
3143        upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)",
3144                           str);
3145        return false;
3146      }
3147    }
3148  }
3149  return !start;
3150}
3151
/* Returns the part of |fullname| after its last '.', or |fullname| itself
 * when it contains no '.'.  A NULL input yields NULL.  The result points into
 * |fullname|; nothing is allocated. */
static const char *shortdefname(const char *fullname) {
  const char *last_dot;

  if (!fullname) return NULL;

  last_dot = strrchr(fullname, '.');
  return last_dot ? last_dot + 1 : fullname;
}
3165
3166/* All submessage fields are lower than all other fields.
3167 * Secondly, fields are increasing in order. */
3168uint32_t field_rank(const upb_fielddef *f) {
3169  uint32_t ret = upb_fielddef_number(f);
3170  const uint32_t high_bit = 1 << 30;
3171  UPB_ASSERT(ret < high_bit);
3172  if (!upb_fielddef_issubmsg(f))
3173    ret |= high_bit;
3174  return ret;
3175}
3176
3177int cmp_fields(const void *p1, const void *p2) {
3178  const upb_fielddef *f1 = *(upb_fielddef*const*)p1;
3179  const upb_fielddef *f2 = *(upb_fielddef*const*)p2;
3180  return field_rank(f1) - field_rank(f2);
3181}
3182
3183/* A few implementation details of handlers.  We put these here to avoid
3184 * a def -> handlers dependency. */
3185
3186#define UPB_STATIC_SELECTOR_COUNT 3  /* Warning: also in upb/handlers.h. */
3187
3188static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) {
3189  return upb_fielddef_isseq(f) ? 2 : 0;
3190}
3191
3192static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) {
3193  uint32_t ret = 1;
3194  if (upb_fielddef_isseq(f)) ret += 2;    /* STARTSEQ/ENDSEQ */
3195  if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */
3196  if (upb_fielddef_issubmsg(f)) {
3197    /* ENDSUBMSG (STARTSUBMSG is at table beginning) */
3198    ret += 0;
3199    if (upb_fielddef_lazy(f)) {
3200      /* STARTSTR/ENDSTR/STRING (for lazy) */
3201      ret += 3;
3202    }
3203  }
3204  return ret;
3205}
3206
/* Records an "out of memory" error message in |status|. */
static void upb_status_setoom(upb_status *status) {
  upb_status_seterrmsg(status, "out of memory");
}
3210
3211static bool assign_msg_indices(upb_msgdef *m, upb_status *s) {
3212  /* Sort fields.  upb internally relies on UPB_TYPE_MESSAGE fields having the
3213   * lowest indexes, but we do not publicly guarantee this. */
3214  upb_msg_field_iter j;
3215  int i;
3216  uint32_t selector;
3217  int n = upb_msgdef_numfields(m);
3218  upb_fielddef **fields;
3219
3220  if (n == 0) {
3221    m->selector_count = UPB_STATIC_SELECTOR_COUNT;
3222    m->submsg_field_count = 0;
3223    return true;
3224  }
3225
3226  fields = upb_gmalloc(n * sizeof(*fields));
3227  if (!fields) {
3228    upb_status_setoom(s);
3229    return false;
3230  }
3231
3232  m->submsg_field_count = 0;
3233  for(i = 0, upb_msg_field_begin(&j, m);
3234      !upb_msg_field_done(&j);
3235      upb_msg_field_next(&j), i++) {
3236    upb_fielddef *f = upb_msg_iter_field(&j);
3237    UPB_ASSERT(f->msgdef == m);
3238    if (upb_fielddef_issubmsg(f)) {
3239      m->submsg_field_count++;
3240    }
3241    fields[i] = f;
3242  }
3243
3244  qsort(fields, n, sizeof(*fields), cmp_fields);
3245
3246  selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count;
3247  for (i = 0; i < n; i++) {
3248    upb_fielddef *f = fields[i];
3249    f->index_ = i;
3250    f->selector_base = selector + upb_handlers_selectorbaseoffset(f);
3251    selector += upb_handlers_selectorcount(f);
3252  }
3253  m->selector_count = selector;
3254
3255  upb_gfree(fields);
3256  return true;
3257}
3258
3259static bool check_oneofs(upb_msgdef *m, upb_status *s) {
3260  int i;
3261  int first_synthetic = -1;
3262  upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs;
3263
3264  for (i = 0; i < m->oneof_count; i++) {
3265    mutable_oneofs[i].index = i;
3266
3267    if (upb_oneofdef_issynthetic(&mutable_oneofs[i])) {
3268      if (first_synthetic == -1) {
3269        first_synthetic = i;
3270      }
3271    } else {
3272      if (first_synthetic != -1) {
3273        upb_status_seterrf(
3274            s, "Synthetic oneofs must be after all other oneofs: %s",
3275            upb_oneofdef_name(&mutable_oneofs[i]));
3276        return false;
3277      }
3278    }
3279  }
3280
3281  if (first_synthetic == -1) {
3282    m->real_oneof_count = m->oneof_count;
3283  } else {
3284    m->real_oneof_count = first_synthetic;
3285  }
3286
3287  return true;
3288}
3289
3290static void assign_msg_wellknowntype(upb_msgdef *m) {
3291  const char *name = upb_msgdef_fullname(m);
3292  if (name == NULL) {
3293    m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
3294    return;
3295  }
3296  if (!strcmp(name, "google.protobuf.Any")) {
3297    m->well_known_type = UPB_WELLKNOWN_ANY;
3298  } else if (!strcmp(name, "google.protobuf.FieldMask")) {
3299    m->well_known_type = UPB_WELLKNOWN_FIELDMASK;
3300  } else if (!strcmp(name, "google.protobuf.Duration")) {
3301    m->well_known_type = UPB_WELLKNOWN_DURATION;
3302  } else if (!strcmp(name, "google.protobuf.Timestamp")) {
3303    m->well_known_type = UPB_WELLKNOWN_TIMESTAMP;
3304  } else if (!strcmp(name, "google.protobuf.DoubleValue")) {
3305    m->well_known_type = UPB_WELLKNOWN_DOUBLEVALUE;
3306  } else if (!strcmp(name, "google.protobuf.FloatValue")) {
3307    m->well_known_type = UPB_WELLKNOWN_FLOATVALUE;
3308  } else if (!strcmp(name, "google.protobuf.Int64Value")) {
3309    m->well_known_type = UPB_WELLKNOWN_INT64VALUE;
3310  } else if (!strcmp(name, "google.protobuf.UInt64Value")) {
3311    m->well_known_type = UPB_WELLKNOWN_UINT64VALUE;
3312  } else if (!strcmp(name, "google.protobuf.Int32Value")) {
3313    m->well_known_type = UPB_WELLKNOWN_INT32VALUE;
3314  } else if (!strcmp(name, "google.protobuf.UInt32Value")) {
3315    m->well_known_type = UPB_WELLKNOWN_UINT32VALUE;
3316  } else if (!strcmp(name, "google.protobuf.BoolValue")) {
3317    m->well_known_type = UPB_WELLKNOWN_BOOLVALUE;
3318  } else if (!strcmp(name, "google.protobuf.StringValue")) {
3319    m->well_known_type = UPB_WELLKNOWN_STRINGVALUE;
3320  } else if (!strcmp(name, "google.protobuf.BytesValue")) {
3321    m->well_known_type = UPB_WELLKNOWN_BYTESVALUE;
3322  } else if (!strcmp(name, "google.protobuf.Value")) {
3323    m->well_known_type = UPB_WELLKNOWN_VALUE;
3324  } else if (!strcmp(name, "google.protobuf.ListValue")) {
3325    m->well_known_type = UPB_WELLKNOWN_LISTVALUE;
3326  } else if (!strcmp(name, "google.protobuf.Struct")) {
3327    m->well_known_type = UPB_WELLKNOWN_STRUCT;
3328  } else {
3329    m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED;
3330  }
3331}
3332
3333
3334/* upb_enumdef ****************************************************************/
3335
/* Returns the enum's fully-qualified name. */
const char *upb_enumdef_fullname(const upb_enumdef *e) {
  return e->full_name;
}

/* Returns the part of the full name after its last '.' (see shortdefname). */
const char *upb_enumdef_name(const upb_enumdef *e) {
  return shortdefname(e->full_name);
}

/* Returns the file that defines this enum. */
const upb_filedef *upb_enumdef_file(const upb_enumdef *e) {
  return e->file;
}

/* Returns the enum's default value.  Asserts that the default corresponds to
 * a known value name. */
int32_t upb_enumdef_default(const upb_enumdef *e) {
  UPB_ASSERT(upb_enumdef_iton(e, e->defaultval));
  return e->defaultval;
}

/* Returns the number of entries in the name -> number table. */
int upb_enumdef_numvals(const upb_enumdef *e) {
  return (int)upb_strtable_count(&e->ntoi);
}

/* Positions |i| at the first value of |e|. */
void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) {
  /* We iterate over the ntoi table, to account for duplicate numbers. */
  upb_strtable_begin(i, &e->ntoi);
}

/* Advance / end-of-iteration test; thin wrappers over the strtable iterator. */
void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); }
bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); }
3364
3365bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name,
3366                      size_t len, int32_t *num) {
3367  upb_value v;
3368  if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) {
3369    return false;
3370  }
3371  if (num) *num = upb_value_getint32(v);
3372  return true;
3373}
3374
3375const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) {
3376  upb_value v;
3377  return upb_inttable_lookup32(&def->iton, num, &v) ?
3378      upb_value_getcstr(v) : NULL;
3379}
3380
/* Returns the key (value name) at the iterator's current position. */
const char *upb_enum_iter_name(upb_enum_iter *iter) {
  return upb_strtable_iter_key(iter).data;
}

/* Returns the number stored at the iterator's current position. */
int32_t upb_enum_iter_number(upb_enum_iter *iter) {
  return upb_value_getint32(upb_strtable_iter_value(iter));
}
3388
3389
3390/* upb_fielddef ***************************************************************/
3391
/* Returns the field's fully-qualified name. */
const char *upb_fielddef_fullname(const upb_fielddef *f) {
  return f->full_name;
}
3395
3396upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) {
3397  switch (f->type_) {
3398    case UPB_DESCRIPTOR_TYPE_DOUBLE:
3399      return UPB_TYPE_DOUBLE;
3400    case UPB_DESCRIPTOR_TYPE_FLOAT:
3401      return UPB_TYPE_FLOAT;
3402    case UPB_DESCRIPTOR_TYPE_INT64:
3403    case UPB_DESCRIPTOR_TYPE_SINT64:
3404    case UPB_DESCRIPTOR_TYPE_SFIXED64:
3405      return UPB_TYPE_INT64;
3406    case UPB_DESCRIPTOR_TYPE_INT32:
3407    case UPB_DESCRIPTOR_TYPE_SFIXED32:
3408    case UPB_DESCRIPTOR_TYPE_SINT32:
3409      return UPB_TYPE_INT32;
3410    case UPB_DESCRIPTOR_TYPE_UINT64:
3411    case UPB_DESCRIPTOR_TYPE_FIXED64:
3412      return UPB_TYPE_UINT64;
3413    case UPB_DESCRIPTOR_TYPE_UINT32:
3414    case UPB_DESCRIPTOR_TYPE_FIXED32:
3415      return UPB_TYPE_UINT32;
3416    case UPB_DESCRIPTOR_TYPE_ENUM:
3417      return UPB_TYPE_ENUM;
3418    case UPB_DESCRIPTOR_TYPE_BOOL:
3419      return UPB_TYPE_BOOL;
3420    case UPB_DESCRIPTOR_TYPE_STRING:
3421      return UPB_TYPE_STRING;
3422    case UPB_DESCRIPTOR_TYPE_BYTES:
3423      return UPB_TYPE_BYTES;
3424    case UPB_DESCRIPTOR_TYPE_GROUP:
3425    case UPB_DESCRIPTOR_TYPE_MESSAGE:
3426      return UPB_TYPE_MESSAGE;
3427  }
3428  UPB_UNREACHABLE();
3429}
3430
/* Simple accessors for upb_fielddef members. */

/* Returns the descriptor type as stored (not collapsed; compare
 * upb_fielddef_type). */
upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) {
  return f->type_;
}

/* Returns the index assigned to this field by assign_msg_indices(). */
uint32_t upb_fielddef_index(const upb_fielddef *f) {
  return f->index_;
}

/* Returns the field's label. */
upb_label_t upb_fielddef_label(const upb_fielddef *f) {
  return f->label_;
}

/* Returns the field number. */
uint32_t upb_fielddef_number(const upb_fielddef *f) {
  return f->number_;
}

/* Returns the stored is-extension flag. */
bool upb_fielddef_isextension(const upb_fielddef *f) {
  return f->is_extension_;
}

/* Returns the stored lazy flag. */
bool upb_fielddef_lazy(const upb_fielddef *f) {
  return f->lazy_;
}

/* Returns the stored packed flag. */
bool upb_fielddef_packed(const upb_fielddef *f) {
  return f->packed_;
}

/* Returns the part of the full name after its last '.' (see shortdefname). */
const char *upb_fielddef_name(const upb_fielddef *f) {
  return shortdefname(f->full_name);
}

/* Returns the field's JSON name. */
const char *upb_fielddef_jsonname(const upb_fielddef *f) {
  return f->json_name;
}

/* Returns the selector base assigned by assign_msg_indices(). */
uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) {
  return f->selector_base;
}

/* Returns the file that defines this field. */
const upb_filedef *upb_fielddef_file(const upb_fielddef *f) {
  return f->file;
}

/* Returns the message that contains this field. */
const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) {
  return f->msgdef;
}

/* Returns the containing oneof (real or synthetic), or NULL if none. */
const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) {
  return f->oneof;
}
3482
3483const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) {
3484  if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL;
3485  return f->oneof;
3486}
3487
/* Placeholder for a check that |f| has C type |ctype|.  It currently does
 * nothing: both arguments are only marked as unused. */
static void chkdefaulttype(const upb_fielddef *f, int ctype) {
  UPB_UNUSED(f);
  UPB_UNUSED(ctype);
}

/* Scalar default-value accessors.  Each reads one member of f->defaultval;
 * since chkdefaulttype() is a no-op, the caller is responsible for using the
 * accessor that matches the field's type. */

int64_t upb_fielddef_defaultint64(const upb_fielddef *f) {
  chkdefaulttype(f, UPB_TYPE_INT64);
  return f->defaultval.sint;
}

/* Reads the 64-bit signed member and narrows it to 32 bits. */
int32_t upb_fielddef_defaultint32(const upb_fielddef *f) {
  chkdefaulttype(f, UPB_TYPE_INT32);
  return (int32_t)f->defaultval.sint;
}

uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) {
  chkdefaulttype(f, UPB_TYPE_UINT64);
  return f->defaultval.uint;
}

/* Reads the 64-bit unsigned member and narrows it to 32 bits. */
uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) {
  chkdefaulttype(f, UPB_TYPE_UINT32);
  return (uint32_t)f->defaultval.uint;
}

bool upb_fielddef_defaultbool(const upb_fielddef *f) {
  chkdefaulttype(f, UPB_TYPE_BOOL);
  return f->defaultval.boolean;
}

float upb_fielddef_defaultfloat(const upb_fielddef *f) {
  chkdefaulttype(f, UPB_TYPE_FLOAT);
  return f->defaultval.flt;
}

double upb_fielddef_defaultdouble(const upb_fielddef *f) {
  chkdefaulttype(f, UPB_TYPE_DOUBLE);
  return f->defaultval.dbl;
}
3527
3528const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) {
3529  str_t *str = f->defaultval.str;
3530  UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING ||
3531         upb_fielddef_type(f) == UPB_TYPE_BYTES ||
3532         upb_fielddef_type(f) == UPB_TYPE_ENUM);
3533  if (str) {
3534    if (len) *len = str->len;
3535    return str->str;
3536  } else {
3537    if (len) *len = 0;
3538    return NULL;
3539  }
3540}
3541
/* Returns the message definition of a message-typed field.  Asserts that the
 * field's type is UPB_TYPE_MESSAGE. */
const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) {
  UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_MESSAGE);
  return f->sub.msgdef;
}

/* Returns the enum definition of an enum-typed field.  Asserts that the
 * field's type is UPB_TYPE_ENUM. */
const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) {
  UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_ENUM);
  return f->sub.enumdef;
}

/* Returns this field's entry in the containing message's layout, selected by
 * layout_index. */
const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) {
  return &f->msgdef->layout->fields[f->layout_index];
}
3555
3556bool upb_fielddef_issubmsg(const upb_fielddef *f) {
3557  return upb_fielddef_type(f) == UPB_TYPE_MESSAGE;
3558}
3559
3560bool upb_fielddef_isstring(const upb_fielddef *f) {
3561  return upb_fielddef_type(f) == UPB_TYPE_STRING ||
3562         upb_fielddef_type(f) == UPB_TYPE_BYTES;
3563}
3564
3565bool upb_fielddef_isseq(const upb_fielddef *f) {
3566  return upb_fielddef_label(f) == UPB_LABEL_REPEATED;
3567}
3568
3569bool upb_fielddef_isprimitive(const upb_fielddef *f) {
3570  return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f);
3571}
3572
3573bool upb_fielddef_ismap(const upb_fielddef *f) {
3574  return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) &&
3575         upb_msgdef_mapentry(upb_fielddef_msgsubdef(f));
3576}
3577
3578bool upb_fielddef_hassubdef(const upb_fielddef *f) {
3579  return upb_fielddef_issubmsg(f) || upb_fielddef_type(f) == UPB_TYPE_ENUM;
3580}
3581
3582bool upb_fielddef_haspresence(const upb_fielddef *f) {
3583  if (upb_fielddef_isseq(f)) return false;
3584  return upb_fielddef_issubmsg(f) || upb_fielddef_containingoneof(f) ||
3585         f->file->syntax == UPB_SYNTAX_PROTO2;
3586}
3587
/* True when low <= x <= high. */
static bool between(int32_t x, int32_t low, int32_t high) {
  return !(x < low || x > high);
}

/* Range validators for raw integer values.  Each accepts a closed interval
 * starting at 1. */

/* Accepts label values 1..3. */
bool upb_fielddef_checklabel(int32_t label) {
  return between(label, 1, 3);
}

/* Accepts type values 1..11. */
bool upb_fielddef_checktype(int32_t type) {
  return between(type, 1, 11);
}

/* Accepts integer-format values 1..3. */
bool upb_fielddef_checkintfmt(int32_t fmt) {
  return between(fmt, 1, 3);
}

/* Accepts descriptor type values 1..18. */
bool upb_fielddef_checkdescriptortype(int32_t type) {
  return between(type, 1, 18);
}
3599
3600/* upb_msgdef *****************************************************************/
3601
/* Simple accessors for upb_msgdef members. */

/* Returns the message's fully-qualified name. */
const char *upb_msgdef_fullname(const upb_msgdef *m) {
  return m->full_name;
}

/* Returns the file that defines this message. */
const upb_filedef *upb_msgdef_file(const upb_msgdef *m) {
  return m->file;
}

/* Returns the part of the full name after its last '.' (see shortdefname). */
const char *upb_msgdef_name(const upb_msgdef *m) {
  return shortdefname(m->full_name);
}

/* Returns the syntax of the defining file. */
upb_syntax_t upb_msgdef_syntax(const upb_msgdef *m) {
  return m->file->syntax;
}

/* Returns the selector count computed by assign_msg_indices(). */
size_t upb_msgdef_selectorcount(const upb_msgdef *m) {
  return m->selector_count;
}

/* Returns the number of submessage fields counted by assign_msg_indices(). */
uint32_t upb_msgdef_submsgfieldcount(const upb_msgdef *m) {
  return m->submsg_field_count;
}
3625
3626const upb_fielddef *upb_msgdef_itof(const upb_msgdef *m, uint32_t i) {
3627  upb_value val;
3628  return upb_inttable_lookup32(&m->itof, i, &val) ?
3629      upb_value_getconstptr(val) : NULL;
3630}
3631
3632const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name,
3633                                    size_t len) {
3634  upb_value val;
3635
3636  if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3637    return NULL;
3638  }
3639
3640  return unpack_def(val, UPB_DEFTYPE_FIELD);
3641}
3642
3643const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name,
3644                                    size_t len) {
3645  upb_value val;
3646
3647  if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3648    return NULL;
3649  }
3650
3651  return unpack_def(val, UPB_DEFTYPE_ONEOF);
3652}
3653
3654bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len,
3655                           const upb_fielddef **f, const upb_oneofdef **o) {
3656  upb_value val;
3657
3658  if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3659    return false;
3660  }
3661
3662  *o = unpack_def(val, UPB_DEFTYPE_ONEOF);
3663  *f = unpack_def(val, UPB_DEFTYPE_FIELD);
3664  return *o || *f;  /* False if this was a JSON name. */
3665}
3666
3667const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m,
3668                                              const char *name, size_t len) {
3669  upb_value val;
3670  const upb_fielddef* f;
3671
3672  if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) {
3673    return NULL;
3674  }
3675
3676  f = unpack_def(val, UPB_DEFTYPE_FIELD);
3677  if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME);
3678
3679  return f;
3680}
3681
/* Returns the number of fields in |m|. */
int upb_msgdef_numfields(const upb_msgdef *m) {
  return m->field_count;
}

/* Returns the number of oneofs in |m|, synthetic ones included. */
int upb_msgdef_numoneofs(const upb_msgdef *m) {
  return m->oneof_count;
}

/* Returns the count stored by check_oneofs(): the number of oneofs that
 * precede the first synthetic one. */
int upb_msgdef_numrealoneofs(const upb_msgdef *m) {
  return m->real_oneof_count;
}

/* Returns the message's layout. */
const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) {
  return m->layout;
}
3697
3698const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i) {
3699  if (i >= m->field_count) return NULL;
3700  return &m->fields[i];
3701}
3702
/* Returns whether |m| is a map-entry message. */
bool upb_msgdef_mapentry(const upb_msgdef *m) {
  return m->map_entry;
}

/* Returns the well-known-type classification stored for |m| (see
 * assign_msg_wellknowntype). */
upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) {
  return m->well_known_type;
}
3710
3711bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) {
3712  upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
3713  return type >= UPB_WELLKNOWN_DOUBLEVALUE &&
3714         type <= UPB_WELLKNOWN_UINT32VALUE;
3715}
3716
/* Field iteration over a message.  These are thin wrappers around the
 * inttable iterator for the message's number -> field table. */

/* Positions |iter| at the first field of |m|. */
void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) {
  upb_inttable_begin(iter, &m->itof);
}

/* Advances |iter| to the next field. */
void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); }

/* Returns true once |iter| has moved past the last field. */
bool upb_msg_field_done(const upb_msg_field_iter *iter) {
  return upb_inttable_done(iter);
}

/* Returns the field at the current position.  The table stores const
 * pointers; the cast hands back a mutable one. */
upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) {
  return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
}

/* Forces |iter| into the "done" state. */
void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) {
  upb_inttable_iter_setdone(iter);
}

/* Returns whether two iterators compare equal per the inttable iterator. */
bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1,
                                const upb_msg_field_iter * iter2) {
  return upb_inttable_iter_isequal(iter1, iter2);
}
3739
3740void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) {
3741  upb_strtable_begin(iter, &m->ntof);
3742  /* We need to skip past any initial fields. */
3743  while (!upb_strtable_done(iter) &&
3744         !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) {
3745    upb_strtable_next(iter);
3746  }
3747}
3748
3749void upb_msg_oneof_next(upb_msg_oneof_iter *iter) {
3750  /* We need to skip past fields to return only oneofs. */
3751  do {
3752    upb_strtable_next(iter);
3753  } while (!upb_strtable_done(iter) &&
3754           !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF));
3755}
3756
/* Returns true once |iter| has moved past the last table entry. */
bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) {
  return upb_strtable_done(iter);
}

/* Returns the oneof at the current position, or NULL if the current entry is
 * not tagged as a oneof. */
const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) {
  return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF);
}

/* Forces |iter| into the "done" state. */
void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) {
  upb_strtable_iter_setdone(iter);
}

/* Returns whether two iterators compare equal per the strtable iterator. */
bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1,
                                const upb_msg_oneof_iter *iter2) {
  return upb_strtable_iter_isequal(iter1, iter2);
}
3773
3774/* upb_oneofdef ***************************************************************/
3775
/* Returns the part of the oneof's full name after its last '.' (see
 * shortdefname). */
const char *upb_oneofdef_name(const upb_oneofdef *o) {
  return shortdefname(o->full_name);
}

/* Returns the message that contains this oneof. */
const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) {
  return o->parent;
}

/* Returns the number of entries in the oneof's name -> field table. */
int upb_oneofdef_numfields(const upb_oneofdef *o) {
  return (int)upb_strtable_count(&o->ntof);
}

/* Returns the index assigned to this oneof by check_oneofs(). */
uint32_t upb_oneofdef_index(const upb_oneofdef *o) {
  return o->index;
}
3791
3792bool upb_oneofdef_issynthetic(const upb_oneofdef *o) {
3793  upb_inttable_iter iter;
3794  const upb_fielddef *f;
3795  upb_inttable_begin(&iter, &o->itof);
3796  if (upb_oneofdef_numfields(o) != 1) return false;
3797  f = upb_value_getptr(upb_inttable_iter_value(&iter));
3798  UPB_ASSERT(f);
3799  return f->proto3_optional_;
3800}
3801
3802const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o,
3803                                      const char *name, size_t length) {
3804  upb_value val;
3805  return upb_strtable_lookup2(&o->ntof, name, length, &val) ?
3806      upb_value_getptr(val) : NULL;
3807}
3808
3809const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) {
3810  upb_value val;
3811  return upb_inttable_lookup32(&o->itof, num, &val) ?
3812      upb_value_getptr(val) : NULL;
3813}
3814
/* Iteration over the member fields of a oneof.  These are thin wrappers
 * around the inttable iterator for the oneof's number -> field table. */

/* Positions |iter| at the first member field of |o|. */
void upb_oneof_begin(upb_oneof_iter *iter, const upb_oneofdef *o) {
  upb_inttable_begin(iter, &o->itof);
}

/* Advances |iter| to the next member field. */
void upb_oneof_next(upb_oneof_iter *iter) {
  upb_inttable_next(iter);
}

/* Returns true once |iter| has moved past the last member field. */
bool upb_oneof_done(upb_oneof_iter *iter) {
  return upb_inttable_done(iter);
}

/* Returns the field at the current position.  The table value is read as a
 * const pointer; the cast hands back a mutable one. */
upb_fielddef *upb_oneof_iter_field(const upb_oneof_iter *iter) {
  return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter));
}

/* Forces |iter| into the "done" state. */
void upb_oneof_iter_setdone(upb_oneof_iter *iter) {
  upb_inttable_iter_setdone(iter);
}
3834
3835/* Dynamic Layout Generation. *************************************************/
3836
/* Returns true if |val| is a power of two (1, 2, 4, ...).
 *
 * Zero is explicitly rejected: the bit trick (val & (val - 1)) == 0 is also
 * satisfied by 0, which is not a power of two and is not a usable alignment
 * for align_up(). */
static bool is_power_of_two(size_t val) {
  return val != 0 && (val & (val - 1)) == 0;
}
3840
/* Rounds |val| up to the nearest multiple of |align|, which must be a power
 * of 2 (asserted). */
static size_t align_up(size_t val, size_t align) {
  const size_t mask = align - 1;

  UPB_ASSERT(is_power_of_two(align));
  return (val + mask) & ~mask;
}
3846
/* Integer division of |n| by |d|, rounding the quotient up.  |d| must be
 * non-zero. */
static size_t div_round_up(size_t n, size_t d) {
  const size_t biased = n + d - 1;
  return biased / d;
}
3850
3851static size_t upb_msgval_sizeof(upb_fieldtype_t type) {
3852  switch (type) {
3853    case UPB_TYPE_DOUBLE:
3854    case UPB_TYPE_INT64:
3855    case UPB_TYPE_UINT64:
3856      return 8;
3857    case UPB_TYPE_ENUM:
3858    case UPB_TYPE_INT32:
3859    case UPB_TYPE_UINT32:
3860    case UPB_TYPE_FLOAT:
3861      return 4;
3862    case UPB_TYPE_BOOL:
3863      return 1;
3864    case UPB_TYPE_MESSAGE:
3865      return sizeof(void*);
3866    case UPB_TYPE_BYTES:
3867    case UPB_TYPE_STRING:
3868      return sizeof(upb_strview);
3869  }
3870  UPB_UNREACHABLE();
3871}
3872
3873static uint8_t upb_msg_fielddefsize(const upb_fielddef *f) {
3874  if (upb_msgdef_mapentry(upb_fielddef_containingtype(f))) {
3875    upb_map_entry ent;
3876    UPB_ASSERT(sizeof(ent.k) == sizeof(ent.v));
3877    return sizeof(ent.k);
3878  } else if (upb_fielddef_isseq(f)) {
3879    return sizeof(void*);
3880  } else {
3881    return upb_msgval_sizeof(upb_fielddef_type(f));
3882  }
3883}
3884
3885static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) {
3886  uint32_t ret;
3887
3888  l->size = align_up(l->size, size);
3889  ret = l->size;
3890  l->size += size;
3891  return ret;
3892}
3893
3894/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc.
3895 * It computes a dynamic layout for all of the fields in |m|. */
3896static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) {
3897  upb_msglayout *l = (upb_msglayout*)m->layout;
3898  upb_msg_field_iter it;
3899  upb_msg_oneof_iter oit;
3900  size_t hasbit;
3901  size_t submsg_count = m->submsg_field_count;
3902  const upb_msglayout **submsgs;
3903  upb_msglayout_field *fields;
3904  upb_alloc *alloc = upb_arena_alloc(symtab->arena);
3905
3906  memset(l, 0, sizeof(*l));
3907
3908  fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields));
3909  submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs));
3910
3911  if ((!fields && upb_msgdef_numfields(m)) ||
3912      (!submsgs && submsg_count)) {
3913    /* OOM. */
3914    return false;
3915  }
3916
3917  l->field_count = upb_msgdef_numfields(m);
3918  l->fields = fields;
3919  l->submsgs = submsgs;
3920
3921  if (upb_msgdef_mapentry(m)) {
3922    /* TODO(haberman): refactor this method so this special case is more
3923     * elegant. */
3924    const upb_fielddef *key = upb_msgdef_itof(m, 1);
3925    const upb_fielddef *val = upb_msgdef_itof(m, 2);
3926    fields[0].number = 1;
3927    fields[1].number = 2;
3928    fields[0].label = UPB_LABEL_OPTIONAL;
3929    fields[1].label = UPB_LABEL_OPTIONAL;
3930    fields[0].presence = 0;
3931    fields[1].presence = 0;
3932    fields[0].descriptortype = upb_fielddef_descriptortype(key);
3933    fields[1].descriptortype = upb_fielddef_descriptortype(val);
3934    fields[0].offset = 0;
3935    fields[1].offset = sizeof(upb_strview);
3936    fields[1].submsg_index = 0;
3937
3938    if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) {
3939      submsgs[0] = upb_fielddef_msgsubdef(val)->layout;
3940    }
3941
3942    l->field_count = 2;
3943    l->size = 2 * sizeof(upb_strview);align_up(l->size, 8);
3944    return true;
3945  }
3946
3947  /* Allocate data offsets in three stages:
3948   *
3949   * 1. hasbits.
3950   * 2. regular fields.
3951   * 3. oneof fields.
3952   *
3953   * OPT: There is a lot of room for optimization here to minimize the size.
3954   */
3955
3956  /* Allocate hasbits and set basic field attributes. */
3957  submsg_count = 0;
3958  for (upb_msg_field_begin(&it, m), hasbit = 0;
3959       !upb_msg_field_done(&it);
3960       upb_msg_field_next(&it)) {
3961    upb_fielddef* f = upb_msg_iter_field(&it);
3962    upb_msglayout_field *field = &fields[upb_fielddef_index(f)];
3963
3964    field->number = upb_fielddef_number(f);
3965    field->descriptortype = upb_fielddef_descriptortype(f);
3966    field->label = upb_fielddef_label(f);
3967
3968    if (upb_fielddef_ismap(f)) {
3969      field->label = _UPB_LABEL_MAP;
3970    } else if (upb_fielddef_packed(f)) {
3971      field->label = _UPB_LABEL_PACKED;
3972    }
3973
3974    /* TODO: we probably should sort the fields by field number to match the
3975     * output of upbc, and to improve search speed for the table parser. */
3976    f->layout_index = f->index_;
3977
3978    if (upb_fielddef_issubmsg(f)) {
3979      const upb_msgdef *subm = upb_fielddef_msgsubdef(f);
3980      field->submsg_index = submsg_count++;
3981      submsgs[field->submsg_index] = subm->layout;
3982    }
3983
3984    if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) {
3985      /* We don't use hasbit 0, so that 0 can indicate "no presence" in the
3986       * table. This wastes one hasbit, but we don't worry about it for now. */
3987      field->presence = ++hasbit;
3988    } else {
3989      field->presence = 0;
3990    }
3991  }
3992
3993  /* Account for space used by hasbits. */
3994  l->size = div_round_up(hasbit, 8);
3995
3996  /* Allocate non-oneof fields. */
3997  for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it);
3998       upb_msg_field_next(&it)) {
3999    const upb_fielddef* f = upb_msg_iter_field(&it);
4000    size_t field_size = upb_msg_fielddefsize(f);
4001    size_t index = upb_fielddef_index(f);
4002
4003    if (upb_fielddef_realcontainingoneof(f)) {
4004      /* Oneofs are handled separately below. */
4005      continue;
4006    }
4007
4008    fields[index].offset = upb_msglayout_place(l, field_size);
4009  }
4010
4011  /* Allocate oneof fields.  Each oneof field consists of a uint32 for the case
4012   * and space for the actual data. */
4013  for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit);
4014       upb_msg_oneof_next(&oit)) {
4015    const upb_oneofdef* o = upb_msg_iter_oneof(&oit);
4016    upb_oneof_iter fit;
4017
4018    if (upb_oneofdef_issynthetic(o)) continue;
4019
4020    size_t case_size = sizeof(uint32_t);  /* Could potentially optimize this. */
4021    size_t field_size = 0;
4022    uint32_t case_offset;
4023    uint32_t data_offset;
4024
4025    /* Calculate field size: the max of all field sizes. */
4026    for (upb_oneof_begin(&fit, o);
4027         !upb_oneof_done(&fit);
4028         upb_oneof_next(&fit)) {
4029      const upb_fielddef* f = upb_oneof_iter_field(&fit);
4030      field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f));
4031    }
4032
4033    /* Align and allocate case offset. */
4034    case_offset = upb_msglayout_place(l, case_size);
4035    data_offset = upb_msglayout_place(l, field_size);
4036
4037    for (upb_oneof_begin(&fit, o);
4038         !upb_oneof_done(&fit);
4039         upb_oneof_next(&fit)) {
4040      const upb_fielddef* f = upb_oneof_iter_field(&fit);
4041      fields[upb_fielddef_index(f)].offset = data_offset;
4042      fields[upb_fielddef_index(f)].presence = ~case_offset;
4043    }
4044  }
4045
4046  /* Size of the entire structure should be a multiple of its greatest
4047   * alignment.  TODO: track overall alignment for real? */
4048  l->size = align_up(l->size, 8);
4049
4050  return true;
4051}
4052
4053/* Code to build defs from descriptor protos. *********************************/
4054
4055/* There is a question of how much validation to do here.  It will be difficult
4056 * to perfectly match the amount of validation performed by proto2.  But since
4057 * this code is used to directly build defs from Ruby (for example) we do need
4058 * to validate important constraints like uniqueness of names and numbers. */
4059
/* CHK(x): makes the enclosing function return false when |x| is false.
 * Expands to a bare `if` statement, so it is only usable in functions whose
 * return type accepts `false`. */
#define CHK(x) if (!(x)) { return false; }
/* CHK_OOM(x): like CHK(), but first calls upb_status_setoom(ctx->status).
 * Requires a variable named |ctx| with a |status| member to be in scope. */
#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; }
4062
/* Context passed to the def-building functions below while the defs of one
 * file are being created. */
typedef struct {
  const upb_symtab *symtab;       /* Existing symbols: checked for duplicates
                                   * and searched when resolving names. */
  upb_filedef *file;              /* File we are building. */
  upb_alloc *alloc;               /* Allocate defs here. */
  upb_alloc *tmp;                 /* Alloc for addtab and any other tmp data. */
  upb_strtable *addtab;           /* full_name -> packed def ptr for new defs */
  const upb_msglayout **layouts;  /* NULL if we should build layouts.  Otherwise
                                   * each new msgdef takes the next entry. */
  upb_status *status;             /* Record errors here. */
} symtab_addctx;
4072
4073static char* strviewdup(const symtab_addctx *ctx, upb_strview view) {
4074  return upb_strdup2(view.data, view.size, ctx->alloc);
4075}
4076
/* Returns true when the |n| bytes at |a| are exactly the NUL-terminated
 * string |b| (same length, same contents). */
static bool streql2(const char *a, size_t n, const char *b) {
  if (n != strlen(b)) {
    return false;
  }
  return memcmp(a, b, n) == 0;
}
4080
4081static bool streql_view(upb_strview view, const char *b) {
4082  return streql2(view.data, view.size, b);
4083}
4084
4085static const char *makefullname(const symtab_addctx *ctx, const char *prefix,
4086                                upb_strview name) {
4087  if (prefix) {
4088    /* ret = prefix + '.' + name; */
4089    size_t n = strlen(prefix);
4090    char *ret = upb_malloc(ctx->alloc, n + name.size + 2);
4091    CHK_OOM(ret);
4092    strcpy(ret, prefix);
4093    ret[n] = '.';
4094    memcpy(&ret[n + 1], name.data, name.size);
4095    ret[n + 1 + name.size] = '\0';
4096    return ret;
4097  } else {
4098    return strviewdup(ctx, name);
4099  }
4100}
4101
/* Converts the field name |name| to its JSON form: every underscore is
 * dropped and the character following an underscore is upper-cased.
 *
 * At most |len| bytes are written to |buf|.  If the result does not fit, the
 * output is truncated and buf[len - 1] is set to NUL; with len == 0 nothing
 * is written, so |buf| may be NULL.
 *
 * Returns the number of bytes required for the complete result, including
 * the terminating NUL.  If |name| is NULL an empty string is written (when
 * len > 0) and 0 is returned. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte, silently truncating once |buf| is full. */
#define WRITE(byte)                                \
  do {                                             \
    ++dst;                                         \
    if (dst < len) buf[dst - 1] = (byte);          \
    else if (dst == len) buf[dst - 1] = '\0';      \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() requires a value representable as unsigned char (or EOF);
       * passing a negative plain char is undefined behavior. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
4139
4140static char* makejsonname(const char* name, upb_alloc *alloc) {
4141  size_t size = getjsonname(name, NULL, 0);
4142  char* json_name = upb_malloc(alloc, size);
4143  getjsonname(name, json_name, size);
4144  return json_name;
4145}
4146
4147static bool symtab_add(const symtab_addctx *ctx, const char *name,
4148                       upb_value v) {
4149  upb_value tmp;
4150  if (upb_strtable_lookup(ctx->addtab, name, &tmp) ||
4151      upb_strtable_lookup(&ctx->symtab->syms, name, &tmp)) {
4152    upb_status_seterrf(ctx->status, "duplicate symbol '%s'", name);
4153    return false;
4154  }
4155
4156  CHK_OOM(upb_strtable_insert3(ctx->addtab, name, strlen(name), v, ctx->tmp));
4157  return true;
4158}
4159
4160/* Given a symbol and the base symbol inside which it is defined, find the
4161 * symbol's definition in t. */
4162static bool resolvename(const upb_strtable *t, const upb_fielddef *f,
4163                        const char *base, upb_strview sym,
4164                        upb_deftype_t type, upb_status *status,
4165                        const void **def) {
4166  if(sym.size == 0) return NULL;
4167  if(sym.data[0] == '.') {
4168    /* Symbols starting with '.' are absolute, so we do a single lookup.
4169     * Slice to omit the leading '.' */
4170    upb_value v;
4171    if (!upb_strtable_lookup2(t, sym.data + 1, sym.size - 1, &v)) {
4172      return false;
4173    }
4174
4175    *def = unpack_def(v, type);
4176
4177    if (!*def) {
4178      upb_status_seterrf(status,
4179                         "type mismatch when resolving field %s, name %s",
4180                         f->full_name, sym.data);
4181      return false;
4182    }
4183
4184    return true;
4185  } else {
4186    /* Remove components from base until we find an entry or run out.
4187     * TODO: This branch is totally broken, but currently not used. */
4188    (void)base;
4189    UPB_ASSERT(false);
4190    return false;
4191  }
4192}
4193
4194const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f,
4195                           const char *base, upb_strview sym,
4196                           upb_deftype_t type) {
4197  const void *ret;
4198  if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) &&
4199      !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) {
4200    if (upb_ok(ctx->status)) {
4201      upb_status_seterrf(ctx->status, "couldn't resolve name '%s'", sym.data);
4202    }
4203    return false;
4204  }
4205  return ret;
4206}
4207
4208static bool create_oneofdef(
4209    const symtab_addctx *ctx, upb_msgdef *m,
4210    const google_protobuf_OneofDescriptorProto *oneof_proto) {
4211  upb_oneofdef *o;
4212  upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto);
4213  upb_value v;
4214
4215  o = (upb_oneofdef*)&m->oneofs[m->oneof_count++];
4216  o->parent = m;
4217  o->full_name = makefullname(ctx, m->full_name, name);
4218
4219  v = pack_def(o, UPB_DEFTYPE_ONEOF);
4220  CHK_OOM(symtab_add(ctx, o->full_name, v));
4221  CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc));
4222
4223  CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
4224  CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
4225
4226  return true;
4227}
4228
4229static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len,
4230                          upb_fielddef *f) {
4231  char *end;
4232  char nullz[64];
4233  errno = 0;
4234
4235  switch (upb_fielddef_type(f)) {
4236    case UPB_TYPE_INT32:
4237    case UPB_TYPE_INT64:
4238    case UPB_TYPE_UINT32:
4239    case UPB_TYPE_UINT64:
4240    case UPB_TYPE_DOUBLE:
4241    case UPB_TYPE_FLOAT:
4242      /* Standard C number parsing functions expect null-terminated strings. */
4243      if (len >= sizeof(nullz) - 1) {
4244        return false;
4245      }
4246      memcpy(nullz, str, len);
4247      nullz[len] = '\0';
4248      str = nullz;
4249      break;
4250    default:
4251      break;
4252  }
4253
4254  switch (upb_fielddef_type(f)) {
4255    case UPB_TYPE_INT32: {
4256      long val = strtol(str, &end, 0);
4257      CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE && !*end);
4258      f->defaultval.sint = val;
4259      break;
4260    }
4261    case UPB_TYPE_ENUM: {
4262      const upb_enumdef *e = f->sub.enumdef;
4263      int32_t val;
4264      CHK(upb_enumdef_ntoi(e, str, len, &val));
4265      f->defaultval.sint = val;
4266      break;
4267    }
4268    case UPB_TYPE_INT64: {
4269      /* XXX: Need to write our own strtoll, since it's not available in c89. */
4270      int64_t val = strtol(str, &end, 0);
4271      CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE && !*end);
4272      f->defaultval.sint = val;
4273      break;
4274    }
4275    case UPB_TYPE_UINT32: {
4276      unsigned long val = strtoul(str, &end, 0);
4277      CHK(val <= UINT32_MAX && errno != ERANGE && !*end);
4278      f->defaultval.uint = val;
4279      break;
4280    }
4281    case UPB_TYPE_UINT64: {
4282      /* XXX: Need to write our own strtoull, since it's not available in c89. */
4283      uint64_t val = strtoul(str, &end, 0);
4284      CHK(val <= UINT64_MAX && errno != ERANGE && !*end);
4285      f->defaultval.uint = val;
4286      break;
4287    }
4288    case UPB_TYPE_DOUBLE: {
4289      double val = strtod(str, &end);
4290      CHK(errno != ERANGE && !*end);
4291      f->defaultval.dbl = val;
4292      break;
4293    }
4294    case UPB_TYPE_FLOAT: {
4295      /* XXX: Need to write our own strtof, since it's not available in c89. */
4296      float val = strtod(str, &end);
4297      CHK(errno != ERANGE && !*end);
4298      f->defaultval.flt = val;
4299      break;
4300    }
4301    case UPB_TYPE_BOOL: {
4302      if (streql2(str, len, "false")) {
4303        f->defaultval.boolean = false;
4304      } else if (streql2(str, len, "true")) {
4305        f->defaultval.boolean = true;
4306      } else {
4307        return false;
4308      }
4309      break;
4310    }
4311    case UPB_TYPE_STRING:
4312      f->defaultval.str = newstr(ctx->alloc, str, len);
4313      break;
4314    case UPB_TYPE_BYTES:
4315      /* XXX: need to interpret the C-escaped value. */
4316      f->defaultval.str = newstr(ctx->alloc, str, len);
4317      break;
4318    case UPB_TYPE_MESSAGE:
4319      /* Should not have a default value. */
4320      return false;
4321  }
4322  return true;
4323}
4324
4325static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) {
4326  switch (upb_fielddef_type(f)) {
4327    case UPB_TYPE_INT32:
4328    case UPB_TYPE_INT64:
4329    case UPB_TYPE_ENUM:
4330      f->defaultval.sint = 0;
4331      break;
4332    case UPB_TYPE_UINT64:
4333    case UPB_TYPE_UINT32:
4334      f->defaultval.uint = 0;
4335      break;
4336    case UPB_TYPE_DOUBLE:
4337    case UPB_TYPE_FLOAT:
4338      f->defaultval.dbl = 0;
4339      break;
4340    case UPB_TYPE_STRING:
4341    case UPB_TYPE_BYTES:
4342      f->defaultval.str = newstr(ctx->alloc, NULL, 0);
4343      break;
4344    case UPB_TYPE_BOOL:
4345      f->defaultval.boolean = false;
4346      break;
4347    case UPB_TYPE_MESSAGE:
4348      break;
4349  }
4350}
4351
4352static bool create_fielddef(
4353    const symtab_addctx *ctx, const char *prefix, upb_msgdef *m,
4354    const google_protobuf_FieldDescriptorProto *field_proto) {
4355  upb_alloc *alloc = ctx->alloc;
4356  upb_fielddef *f;
4357  const google_protobuf_FieldOptions *options;
4358  upb_strview name;
4359  const char *full_name;
4360  const char *json_name;
4361  const char *shortname;
4362  uint32_t field_number;
4363
4364  if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) {
4365    upb_status_seterrmsg(ctx->status, "field has no name");
4366    return false;
4367  }
4368
4369  name = google_protobuf_FieldDescriptorProto_name(field_proto);
4370  CHK(upb_isident(name, false, ctx->status));
4371  full_name = makefullname(ctx, prefix, name);
4372  shortname = shortdefname(full_name);
4373
4374  if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) {
4375    json_name = strviewdup(
4376        ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto));
4377  } else {
4378    json_name = makejsonname(shortname, ctx->alloc);
4379  }
4380
4381  field_number = google_protobuf_FieldDescriptorProto_number(field_proto);
4382
4383  if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) {
4384    upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number);
4385    return false;
4386  }
4387
4388  if (m) {
4389    /* direct message field. */
4390    upb_value v, field_v, json_v;
4391    size_t json_size;
4392
4393    f = (upb_fielddef*)&m->fields[m->field_count++];
4394    f->msgdef = m;
4395    f->is_extension_ = false;
4396
4397    if (upb_strtable_lookup(&m->ntof, shortname, NULL)) {
4398      upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname);
4399      return false;
4400    }
4401
4402    if (upb_strtable_lookup(&m->ntof, json_name, NULL)) {
4403      upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name);
4404      return false;
4405    }
4406
4407    if (upb_inttable_lookup(&m->itof, field_number, NULL)) {
4408      upb_status_seterrf(ctx->status, "duplicate field number (%u)",
4409                         field_number);
4410      return false;
4411    }
4412
4413    field_v = pack_def(f, UPB_DEFTYPE_FIELD);
4414    json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME);
4415    v = upb_value_constptr(f);
4416    json_size = strlen(json_name);
4417
4418    CHK_OOM(
4419        upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc));
4420    CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc));
4421
4422    if (strcmp(shortname, json_name) != 0) {
4423      upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc);
4424    }
4425
4426    if (ctx->layouts) {
4427      const upb_msglayout_field *fields = m->layout->fields;
4428      int count = m->layout->field_count;
4429      bool found = false;
4430      int i;
4431      for (i = 0; i < count; i++) {
4432        if (fields[i].number == field_number) {
4433          f->layout_index = i;
4434          found = true;
4435          break;
4436        }
4437      }
4438      UPB_ASSERT(found);
4439    }
4440  } else {
4441    /* extension field. */
4442    f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++];
4443    f->is_extension_ = true;
4444    CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD)));
4445  }
4446
4447  f->full_name = full_name;
4448  f->json_name = json_name;
4449  f->file = ctx->file;
4450  f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto);
4451  f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto);
4452  f->number_ = field_number;
4453  f->oneof = NULL;
4454  f->proto3_optional_ =
4455      google_protobuf_FieldDescriptorProto_proto3_optional(field_proto);
4456
4457  /* We can't resolve the subdef or (in the case of extensions) the containing
4458   * message yet, because it may not have been defined yet.  We stash a pointer
4459   * to the field_proto until later when we can properly resolve it. */
4460  f->sub.unresolved = field_proto;
4461
4462  if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) {
4463    upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)",
4464                       f->full_name);
4465    return false;
4466  }
4467
4468  if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) {
4469    int oneof_index =
4470        google_protobuf_FieldDescriptorProto_oneof_index(field_proto);
4471    upb_oneofdef *oneof;
4472    upb_value v = upb_value_constptr(f);
4473
4474    if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) {
4475      upb_status_seterrf(ctx->status,
4476                         "fields in oneof must have OPTIONAL label (%s)",
4477                         f->full_name);
4478      return false;
4479    }
4480
4481    if (!m) {
4482      upb_status_seterrf(ctx->status,
4483                         "oneof_index provided for extension field (%s)",
4484                         f->full_name);
4485      return false;
4486    }
4487
4488    if (oneof_index >= m->oneof_count) {
4489      upb_status_seterrf(ctx->status, "oneof_index out of range (%s)",
4490                         f->full_name);
4491      return false;
4492    }
4493
4494    oneof = (upb_oneofdef*)&m->oneofs[oneof_index];
4495    f->oneof = oneof;
4496
4497    CHK(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc));
4498    CHK(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc));
4499  } else {
4500    f->oneof = NULL;
4501  }
4502
4503  if (google_protobuf_FieldDescriptorProto_has_options(field_proto)) {
4504    options = google_protobuf_FieldDescriptorProto_options(field_proto);
4505    f->lazy_ = google_protobuf_FieldOptions_lazy(options);
4506    f->packed_ = google_protobuf_FieldOptions_packed(options);
4507  } else {
4508    f->lazy_ = false;
4509    f->packed_ = false;
4510  }
4511
4512  return true;
4513}
4514
4515static bool create_enumdef(
4516    const symtab_addctx *ctx, const char *prefix,
4517    const google_protobuf_EnumDescriptorProto *enum_proto) {
4518  upb_enumdef *e;
4519  const google_protobuf_EnumValueDescriptorProto *const *values;
4520  upb_strview name;
4521  size_t i, n;
4522
4523  name = google_protobuf_EnumDescriptorProto_name(enum_proto);
4524  CHK(upb_isident(name, false, ctx->status));
4525
4526  e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++];
4527  e->full_name = makefullname(ctx, prefix, name);
4528  CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM)));
4529
4530  CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc));
4531  CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc));
4532
4533  e->file = ctx->file;
4534  e->defaultval = 0;
4535
4536  values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n);
4537
4538  if (n == 0) {
4539    upb_status_seterrf(ctx->status,
4540                       "enums must contain at least one value (%s)",
4541                       e->full_name);
4542    return false;
4543  }
4544
4545  for (i = 0; i < n; i++) {
4546    const google_protobuf_EnumValueDescriptorProto *value = values[i];
4547    upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value);
4548    char *name2 = strviewdup(ctx, name);
4549    int32_t num = google_protobuf_EnumValueDescriptorProto_number(value);
4550    upb_value v = upb_value_int32(num);
4551
4552    if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) {
4553      upb_status_seterrf(ctx->status,
4554                         "for proto3, the first enum value must be zero (%s)",
4555                         e->full_name);
4556      return false;
4557    }
4558
4559    if (upb_strtable_lookup(&e->ntoi, name2, NULL)) {
4560      upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2);
4561      return false;
4562    }
4563
4564    CHK_OOM(name2)
4565    CHK_OOM(
4566        upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc));
4567
4568    if (!upb_inttable_lookup(&e->iton, num, NULL)) {
4569      upb_value v = upb_value_cstr(name2);
4570      CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc));
4571    }
4572  }
4573
4574  upb_inttable_compact2(&e->iton, ctx->alloc);
4575
4576  return true;
4577}
4578
4579static bool create_msgdef(symtab_addctx *ctx, const char *prefix,
4580                          const google_protobuf_DescriptorProto *msg_proto) {
4581  upb_msgdef *m;
4582  const google_protobuf_MessageOptions *options;
4583  const google_protobuf_OneofDescriptorProto *const *oneofs;
4584  const google_protobuf_FieldDescriptorProto *const *fields;
4585  const google_protobuf_EnumDescriptorProto *const *enums;
4586  const google_protobuf_DescriptorProto *const *msgs;
4587  size_t i, n;
4588  upb_strview name;
4589
4590  name = google_protobuf_DescriptorProto_name(msg_proto);
4591  CHK(upb_isident(name, false, ctx->status));
4592
4593  m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++];
4594  m->full_name = makefullname(ctx, prefix, name);
4595  CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG)));
4596
4597  CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc));
4598  CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc));
4599
4600  m->file = ctx->file;
4601  m->map_entry = false;
4602
4603  options = google_protobuf_DescriptorProto_options(msg_proto);
4604
4605  if (options) {
4606    m->map_entry = google_protobuf_MessageOptions_map_entry(options);
4607  }
4608
4609  if (ctx->layouts) {
4610    m->layout = *ctx->layouts;
4611    ctx->layouts++;
4612  } else {
4613    /* Allocate now (to allow cross-linking), populate later. */
4614    m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout));
4615  }
4616
4617  oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n);
4618  m->oneof_count = 0;
4619  m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n);
4620  for (i = 0; i < n; i++) {
4621    CHK(create_oneofdef(ctx, m, oneofs[i]));
4622  }
4623
4624  fields = google_protobuf_DescriptorProto_field(msg_proto, &n);
4625  m->field_count = 0;
4626  m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n);
4627  for (i = 0; i < n; i++) {
4628    CHK(create_fielddef(ctx, m->full_name, m, fields[i]));
4629  }
4630
4631  CHK(assign_msg_indices(m, ctx->status));
4632  CHK(check_oneofs(m, ctx->status));
4633  assign_msg_wellknowntype(m);
4634  upb_inttable_compact2(&m->itof, ctx->alloc);
4635
4636  /* This message is built.  Now build nested messages and enums. */
4637
4638  enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
4639  for (i = 0; i < n; i++) {
4640    CHK(create_enumdef(ctx, m->full_name, enums[i]));
4641  }
4642
4643  msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
4644  for (i = 0; i < n; i++) {
4645    CHK(create_msgdef(ctx, m->full_name, msgs[i]));
4646  }
4647
4648  return true;
4649}
4650
/* Totals of the defs declared in a file, accumulated by count_types_in_file()
 * and count_types_in_msg().  build_filedef() uses them to size the file's
 * msgs/enums/exts arrays before any def is created. */
typedef struct {
  int msg_count;   /* Messages, including nested ones. */
  int enum_count;  /* Enums at file scope and inside messages. */
  int ext_count;   /* Extensions at file scope and inside messages. */
} decl_counts;
4656
4657static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto,
4658                               decl_counts *counts) {
4659  const google_protobuf_DescriptorProto *const *msgs;
4660  size_t i, n;
4661
4662  counts->msg_count++;
4663
4664  msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n);
4665  for (i = 0; i < n; i++) {
4666    count_types_in_msg(msgs[i], counts);
4667  }
4668
4669  google_protobuf_DescriptorProto_enum_type(msg_proto, &n);
4670  counts->enum_count += n;
4671
4672  google_protobuf_DescriptorProto_extension(msg_proto, &n);
4673  counts->ext_count += n;
4674}
4675
4676static void count_types_in_file(
4677    const google_protobuf_FileDescriptorProto *file_proto,
4678    decl_counts *counts) {
4679  const google_protobuf_DescriptorProto *const *msgs;
4680  size_t i, n;
4681
4682  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
4683  for (i = 0; i < n; i++) {
4684    count_types_in_msg(msgs[i], counts);
4685  }
4686
4687  google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
4688  counts->enum_count += n;
4689
4690  google_protobuf_FileDescriptorProto_extension(file_proto, &n);
4691  counts->ext_count += n;
4692}
4693
4694static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix,
4695                             upb_fielddef *f) {
4696  upb_strview name;
4697  const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved;
4698
4699  if (f->is_extension_) {
4700    if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) {
4701      upb_status_seterrf(ctx->status,
4702                         "extension for field '%s' had no extendee",
4703                         f->full_name);
4704      return false;
4705    }
4706
4707    name = google_protobuf_FieldDescriptorProto_extendee(field_proto);
4708    f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
4709    CHK(f->msgdef);
4710  }
4711
4712  if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) &&
4713      !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) {
4714    upb_status_seterrf(ctx->status, "field '%s' is missing type name",
4715                       f->full_name);
4716    return false;
4717  }
4718
4719  name = google_protobuf_FieldDescriptorProto_type_name(field_proto);
4720
4721  if (upb_fielddef_issubmsg(f)) {
4722    f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG);
4723    CHK(f->sub.msgdef);
4724  } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) {
4725    f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM);
4726    CHK(f->sub.enumdef);
4727  }
4728
4729  /* Have to delay resolving of the default value until now because of the enum
4730   * case, since enum defaults are specified with a label. */
4731  if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) {
4732    upb_strview defaultval =
4733        google_protobuf_FieldDescriptorProto_default_value(field_proto);
4734
4735    if (f->file->syntax == UPB_SYNTAX_PROTO3) {
4736      upb_status_seterrf(ctx->status,
4737                         "proto3 fields cannot have explicit defaults (%s)",
4738                         f->full_name);
4739      return false;
4740    }
4741
4742    if (upb_fielddef_issubmsg(f)) {
4743      upb_status_seterrf(ctx->status,
4744                         "message fields cannot have explicit defaults (%s)",
4745                         f->full_name);
4746      return false;
4747    }
4748
4749    if (!parse_default(ctx, defaultval.data, defaultval.size, f)) {
4750      upb_status_seterrf(ctx->status,
4751                         "couldn't parse default '" UPB_STRVIEW_FORMAT
4752                         "' for field (%s)",
4753                         UPB_STRVIEW_ARGS(defaultval), f->full_name);
4754      return false;
4755    }
4756  } else {
4757    set_default_default(ctx, f);
4758  }
4759
4760  return true;
4761}
4762
4763static bool build_filedef(
4764    symtab_addctx *ctx, upb_filedef *file,
4765    const google_protobuf_FileDescriptorProto *file_proto) {
4766  upb_alloc *alloc = ctx->alloc;
4767  const google_protobuf_FileOptions *file_options_proto;
4768  const google_protobuf_DescriptorProto *const *msgs;
4769  const google_protobuf_EnumDescriptorProto *const *enums;
4770  const google_protobuf_FieldDescriptorProto *const *exts;
4771  const upb_strview* strs;
4772  size_t i, n;
4773  decl_counts counts = {0};
4774
4775  count_types_in_file(file_proto, &counts);
4776
4777  file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count);
4778  file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count);
4779  file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count);
4780
4781  CHK_OOM(counts.msg_count == 0 || file->msgs);
4782  CHK_OOM(counts.enum_count == 0 || file->enums);
4783  CHK_OOM(counts.ext_count == 0 || file->exts);
4784
4785  /* We increment these as defs are added. */
4786  file->msg_count = 0;
4787  file->enum_count = 0;
4788  file->ext_count = 0;
4789
4790  if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) {
4791    upb_status_seterrmsg(ctx->status, "File has no name");
4792    return false;
4793  }
4794
4795  file->name =
4796      strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto));
4797  file->phpprefix = NULL;
4798  file->phpnamespace = NULL;
4799
4800  if (google_protobuf_FileDescriptorProto_has_package(file_proto)) {
4801    upb_strview package =
4802        google_protobuf_FileDescriptorProto_package(file_proto);
4803    CHK(upb_isident(package, true, ctx->status));
4804    file->package = strviewdup(ctx, package);
4805  } else {
4806    file->package = NULL;
4807  }
4808
4809  if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) {
4810    upb_strview syntax =
4811        google_protobuf_FileDescriptorProto_syntax(file_proto);
4812
4813    if (streql_view(syntax, "proto2")) {
4814      file->syntax = UPB_SYNTAX_PROTO2;
4815    } else if (streql_view(syntax, "proto3")) {
4816      file->syntax = UPB_SYNTAX_PROTO3;
4817    } else {
4818      upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'",
4819                         UPB_STRVIEW_ARGS(syntax));
4820      return false;
4821    }
4822  } else {
4823    file->syntax = UPB_SYNTAX_PROTO2;
4824  }
4825
4826  /* Read options. */
4827  file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto);
4828  if (file_options_proto) {
4829    if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) {
4830      file->phpprefix = strviewdup(
4831          ctx,
4832          google_protobuf_FileOptions_php_class_prefix(file_options_proto));
4833    }
4834    if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) {
4835      file->phpnamespace = strviewdup(
4836          ctx, google_protobuf_FileOptions_php_namespace(file_options_proto));
4837    }
4838  }
4839
4840  /* Verify dependencies. */
4841  strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n);
4842  file->deps = upb_malloc(alloc, sizeof(*file->deps) * n) ;
4843  CHK_OOM(n == 0 || file->deps);
4844
4845  for (i = 0; i < n; i++) {
4846    upb_strview dep_name = strs[i];
4847    upb_value v;
4848    if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data,
4849                              dep_name.size, &v)) {
4850      upb_status_seterrf(ctx->status,
4851                         "Depends on file '" UPB_STRVIEW_FORMAT
4852                         "', but it has not been loaded",
4853                         UPB_STRVIEW_ARGS(dep_name));
4854      return false;
4855    }
4856    file->deps[i] = upb_value_getconstptr(v);
4857  }
4858
4859  /* Create messages. */
4860  msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n);
4861  for (i = 0; i < n; i++) {
4862    CHK(create_msgdef(ctx, file->package, msgs[i]));
4863  }
4864
4865  /* Create enums. */
4866  enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n);
4867  for (i = 0; i < n; i++) {
4868    CHK(create_enumdef(ctx, file->package, enums[i]));
4869  }
4870
4871  /* Create extensions. */
4872  exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n);
4873  file->exts = upb_malloc(alloc, sizeof(*file->exts) * n);
4874  CHK_OOM(n == 0 || file->exts);
4875  for (i = 0; i < n; i++) {
4876    CHK(create_fielddef(ctx, file->package, NULL, exts[i]));
4877  }
4878
4879  /* Now that all names are in the table, build layouts and resolve refs. */
4880  for (i = 0; i < file->ext_count; i++) {
4881    CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i]));
4882  }
4883
4884  for (i = 0; i < file->msg_count; i++) {
4885    const upb_msgdef *m = &file->msgs[i];
4886    int j;
4887    for (j = 0; j < m->field_count; j++) {
4888      CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j]));
4889    }
4890  }
4891
4892  if (!ctx->layouts) {
4893    for (i = 0; i < file->msg_count; i++) {
4894      const upb_msgdef *m = &file->msgs[i];
4895      make_layout(ctx->symtab, m);
4896    }
4897  }
4898
4899  return true;
4900 }
4901
4902static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx,
4903                                 upb_status *status) {
4904  const upb_filedef *file = ctx->file;
4905  upb_alloc *alloc = upb_arena_alloc(s->arena);
4906  upb_strtable_iter iter;
4907
4908  CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name),
4909                               upb_value_constptr(file), alloc));
4910
4911  upb_strtable_begin(&iter, ctx->addtab);
4912  for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) {
4913    upb_strview key = upb_strtable_iter_key(&iter);
4914    upb_value value = upb_strtable_iter_value(&iter);
4915    CHK_OOM(upb_strtable_insert3(&s->syms, key.data, key.size, value, alloc));
4916  }
4917
4918  return true;
4919}
4920
4921/* upb_filedef ****************************************************************/
4922
/* Returns the file name stored on the filedef. */
const char *upb_filedef_name(const upb_filedef *f) {
  return f->name;
}
4926
/* Returns the file's package; build_filedef() stores NULL here when the
 * descriptor declares no package. */
const char *upb_filedef_package(const upb_filedef *f) {
  return f->package;
}
4930
/* Returns the php_class_prefix file option, or NULL when the option was not
 * set in the descriptor. */
const char *upb_filedef_phpprefix(const upb_filedef *f) {
  return f->phpprefix;
}
4934
/* Returns the php_namespace file option, or NULL when the option was not set
 * in the descriptor. */
const char *upb_filedef_phpnamespace(const upb_filedef *f) {
  return f->phpnamespace;
}
4938
/* Returns the syntax recorded for the file (UPB_SYNTAX_PROTO2 or
 * UPB_SYNTAX_PROTO3). */
upb_syntax_t upb_filedef_syntax(const upb_filedef *f) {
  return f->syntax;
}
4942
/* Returns the number of message defs stored in f->msgs. */
int upb_filedef_msgcount(const upb_filedef *f) {
  return f->msg_count;
}
4946
/* Returns the number of dependencies recorded for this file (f->dep_count). */
int upb_filedef_depcount(const upb_filedef *f) {
  return f->dep_count;
}
4950
/* Returns the number of enum defs stored in f->enums. */
int upb_filedef_enumcount(const upb_filedef *f) {
  return f->enum_count;
}
4954
4955const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) {
4956  return i < 0 || i >= f->dep_count ? NULL : f->deps[i];
4957}
4958
4959const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) {
4960  return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i];
4961}
4962
4963const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) {
4964  return i < 0 || i >= f->enum_count ? NULL : &f->enums[i];
4965}
4966
/* Destroys a symtab created by upb_symtab_new(): frees its arena first, then
 * the upb_symtab struct itself (which was obtained from upb_gmalloc). */
void upb_symtab_free(upb_symtab *s) {
  upb_arena_free(s->arena);
  upb_gfree(s);
}
4971
4972upb_symtab *upb_symtab_new(void) {
4973  upb_symtab *s = upb_gmalloc(sizeof(*s));
4974  upb_alloc *alloc;
4975
4976  if (!s) {
4977    return NULL;
4978  }
4979
4980  s->arena = upb_arena_new();
4981  alloc = upb_arena_alloc(s->arena);
4982
4983  if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) ||
4984      !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) {
4985    upb_arena_free(s->arena);
4986    upb_gfree(s);
4987    s = NULL;
4988  }
4989  return s;
4990}
4991
4992const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) {
4993  upb_value v;
4994  return upb_strtable_lookup(&s->syms, sym, &v) ?
4995      unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
4996}
4997
4998const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym,
4999                                        size_t len) {
5000  upb_value v;
5001  return upb_strtable_lookup2(&s->syms, sym, len, &v) ?
5002      unpack_def(v, UPB_DEFTYPE_MSG) : NULL;
5003}
5004
5005const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) {
5006  upb_value v;
5007  return upb_strtable_lookup(&s->syms, sym, &v) ?
5008      unpack_def(v, UPB_DEFTYPE_ENUM) : NULL;
5009}
5010
5011const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) {
5012  upb_value v;
5013  return upb_strtable_lookup(&s->files, name, &v) ? upb_value_getconstptr(v)
5014                                                  : NULL;
5015}
5016
/* Returns the number of entries in the symtab's file table, narrowed to
 * int. */
int upb_symtab_filecount(const upb_symtab *s) {
  return (int)upb_strtable_count(&s->files);
}
5020
5021static const upb_filedef *_upb_symtab_addfile(
5022    upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
5023    const upb_msglayout **layouts, upb_status *status) {
5024  upb_arena *tmparena = upb_arena_new();
5025  upb_strtable addtab;
5026  upb_alloc *alloc = upb_arena_alloc(s->arena);
5027  upb_filedef *file = upb_malloc(alloc, sizeof(*file));
5028  bool ok;
5029  symtab_addctx ctx;
5030
5031  ctx.file = file;
5032  ctx.symtab = s;
5033  ctx.alloc = alloc;
5034  ctx.tmp = upb_arena_alloc(tmparena);
5035  ctx.addtab = &addtab;
5036  ctx.layouts = layouts;
5037  ctx.status = status;
5038
5039  ok = file &&
5040      upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) &&
5041      build_filedef(&ctx, file, file_proto) &&
5042      upb_symtab_addtotabs(s, &ctx, status);
5043
5044  upb_arena_free(tmparena);
5045  return ok ? file : NULL;
5046}
5047
/* Adds the file described by `file_proto` to the symtab.  Forwards to
 * _upb_symtab_addfile() with no layouts argument (NULL) and returns its
 * result: the new filedef, or NULL on failure. */
const upb_filedef *upb_symtab_addfile(
    upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto,
    upb_status *status) {
  return _upb_symtab_addfile(s, file_proto, NULL, status);
}
5053
5054/* Include here since we want most of this file to be stdio-free. */
5055#include <stdio.h>
5056
5057bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) {
5058  /* Since this function should never fail (it would indicate a bug in upb) we
5059   * print errors to stderr instead of returning error status to the user. */
5060  upb_def_init **deps = init->deps;
5061  google_protobuf_FileDescriptorProto *file;
5062  upb_arena *arena;
5063  upb_status status;
5064
5065  upb_status_clear(&status);
5066
5067  if (upb_strtable_lookup(&s->files, init->filename, NULL)) {
5068    return true;
5069  }
5070
5071  arena = upb_arena_new();
5072
5073  for (; *deps; deps++) {
5074    if (!_upb_symtab_loaddefinit(s, *deps)) goto err;
5075  }
5076
5077  file = google_protobuf_FileDescriptorProto_parse(
5078      init->descriptor.data, init->descriptor.size, arena);
5079
5080  if (!file) {
5081    upb_status_seterrf(
5082        &status,
5083        "Failed to parse compiled-in descriptor for file '%s'. This should "
5084        "never happen.",
5085        init->filename);
5086    goto err;
5087  }
5088
5089  if (!_upb_symtab_addfile(s, file, init->layouts, &status)) goto err;
5090
5091  upb_arena_free(arena);
5092  return true;
5093
5094err:
5095  fprintf(stderr, "Error loading compiled-in descriptor: %s\n",
5096          upb_status_errmsg(&status));
5097  upb_arena_free(arena);
5098  return false;
5099}
5100
5101#undef CHK
5102#undef CHK_OOM
5103
5104
5105#include <string.h>
5106
5107
/* Number of bytes a single (non-repeated) field value occupies inside a
 * message, indexed by the field's descriptor type as named in the per-entry
 * comments.  _upb_msg_getraw() and upb_msg_set() use it to decide how many
 * bytes to copy. */
static char field_size[] = {
  0,/* 0 */
  8, /* UPB_DESCRIPTOR_TYPE_DOUBLE */
  4, /* UPB_DESCRIPTOR_TYPE_FLOAT */
  8, /* UPB_DESCRIPTOR_TYPE_INT64 */
  8, /* UPB_DESCRIPTOR_TYPE_UINT64 */
  4, /* UPB_DESCRIPTOR_TYPE_INT32 */
  8, /* UPB_DESCRIPTOR_TYPE_FIXED64 */
  4, /* UPB_DESCRIPTOR_TYPE_FIXED32 */
  1, /* UPB_DESCRIPTOR_TYPE_BOOL */
  sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_STRING */
  sizeof(void*), /* UPB_DESCRIPTOR_TYPE_GROUP */
  sizeof(void*), /* UPB_DESCRIPTOR_TYPE_MESSAGE */
  sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_BYTES */
  4, /* UPB_DESCRIPTOR_TYPE_UINT32 */
  4, /* UPB_DESCRIPTOR_TYPE_ENUM */
  4, /* UPB_DESCRIPTOR_TYPE_SFIXED32 */
  8, /* UPB_DESCRIPTOR_TYPE_SFIXED64 */
  4, /* UPB_DESCRIPTOR_TYPE_SINT32 */
  8, /* UPB_DESCRIPTOR_TYPE_SINT64 */
};
5129
/* Key/value sizes handed to _upb_map_new() by upb_map_new(), indexed by the
 * field type named in the per-entry comments.  Strings/bytes are special-cased
 * in maps, which is why their entries are 0 rather than a byte count. */
static char _upb_fieldtype_to_mapsize[12] = {
  0,
  1,  /* UPB_TYPE_BOOL */
  4,  /* UPB_TYPE_FLOAT */
  4,  /* UPB_TYPE_INT32 */
  4,  /* UPB_TYPE_UINT32 */
  4,  /* UPB_TYPE_ENUM */
  sizeof(void*),  /* UPB_TYPE_MESSAGE */
  8,  /* UPB_TYPE_DOUBLE */
  8,  /* UPB_TYPE_INT64 */
  8,  /* UPB_TYPE_UINT64 */
  0,  /* UPB_TYPE_STRING */
  0,  /* UPB_TYPE_BYTES */
};
5145
5146/** upb_msg *******************************************************************/
5147
/* Creates a new message of type `m` by passing the msgdef's layout and the
 * arena `a` to _upb_msg_new(); returns whatever that call returns. */
upb_msg *upb_msg_new(const upb_msgdef *m, upb_arena *a) {
  return _upb_msg_new(upb_msgdef_layout(m), a);
}
5151
/* A negative `presence` marks a oneof member; its negation is used by
 * oneofcase() as the offset of the oneof's case word. */
static bool in_oneof(const upb_msglayout_field *field) {
  return field->presence < 0;
}
5155
/* Returns a pointer to the uint32_t case word of the oneof that `field`
 * belongs to, located -field->presence bytes into `msg`.  Callers compare it
 * against (or store into it) field->number.  `field` must be a oneof member. */
static uint32_t *oneofcase(const upb_msg *msg,
                           const upb_msglayout_field *field) {
  UPB_ASSERT(in_oneof(field));
  return UPB_PTR_AT(msg, -field->presence, uint32_t);
}
5161
5162static upb_msgval _upb_msg_getraw(const upb_msg *msg, const upb_fielddef *f) {
5163  const upb_msglayout_field *field = upb_fielddef_layout(f);
5164  const char *mem = UPB_PTR_AT(msg, field->offset, char);
5165  upb_msgval val = {0};
5166  int size = upb_fielddef_isseq(f) ? sizeof(void *)
5167                                   : field_size[field->descriptortype];
5168  memcpy(&val, mem, size);
5169  return val;
5170}
5171
5172bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) {
5173  const upb_msglayout_field *field = upb_fielddef_layout(f);
5174  if (in_oneof(field)) {
5175    return *oneofcase(msg, field) == field->number;
5176  } else if (field->presence > 0) {
5177    uint32_t hasbit = field->presence;
5178    return *UPB_PTR_AT(msg, hasbit / 8, uint8_t) & (1 << (hasbit % 8));
5179  } else {
5180    UPB_ASSERT(field->descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE ||
5181               field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP);
5182    return _upb_msg_getraw(msg, f).msg_val != NULL;
5183  }
5184}
5185
5186bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o) {
5187  upb_oneof_iter i;
5188  const upb_fielddef *f;
5189  const upb_msglayout_field *field;
5190
5191  upb_oneof_begin(&i, o);
5192  if (upb_oneof_done(&i)) return false;
5193  f = upb_oneof_iter_field(&i);
5194  field = upb_fielddef_layout(f);
5195  return *oneofcase(msg, field) != 0;
5196}
5197
5198upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) {
5199  if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) {
5200    return _upb_msg_getraw(msg, f);
5201  } else {
5202    /* TODO(haberman): change upb_fielddef to not require this switch(). */
5203    upb_msgval val = {0};
5204    switch (upb_fielddef_type(f)) {
5205      case UPB_TYPE_INT32:
5206      case UPB_TYPE_ENUM:
5207        val.int32_val = upb_fielddef_defaultint32(f);
5208        break;
5209      case UPB_TYPE_INT64:
5210        val.int64_val = upb_fielddef_defaultint64(f);
5211        break;
5212      case UPB_TYPE_UINT32:
5213        val.uint32_val = upb_fielddef_defaultuint32(f);
5214        break;
5215      case UPB_TYPE_UINT64:
5216        val.uint64_val = upb_fielddef_defaultuint64(f);
5217        break;
5218      case UPB_TYPE_FLOAT:
5219        val.float_val = upb_fielddef_defaultfloat(f);
5220        break;
5221      case UPB_TYPE_DOUBLE:
5222        val.double_val = upb_fielddef_defaultdouble(f);
5223        break;
5224      case UPB_TYPE_BOOL:
5225        val.double_val = upb_fielddef_defaultbool(f);
5226        break;
5227      case UPB_TYPE_STRING:
5228      case UPB_TYPE_BYTES:
5229        val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size);
5230        break;
5231      case UPB_TYPE_MESSAGE:
5232        val.msg_val = NULL;
5233        break;
5234    }
5235    return val;
5236  }
5237}
5238
5239upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f,
5240                              upb_arena *a) {
5241  const upb_msglayout_field *field = upb_fielddef_layout(f);
5242  upb_mutmsgval ret;
5243  char *mem = UPB_PTR_AT(msg, field->offset, char);
5244  bool wrong_oneof = in_oneof(field) && *oneofcase(msg, field) != field->number;
5245
5246  memcpy(&ret, mem, sizeof(void*));
5247
5248  if (a && (!ret.msg || wrong_oneof)) {
5249    if (upb_fielddef_ismap(f)) {
5250      const upb_msgdef *entry = upb_fielddef_msgsubdef(f);
5251      const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY);
5252      const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE);
5253      ret.map = upb_map_new(a, upb_fielddef_type(key), upb_fielddef_type(value));
5254    } else if (upb_fielddef_isseq(f)) {
5255      ret.array = upb_array_new(a, upb_fielddef_type(f));
5256    } else {
5257      UPB_ASSERT(upb_fielddef_issubmsg(f));
5258      ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a);
5259    }
5260
5261    memcpy(mem, &ret, sizeof(void*));
5262
5263    if (wrong_oneof) {
5264      *oneofcase(msg, field) = field->number;
5265    }
5266  }
5267  return ret;
5268}
5269
5270void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val,
5271                 upb_arena *a) {
5272  const upb_msglayout_field *field = upb_fielddef_layout(f);
5273  char *mem = UPB_PTR_AT(msg, field->offset, char);
5274  int size = upb_fielddef_isseq(f) ? sizeof(void *)
5275                                   : field_size[field->descriptortype];
5276  memcpy(mem, &val, size);
5277  if (in_oneof(field)) {
5278    *oneofcase(msg, field) = field->number;
5279  }
5280}
5281
5282bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m,
5283                  const upb_symtab *ext_pool, const upb_fielddef **out_f,
5284                  upb_msgval *out_val, size_t *iter) {
5285  size_t i = *iter;
5286  const upb_msgval zero = {0};
5287  const upb_fielddef *f;
5288  while ((f = _upb_msgdef_field(m, (int)++i)) != NULL) {
5289    upb_msgval val = _upb_msg_getraw(msg, f);
5290
5291    /* Skip field if unset or empty. */
5292    if (upb_fielddef_haspresence(f)) {
5293      if (!upb_msg_has(msg, f)) continue;
5294    } else {
5295      upb_msgval test = val;
5296      if (upb_fielddef_isstring(f) && !upb_fielddef_isseq(f)) {
5297        /* Clear string pointer, only size matters (ptr could be non-NULL). */
5298        test.str_val.data = NULL;
5299      }
5300      /* Continue if NULL or 0. */
5301      if (memcmp(&test, &zero, sizeof(test)) == 0) continue;
5302
5303      /* Continue on empty array or map. */
5304      if (upb_fielddef_ismap(f)) {
5305        if (upb_map_size(test.map_val) == 0) continue;
5306      } else if (upb_fielddef_isseq(f)) {
5307        if (upb_array_size(test.array_val) == 0) continue;
5308      }
5309    }
5310
5311    *out_val = val;
5312    *out_f = f;
5313    *iter = i;
5314    return true;
5315  }
5316  *iter = i;
5317  return false;
5318}
5319
5320/** upb_array *****************************************************************/
5321
/* Creates a new array for elements of `type`; forwards to _upb_array_new()
 * and returns its result. */
upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type) {
  return _upb_array_new(a, type);
}
5325
/* Returns the number of elements in the array (arr->len). */
size_t upb_array_size(const upb_array *arr) {
  return arr->len;
}
5329
5330upb_msgval upb_array_get(const upb_array *arr, size_t i) {
5331  upb_msgval ret;
5332  const char* data = _upb_array_constptr(arr);
5333  int lg2 = arr->data & 7;
5334  UPB_ASSERT(i < arr->len);
5335  memcpy(&ret, data + (i << lg2), 1 << lg2);
5336  return ret;
5337}
5338
5339void upb_array_set(upb_array *arr, size_t i, upb_msgval val) {
5340  char* data = _upb_array_ptr(arr);
5341  int lg2 = arr->data & 7;
5342  UPB_ASSERT(i < arr->len);
5343  memcpy(data + (i << lg2), &val, 1 << lg2);
5344}
5345
5346bool upb_array_append(upb_array *arr, upb_msgval val, upb_arena *arena) {
5347  if (!_upb_array_realloc(arr, arr->len + 1, arena)) {
5348    return false;
5349  }
5350  arr->len++;
5351  upb_array_set(arr, arr->len - 1, val);
5352  return true;
5353}
5354
5355/* Resizes the array to the given size, reallocating if necessary, and returns a
5356 * pointer to the new array elements. */
5357bool upb_array_resize(upb_array *arr, size_t size, upb_arena *arena) {
5358  return _upb_array_realloc(arr, size, arena);
5359}
5360
5361/** upb_map *******************************************************************/
5362
/* Creates a new map.  The key and value sizes passed to _upb_map_new() are
 * looked up in _upb_fieldtype_to_mapsize using `key_type` and `value_type` as
 * indexes. */
upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type,
                     upb_fieldtype_t value_type) {
  return _upb_map_new(a, _upb_fieldtype_to_mapsize[key_type],
                      _upb_fieldtype_to_mapsize[value_type]);
}
5368
/* Returns the number of entries in the map, as reported by _upb_map_size(). */
size_t upb_map_size(const upb_map *map) {
  return _upb_map_size(map);
}
5372
/* Looks up `key` in the map using the map's own key/value sizes; `val` is
 * passed to _upb_map_get() as the output location.  Returns that call's
 * result (presumably true when the key was found -- confirm against
 * _upb_map_get). */
bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) {
  return _upb_map_get(map, &key, map->key_size, val, map->val_size);
}
5376
/* Stores `val` under `key` via _upb_map_set(), using the map's own key/value
 * sizes and `arena` for any allocation.  Returns that call's result. */
bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val,
                 upb_arena *arena) {
  return _upb_map_set(map, &key, map->key_size, &val, map->val_size, arena);
}
5381
/* Removes `key` from the map via _upb_map_delete() and returns that call's
 * result. */
bool upb_map_delete(upb_map *map, upb_msgval key) {
  return _upb_map_delete(map, &key, map->key_size);
}
5385
/* Advances the map iteration cursor `*iter`; forwards to _upb_map_next() and
 * returns its result. */
bool upb_mapiter_next(const upb_map *map, size_t *iter) {
  return _upb_map_next(map, iter);
}
5389
5390/* Returns the key and value for this entry of the map. */
5391upb_msgval upb_mapiter_key(const upb_map *map, size_t iter) {
5392  upb_strtable_iter i;
5393  upb_msgval ret;
5394  i.t = &map->table;
5395  i.index = iter;
5396  _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size);
5397  return ret;
5398}
5399
5400upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) {
5401  upb_strtable_iter i;
5402  upb_msgval ret;
5403  i.t = &map->table;
5404  i.index = iter;
5405  _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size);
5406  return ret;
5407}
5408
5409/* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */
5410/*
5411** TODO(haberman): it's unclear whether a lot of the consistency checks should
5412** UPB_ASSERT() or return false.
5413*/
5414
5415
5416#include <string.h>
5417
5418
5419
/* Handler table for one message type.  Instances are created by
 * upb_handlers_new(), which over-allocates the struct so that `table` has one
 * entry per selector of the message. */
struct upb_handlers {
  /* Cache passed to upb_handlercache_addcleanup() by
   * upb_handlers_addcleanup(). */
  upb_handlercache *cache;
  /* Message type these handlers are for. */
  const upb_msgdef *msg;
  /* Sub-handlers for submessage fields, indexed by upb_fielddef_index();
   * NULL when the message has no submessage fields. */
  const upb_handlers **sub;
  /* Closure type established for the top-level frame, if any. */
  const void *top_closure_type;
  upb_handlers_tabent table[1];  /* Dynamically-sized field handler array. */
};
5427
5428static void *upb_calloc(upb_arena *arena, size_t size) {
5429  void *mem = upb_malloc(upb_arena_alloc(arena), size);
5430  if (mem) {
5431    memset(mem, 0, size);
5432  }
5433  return mem;
5434}
5435
/* Defined for the sole purpose of having a unique pointer value for
 * UPB_NO_CLOSURE.  Only its address matters; the value is never used here. */
char _upb_noclosure;

/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the
 * subhandlers for this submessage field.  Expands to an lvalue, so it can be
 * assigned to as well as read. */
#define SUBH(h, selector) (h->sub[selector])

/* The selector for a submessage field is the field index. */
#define SUBH_F(h, f) SUBH(h, upb_fielddef_index(f))
5446
5447static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f,
5448                         upb_handlertype_t type) {
5449  upb_selector_t sel;
5450  bool ok;
5451
5452  ok = upb_handlers_getselector(f, type, &sel);
5453
5454  UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f));
5455  UPB_ASSERT(ok);
5456
5457  return sel;
5458}
5459
/* Like trygetsel(), but asserts that the result is non-negative and returns
 * it as a upb_selector_t. */
static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f,
                             upb_handlertype_t type) {
  int32_t sel = trygetsel(h, f, type);
  UPB_ASSERT(sel >= 0);
  return sel;
}
5466
/* Returns the address of the return_closure_type slot in the table entry for
 * handler `type` on field `f`, so callers can both read and update it. */
static const void **returntype(upb_handlers *h, const upb_fielddef *f,
                               upb_handlertype_t type) {
  return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type;
}
5471
5472static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f,
5473                  upb_handlertype_t type, upb_func *func,
5474                  const upb_handlerattr *attr) {
5475  upb_handlerattr set_attr = UPB_HANDLERATTR_INIT;
5476  const void *closure_type;
5477  const void **context_closure_type;
5478
5479  UPB_ASSERT(!h->table[sel].func);
5480
5481  if (attr) {
5482    set_attr = *attr;
5483  }
5484
5485  /* Check that the given closure type matches the closure type that has been
5486   * established for this context (if any). */
5487  closure_type = set_attr.closure_type;
5488
5489  if (type == UPB_HANDLER_STRING) {
5490    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR);
5491  } else if (f && upb_fielddef_isseq(f) &&
5492             type != UPB_HANDLER_STARTSEQ &&
5493             type != UPB_HANDLER_ENDSEQ) {
5494    context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ);
5495  } else {
5496    context_closure_type = &h->top_closure_type;
5497  }
5498
5499  if (closure_type && *context_closure_type &&
5500      closure_type != *context_closure_type) {
5501    return false;
5502  }
5503
5504  if (closure_type)
5505    *context_closure_type = closure_type;
5506
5507  /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer
5508   * matches any pre-existing expectations about what type is expected. */
5509  if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) {
5510    const void *return_type = set_attr.return_closure_type;
5511    const void *table_return_type = h->table[sel].attr.return_closure_type;
5512    if (return_type && table_return_type && return_type != table_return_type) {
5513      return false;
5514    }
5515
5516    if (table_return_type && !return_type) {
5517      set_attr.return_closure_type = table_return_type;
5518    }
5519  }
5520
5521  h->table[sel].func = (upb_func*)func;
5522  h->table[sel].attr = set_attr;
5523  return true;
5524}
5525
5526/* Returns the effective closure type for this handler (which will propagate
5527 * from outer frames if this frame has no START* handler).  Not implemented for
5528 * UPB_HANDLER_STRING at the moment since this is not needed.  Returns NULL is
5529 * the effective closure type is unspecified (either no handler was registered
5530 * to specify it or the handler that was registered did not specify the closure
5531 * type). */
5532const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
5533                                   upb_handlertype_t type) {
5534  const void *ret;
5535  upb_selector_t sel;
5536
5537  UPB_ASSERT(type != UPB_HANDLER_STRING);
5538  ret = h->top_closure_type;
5539
5540  if (upb_fielddef_isseq(f) &&
5541      type != UPB_HANDLER_STARTSEQ &&
5542      type != UPB_HANDLER_ENDSEQ &&
5543      h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ)].func) {
5544    ret = h->table[sel].attr.return_closure_type;
5545  }
5546
5547  if (type == UPB_HANDLER_STRING &&
5548      h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR)].func) {
5549    ret = h->table[sel].attr.return_closure_type;
5550  }
5551
5552  /* The effective type of the submessage; not used yet.
5553   * if (type == SUBMESSAGE &&
5554   *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
5555   *   ret = h->table[sel].attr.return_closure_type;
5556   * } */
5557
5558  return ret;
5559}
5560
5561/* Checks whether the START* handler specified by f & type is missing even
5562 * though it is required to convert the established type of an outer frame
5563 * ("closure_type") into the established type of an inner frame (represented in
5564 * the return closure type of this handler's attr. */
5565bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type,
5566                upb_status *status) {
5567  const void *closure_type;
5568  const upb_handlerattr *attr;
5569  const void *return_closure_type;
5570
5571  upb_selector_t sel = handlers_getsel(h, f, type);
5572  if (h->table[sel].func) return true;
5573  closure_type = effective_closure_type(h, f, type);
5574  attr = &h->table[sel].attr;
5575  return_closure_type = attr->return_closure_type;
5576  if (closure_type && return_closure_type &&
5577      closure_type != return_closure_type) {
5578    return false;
5579  }
5580  return true;
5581}
5582
5583static upb_handlers *upb_handlers_new(const upb_msgdef *md,
5584                                      upb_handlercache *cache,
5585                                      upb_arena *arena) {
5586  int extra;
5587  upb_handlers *h;
5588
5589  extra =
5590      (int)(sizeof(upb_handlers_tabent) * (upb_msgdef_selectorcount(md) - 1));
5591  h = upb_calloc(arena, sizeof(*h) + extra);
5592  if (!h) return NULL;
5593
5594  h->cache = cache;
5595  h->msg = md;
5596
5597  if (upb_msgdef_submsgfieldcount(md) > 0) {
5598    size_t bytes = upb_msgdef_submsgfieldcount(md) * sizeof(*h->sub);
5599    h->sub = upb_calloc(arena, bytes);
5600    if (!h->sub) return NULL;
5601  } else {
5602    h->sub = 0;
5603  }
5604
5605  /* calloc() above initialized all handlers to NULL. */
5606  return h;
5607}
5608
5609/* Public interface ***********************************************************/
5610
/* Generates the public per-type setter upb_handlers_set<name>().  Each setter
 * resolves the selector for (f, handlertype) with trygetsel() and registers
 * `func` through doset(), returning doset()'s result. */
#define SETTER(name, handlerctype, handlertype)                       \
  bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \
                              handlerctype func,                      \
                              const upb_handlerattr *attr) {          \
    int32_t sel = trygetsel(h, f, handlertype);                       \
    return doset(h, sel, f, handlertype, (upb_func *)func, attr);     \
  }

SETTER(int32,       upb_int32_handlerfunc*,       UPB_HANDLER_INT32)
SETTER(int64,       upb_int64_handlerfunc*,       UPB_HANDLER_INT64)
SETTER(uint32,      upb_uint32_handlerfunc*,      UPB_HANDLER_UINT32)
SETTER(uint64,      upb_uint64_handlerfunc*,      UPB_HANDLER_UINT64)
SETTER(float,       upb_float_handlerfunc*,       UPB_HANDLER_FLOAT)
SETTER(double,      upb_double_handlerfunc*,      UPB_HANDLER_DOUBLE)
SETTER(bool,        upb_bool_handlerfunc*,        UPB_HANDLER_BOOL)
SETTER(startstr,    upb_startstr_handlerfunc*,    UPB_HANDLER_STARTSTR)
SETTER(string,      upb_string_handlerfunc*,      UPB_HANDLER_STRING)
SETTER(endstr,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSTR)
SETTER(startseq,    upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSEQ)
SETTER(startsubmsg, upb_startfield_handlerfunc*,  UPB_HANDLER_STARTSUBMSG)
SETTER(endsubmsg,   upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSUBMSG)
SETTER(endseq,      upb_endfield_handlerfunc*,    UPB_HANDLER_ENDSEQ)

#undef SETTER
5635
/* Registers the handler stored in the fixed UPB_UNKNOWN_SELECTOR slot.  There
 * is no field (f == NULL); UPB_HANDLER_INT32 is passed to doset() only so the
 * handler is checked against the top-level closure type. */
bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func,
                             const upb_handlerattr *attr) {
  return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32,
               (upb_func *)func, attr);
}
5641
/* Registers the handler stored in the fixed UPB_STARTMSG_SELECTOR slot.
 * There is no field (f == NULL); UPB_HANDLER_INT32 is passed to doset() only
 * so the handler is checked against the top-level closure type. */
bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func,
                              const upb_handlerattr *attr) {
  return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
               (upb_func *)func, attr);
}
5647
/* Registers the handler stored in the fixed UPB_ENDMSG_SELECTOR slot.  There
 * is no field (f == NULL); UPB_HANDLER_INT32 is passed to doset() only so the
 * handler is checked against the top-level closure type. */
bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func,
                            const upb_handlerattr *attr) {
  return doset(h, UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32,
               (upb_func *)func, attr);
}
5653
5654bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f,
5655                                 const upb_handlers *sub) {
5656  UPB_ASSERT(sub);
5657  UPB_ASSERT(upb_fielddef_issubmsg(f));
5658  if (SUBH_F(h, f)) return false;  /* Can't reset. */
5659  if (upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) {
5660    return false;
5661  }
5662  SUBH_F(h, f) = sub;
5663  return true;
5664}
5665
/* Returns the sub-handlers registered for submessage field `f`; the slot
 * starts out NULL, so NULL means none have been set. */
const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h,
                                                const upb_fielddef *f) {
  UPB_ASSERT(upb_fielddef_issubmsg(f));
  return SUBH_F(h, f);
}
5671
5672upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s,
5673                                  const void **handler_data) {
5674  upb_func *ret = (upb_func *)h->table[s].func;
5675  if (ret && handler_data) {
5676    *handler_data = h->table[s].attr.handler_data;
5677  }
5678  return ret;
5679}
5680
5681bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel,
5682                          upb_handlerattr *attr) {
5683  if (!upb_handlers_gethandler(h, sel, NULL))
5684    return false;
5685  *attr = h->table[sel].attr;
5686  return true;
5687}
5688
5689const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h,
5690                                                    upb_selector_t sel) {
5691  /* STARTSUBMSG selector in sel is the field's selector base. */
5692  return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT);
5693}
5694
5695const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; }
5696
5697bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) {
5698  return upb_handlercache_addcleanup(h->cache, p, func);
5699}
5700
5701upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) {
5702  switch (upb_fielddef_type(f)) {
5703    case UPB_TYPE_INT32:
5704    case UPB_TYPE_ENUM: return UPB_HANDLER_INT32;
5705    case UPB_TYPE_INT64: return UPB_HANDLER_INT64;
5706    case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32;
5707    case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64;
5708    case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT;
5709    case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE;
5710    case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL;
5711    default: UPB_ASSERT(false); return -1;  /* Invalid input. */
5712  }
5713}
5714
5715bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type,
5716                              upb_selector_t *s) {
5717  uint32_t selector_base = upb_fielddef_selectorbase(f);
5718  switch (type) {
5719    case UPB_HANDLER_INT32:
5720    case UPB_HANDLER_INT64:
5721    case UPB_HANDLER_UINT32:
5722    case UPB_HANDLER_UINT64:
5723    case UPB_HANDLER_FLOAT:
5724    case UPB_HANDLER_DOUBLE:
5725    case UPB_HANDLER_BOOL:
5726      if (!upb_fielddef_isprimitive(f) ||
5727          upb_handlers_getprimitivehandlertype(f) != type)
5728        return false;
5729      *s = selector_base;
5730      break;
5731    case UPB_HANDLER_STRING:
5732      if (upb_fielddef_isstring(f)) {
5733        *s = selector_base;
5734      } else if (upb_fielddef_lazy(f)) {
5735        *s = selector_base + 3;
5736      } else {
5737        return false;
5738      }
5739      break;
5740    case UPB_HANDLER_STARTSTR:
5741      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
5742        *s = selector_base + 1;
5743      } else {
5744        return false;
5745      }
5746      break;
5747    case UPB_HANDLER_ENDSTR:
5748      if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) {
5749        *s = selector_base + 2;
5750      } else {
5751        return false;
5752      }
5753      break;
5754    case UPB_HANDLER_STARTSEQ:
5755      if (!upb_fielddef_isseq(f)) return false;
5756      *s = selector_base - 2;
5757      break;
5758    case UPB_HANDLER_ENDSEQ:
5759      if (!upb_fielddef_isseq(f)) return false;
5760      *s = selector_base - 1;
5761      break;
5762    case UPB_HANDLER_STARTSUBMSG:
5763      if (!upb_fielddef_issubmsg(f)) return false;
5764      /* Selectors for STARTSUBMSG are at the beginning of the table so that the
5765       * selector can also be used as an index into the "sub" array of
5766       * subhandlers.  The indexes for the two into these two tables are the
5767       * same, except that in the handler table the static selectors come first. */
5768      *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT;
5769      break;
5770    case UPB_HANDLER_ENDSUBMSG:
5771      if (!upb_fielddef_issubmsg(f)) return false;
5772      *s = selector_base;
5773      break;
5774  }
5775  UPB_ASSERT((size_t)*s < upb_msgdef_selectorcount(upb_fielddef_containingtype(f)));
5776  return true;
5777}
5778
5779/* upb_handlercache ***********************************************************/
5780
/* A cache of upb_handlers objects, keyed by message definition.  Handlers are
 * created on demand by upb_handlercache_get(). */
struct upb_handlercache {
  /* Passed to upb_handlers_new() for every handlers object created by this
   * cache; also receives cleanups added via upb_handlercache_addcleanup(). */
  upb_arena *arena;
  upb_inttable tab;  /* maps upb_msgdef* -> upb_handlers*. */
  /* Called with |closure| and each newly created handlers object. */
  upb_handlers_callback *callback;
  const void *closure;
};
5787
5788const upb_handlers *upb_handlercache_get(upb_handlercache *c,
5789                                         const upb_msgdef *md) {
5790  upb_msg_field_iter i;
5791  upb_value v;
5792  upb_handlers *h;
5793
5794  if (upb_inttable_lookupptr(&c->tab, md, &v)) {
5795    return upb_value_getptr(v);
5796  }
5797
5798  h = upb_handlers_new(md, c, c->arena);
5799  v = upb_value_ptr(h);
5800
5801  if (!h) return NULL;
5802  if (!upb_inttable_insertptr(&c->tab, md, v)) return NULL;
5803
5804  c->callback(c->closure, h);
5805
5806  /* For each submessage field, get or create a handlers object and set it as
5807   * the subhandlers. */
5808  for(upb_msg_field_begin(&i, md);
5809      !upb_msg_field_done(&i);
5810      upb_msg_field_next(&i)) {
5811    upb_fielddef *f = upb_msg_iter_field(&i);
5812
5813    if (upb_fielddef_issubmsg(f)) {
5814      const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
5815      const upb_handlers *sub_mh = upb_handlercache_get(c, subdef);
5816
5817      if (!sub_mh) return NULL;
5818
5819      upb_handlers_setsubhandlers(h, f, sub_mh);
5820    }
5821  }
5822
5823  return h;
5824}
5825
5826
5827upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback,
5828                                       const void *closure) {
5829  upb_handlercache *cache = upb_gmalloc(sizeof(*cache));
5830
5831  if (!cache) return NULL;
5832
5833  cache->arena = upb_arena_new();
5834
5835  cache->callback = callback;
5836  cache->closure = closure;
5837
5838  if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom;
5839
5840  return cache;
5841
5842oom:
5843  upb_gfree(cache);
5844  return NULL;
5845}
5846
5847void upb_handlercache_free(upb_handlercache *cache) {
5848  upb_inttable_uninit(&cache->tab);
5849  upb_arena_free(cache->arena);
5850  upb_gfree(cache);
5851}
5852
5853bool upb_handlercache_addcleanup(upb_handlercache *c, void *p,
5854                                 upb_handlerfree *func) {
5855  return upb_arena_addcleanup(c->arena, p, func);
5856}
5857
5858/* upb_byteshandler ***********************************************************/
5859
5860bool upb_byteshandler_setstartstr(upb_byteshandler *h,
5861                                  upb_startstr_handlerfunc *func, void *d) {
5862  h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func;
5863  h->table[UPB_STARTSTR_SELECTOR].attr.handler_data = d;
5864  return true;
5865}
5866
5867bool upb_byteshandler_setstring(upb_byteshandler *h,
5868                                upb_string_handlerfunc *func, void *d) {
5869  h->table[UPB_STRING_SELECTOR].func = (upb_func*)func;
5870  h->table[UPB_STRING_SELECTOR].attr.handler_data = d;
5871  return true;
5872}
5873
5874bool upb_byteshandler_setendstr(upb_byteshandler *h,
5875                                upb_endfield_handlerfunc *func, void *d) {
5876  h->table[UPB_ENDSTR_SELECTOR].func = (upb_func*)func;
5877  h->table[UPB_ENDSTR_SELECTOR].attr.handler_data = d;
5878  return true;
5879}
5880
5881/** Handlers for upb_msg ******************************************************/
5882
/* Handler data for the generic scalar writers below. */
typedef struct {
  size_t offset;   /* Byte offset, from the closure pointer, of the value. */
  int32_t hasbit;  /* Bit index of the presence bit; only set when > 0. */
} upb_msg_handlerdata;

/* Fallback implementation if the handler is not specialized by the producer.
 *
 * Each generated upb_msg_set<type>() treats the closure |c| as the base of a
 * byte buffer.  If the handler data carries a positive hasbit index, that bit
 * is set (bit i lives in byte i / 8, position i % 8).  The value is then
 * stored at the configured byte offset.  Always returns true. */
#define MSG_WRITER(type, ctype)                                               \
  bool upb_msg_set ## type (void *c, const void *hd, ctype val) {             \
    const upb_msg_handlerdata *info = hd;                                     \
    uint8_t *base = c;                                                        \
    const int32_t bit = info->hasbit;                                         \
    if (bit > 0) {                                                            \
      base[bit / 8] |= 1 << (bit % 8);                                        \
    }                                                                         \
    *(ctype*)&base[info->offset] = val;                                       \
    return true;                                                              \
  }

MSG_WRITER(double, double)
MSG_WRITER(float,  float)
MSG_WRITER(int32,  int32_t)
MSG_WRITER(int64,  int64_t)
MSG_WRITER(uint32, uint32_t)
MSG_WRITER(uint64, uint64_t)
MSG_WRITER(bool,   bool)
5906
5907bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f,
5908                              size_t offset, int32_t hasbit) {
5909  upb_handlerattr attr = UPB_HANDLERATTR_INIT;
5910  bool ok;
5911
5912  upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d));
5913  if (!d) return false;
5914  d->offset = offset;
5915  d->hasbit = hasbit;
5916
5917  attr.handler_data = d;
5918  attr.alwaysok = true;
5919  upb_handlers_addcleanup(h, d, upb_gfree);
5920
5921#define TYPE(u, l) \
5922  case UPB_TYPE_##u: \
5923    ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break;
5924
5925  ok = false;
5926
5927  switch (upb_fielddef_type(f)) {
5928    TYPE(INT64,  int64);
5929    TYPE(INT32,  int32);
5930    TYPE(ENUM,   int32);
5931    TYPE(UINT64, uint64);
5932    TYPE(UINT32, uint32);
5933    TYPE(DOUBLE, double);
5934    TYPE(FLOAT,  float);
5935    TYPE(BOOL,   bool);
5936    default: UPB_ASSERT(false); break;
5937  }
5938#undef TYPE
5939
5940  return ok;
5941}
5942
5943bool upb_msg_getscalarhandlerdata(const upb_handlers *h,
5944                                  upb_selector_t s,
5945                                  upb_fieldtype_t *type,
5946                                  size_t *offset,
5947                                  int32_t *hasbit) {
5948  const upb_msg_handlerdata *d;
5949  const void *p;
5950  upb_func *f = upb_handlers_gethandler(h, s, &p);
5951
5952  if ((upb_int64_handlerfunc*)f == upb_msg_setint64) {
5953    *type = UPB_TYPE_INT64;
5954  } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) {
5955    *type = UPB_TYPE_INT32;
5956  } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) {
5957    *type = UPB_TYPE_UINT64;
5958  } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) {
5959    *type = UPB_TYPE_UINT32;
5960  } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) {
5961    *type = UPB_TYPE_DOUBLE;
5962  } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) {
5963    *type = UPB_TYPE_FLOAT;
5964  } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) {
5965    *type = UPB_TYPE_BOOL;
5966  } else {
5967    return false;
5968  }
5969
5970  d = p;
5971  *offset = d->offset;
5972  *hasbit = d->hasbit;
5973  return true;
5974}
5975
5976
5977bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink) {
5978  void *subc;
5979  bool ret;
5980  upb_bufhandle handle = UPB_BUFHANDLE_INIT;
5981  handle.buf = buf;
5982  ret = upb_bytessink_start(sink, len, &subc);
5983  if (ret && len != 0) {
5984    ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len);
5985  }
5986  if (ret) {
5987    ret = upb_bytessink_end(sink);
5988  }
5989  return ret;
5990}
5991
5992
5993#ifdef UPB_MSVC_VSNPRINTF
5994/* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and
5995 * vsnprintf. To support them, missing functions are manually implemented
5996 * using the existing secure functions. */
5997int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) {
5998  if (!s) {
5999    return _vscprintf(format, arg);
6000  }
6001  int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg);
6002  if (ret < 0) {
6003	ret = _vscprintf(format, arg);
6004  }
6005  return ret;
6006}
6007
/* snprintf() replacement: collects the variadic arguments and forwards them to
 * msvc_vsnprintf(), returning its result. */
int msvc_snprintf(char* s, size_t n, const char* format, ...) {
  int written;
  va_list args;

  va_start(args, format);
  written = msvc_vsnprintf(s, n, format, args);
  va_end(args);

  return written;
}
6015#endif
6016/*
6017** protobuf decoder bytecode compiler
6018**
6019** Code to compile a upb::Handlers into bytecode for decoding a protobuf
6020** according to that specific schema and destination handlers.
6021**
6022** Bytecode definition is in decoder.int.h.
6023*/
6024
6025#include <stdarg.h>
6026
6027#ifdef UPB_DUMP_BYTECODE
6028#include <stdio.h>
6029#endif
6030
6031
6032#define MAXLABEL 5
6033#define EMPTYLABEL -1
6034
6035/* upb_pbdecodermethod ********************************************************/
6036
6037static void freemethod(upb_pbdecodermethod *method) {
6038  upb_inttable_uninit(&method->dispatch);
6039  upb_gfree(method);
6040}
6041
6042static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers,
6043                                      mgroup *group) {
6044  upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret));
6045  upb_byteshandler_init(&ret->input_handler_);
6046
6047  ret->group = group;
6048  ret->dest_handlers_ = dest_handlers;
6049  upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64);
6050
6051  return ret;
6052}
6053
6054const upb_handlers *upb_pbdecodermethod_desthandlers(
6055    const upb_pbdecodermethod *m) {
6056  return m->dest_handlers_;
6057}
6058
6059const upb_byteshandler *upb_pbdecodermethod_inputhandler(
6060    const upb_pbdecodermethod *m) {
6061  return &m->input_handler_;
6062}
6063
6064bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) {
6065  return m->is_native_;
6066}
6067
6068
6069/* mgroup *********************************************************************/
6070
6071static void freegroup(mgroup *g) {
6072  upb_inttable_iter i;
6073
6074  upb_inttable_begin(&i, &g->methods);
6075  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6076    freemethod(upb_value_getptr(upb_inttable_iter_value(&i)));
6077  }
6078
6079  upb_inttable_uninit(&g->methods);
6080  upb_gfree(g->bytecode);
6081  upb_gfree(g);
6082}
6083
6084mgroup *newgroup(void) {
6085  mgroup *g = upb_gmalloc(sizeof(*g));
6086  upb_inttable_init(&g->methods, UPB_CTYPE_PTR);
6087  g->bytecode = NULL;
6088  g->bytecode_end = NULL;
6089  return g;
6090}
6091
6092
6093/* bytecode compiler **********************************************************/
6094
6095/* Data used only at compilation time. */
typedef struct {
  /* Group whose bytecode buffer is being written. */
  mgroup *group;

  /* Next word to be written in group->bytecode (advanced by put32()). */
  uint32_t *pc;
  /* Per label: bytecode offset of the most recent instruction that refers
   * forward to the label and has not been patched yet (head of a linked list
   * threaded through the instructions), or EMPTYLABEL if there is none. */
  int fwd_labels[MAXLABEL];
  /* Per label: bytecode offset recorded by the most recent label() call, or
   * EMPTYLABEL if the label has not been defined yet. */
  int back_labels[MAXLABEL];

  /* For fields marked "lazy", parse them lazily or eagerly? */
  bool lazy;
} compiler;
6106
6107static compiler *newcompiler(mgroup *group, bool lazy) {
6108  compiler *ret = upb_gmalloc(sizeof(*ret));
6109  int i;
6110
6111  ret->group = group;
6112  ret->lazy = lazy;
6113  for (i = 0; i < MAXLABEL; i++) {
6114    ret->fwd_labels[i] = EMPTYLABEL;
6115    ret->back_labels[i] = EMPTYLABEL;
6116  }
6117  return ret;
6118}
6119
6120static void freecompiler(compiler *c) {
6121  upb_gfree(c);
6122}
6123
6124const size_t ptr_words = sizeof(void*) / sizeof(uint32_t);
6125
6126/* How many words an instruction is. */
6127static int instruction_len(uint32_t instr) {
6128  switch (getop(instr)) {
6129    case OP_SETDISPATCH: return 1 + ptr_words;
6130    case OP_TAGN: return 3;
6131    case OP_SETBIGGROUPNUM: return 2;
6132    default: return 1;
6133  }
6134}
6135
6136bool op_has_longofs(int32_t instruction) {
6137  switch (getop(instruction)) {
6138    case OP_CALL:
6139    case OP_BRANCH:
6140    case OP_CHECKDELIM:
6141      return true;
6142    /* The "tag" instructions only have 8 bytes available for the jump target,
6143     * but that is ok because these opcodes only require short jumps. */
6144    case OP_TAG1:
6145    case OP_TAG2:
6146    case OP_TAGN:
6147      return false;
6148    default:
6149      UPB_ASSERT(false);
6150      return false;
6151  }
6152}
6153
/* Extracts the signed jump offset stored in |instruction|: the sign-extended
 * byte above the opcode for short-offset opcodes, or everything above the
 * opcode byte for long-offset opcodes. */
static int32_t getofs(uint32_t instruction) {
  if (!op_has_longofs(instruction)) {
    return (int8_t)(instruction >> 8);
  }
  return (int32_t)instruction >> 8;
}
6161
/* Stores jump offset |ofs| into |*instruction|.  Long-offset opcodes are
 * rebuilt from the opcode plus the offset; short-offset opcodes only have the
 * byte above the opcode replaced, keeping all other bits.  Asserts that the
 * offset reads back unchanged. */
static void setofs(uint32_t *instruction, int32_t ofs) {
  const uint32_t old = *instruction;

  if (op_has_longofs(old)) {
    *instruction = getop(old) | (uint32_t)ofs << 8;
  } else {
    *instruction = (old & ~0xff00) | ((ofs & 0xff) << 8);
  }

  /* Would fail in cases of overflow. */
  UPB_ASSERT(getofs(*instruction) == ofs);
}
6170
6171static uint32_t pcofs(compiler *c) {
6172  return (uint32_t)(c->pc - c->group->bytecode);
6173}
6174
6175/* Defines a local label at the current PC location.  All previous forward
6176 * references are updated to point to this location.  The location is noted
6177 * for any future backward references. */
6178static void label(compiler *c, unsigned int label) {
6179  int val;
6180  uint32_t *codep;
6181
6182  UPB_ASSERT(label < MAXLABEL);
6183  val = c->fwd_labels[label];
6184  codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val;
6185  while (codep) {
6186    int ofs = getofs(*codep);
6187    setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep)));
6188    codep = ofs ? codep + ofs : NULL;
6189  }
6190  c->fwd_labels[label] = EMPTYLABEL;
6191  c->back_labels[label] = pcofs(c);
6192}
6193
6194/* Creates a reference to a numbered label; either a forward reference
6195 * (positive arg) or backward reference (negative arg).  For forward references
6196 * the value returned now is actually a "next" pointer into a linked list of all
6197 * instructions that use this label and will be patched later when the label is
6198 * defined with label().
6199 *
6200 * The returned value is the offset that should be written into the instruction.
6201 */
6202static int32_t labelref(compiler *c, int label) {
6203  UPB_ASSERT(label < MAXLABEL);
6204  if (label == LABEL_DISPATCH) {
6205    /* No resolving required. */
6206    return 0;
6207  } else if (label < 0) {
6208    /* Backward local label.  Relative to the next instruction. */
6209    uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode);
6210    return c->back_labels[-label] - from;
6211  } else {
6212    /* Forward local label: prepend to (possibly-empty) linked list. */
6213    int *lptr = &c->fwd_labels[label];
6214    int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c);
6215    *lptr = pcofs(c);
6216    return ret;
6217  }
6218}
6219
6220static void put32(compiler *c, uint32_t v) {
6221  mgroup *g = c->group;
6222  if (c->pc == g->bytecode_end) {
6223    int ofs = pcofs(c);
6224    size_t oldsize = g->bytecode_end - g->bytecode;
6225    size_t newsize = UPB_MAX(oldsize * 2, 64);
6226    /* TODO(haberman): handle OOM. */
6227    g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t),
6228                                            newsize * sizeof(uint32_t));
6229    g->bytecode_end = g->bytecode + newsize;
6230    c->pc = g->bytecode + ofs;
6231  }
6232  *c->pc++ = v;
6233}
6234
/* Emits one instruction with opcode |op| at the current PC.  The operands are
 * read from the variadic arguments; their number and types depend on |op|:
 *
 *   OP_SETDISPATCH             : void* (emitted inline after the opcode word)
 *   OP_STARTMSG .. OP_DISPATCH : no operand
 *   OP_PARSE_*, OP_STARTSEQ .. OP_PUSHTAGDELIM
 *                              : upb_selector_t, packed above the opcode byte
 *   OP_SETBIGGROUPNUM          : int, emitted as a second word
 *   OP_CALL                    : upb_pbdecodermethod* (call target)
 *   OP_CHECKDELIM, OP_BRANCH   : int label (see labelref())
 *   OP_TAG1, OP_TAG2, OP_TAGN  : int label, then uint64_t encoded tag
 *
 * Opcodes not listed in the switch emit nothing. */
static void putop(compiler *c, int op, ...) {
  va_list ap;
  va_start(ap, op);

  switch (op) {
    case OP_SETDISPATCH: {
      /* The pointer is written low word first; the high word is only emitted
       * when pointers are wider than 32 bits. */
      uintptr_t ptr = (uintptr_t)va_arg(ap, void*);
      put32(c, OP_SETDISPATCH);
      put32(c, (uint32_t)ptr);
      if (sizeof(uintptr_t) > sizeof(uint32_t))
        put32(c, (uint64_t)ptr >> 32);
      break;
    }
    case OP_STARTMSG:
    case OP_ENDMSG:
    case OP_PUSHLENDELIM:
    case OP_POP:
    case OP_SETDELIM:
    case OP_HALT:
    case OP_RET:
    case OP_DISPATCH:
      /* Single-word instructions without operands. */
      put32(c, op);
      break;
    case OP_PARSE_DOUBLE:
    case OP_PARSE_FLOAT:
    case OP_PARSE_INT64:
    case OP_PARSE_UINT64:
    case OP_PARSE_INT32:
    case OP_PARSE_FIXED64:
    case OP_PARSE_FIXED32:
    case OP_PARSE_BOOL:
    case OP_PARSE_UINT32:
    case OP_PARSE_SFIXED32:
    case OP_PARSE_SFIXED64:
    case OP_PARSE_SINT32:
    case OP_PARSE_SINT64:
    case OP_STARTSEQ:
    case OP_ENDSEQ:
    case OP_STARTSUBMSG:
    case OP_ENDSUBMSG:
    case OP_STARTSTR:
    case OP_STRING:
    case OP_ENDSTR:
    case OP_PUSHTAGDELIM:
      /* Opcode in the low byte, operand in the bits above it. */
      put32(c, op | va_arg(ap, upb_selector_t) << 8);
      break;
    case OP_SETBIGGROUPNUM:
      put32(c, op);
      put32(c, va_arg(ap, int));
      break;
    case OP_CALL: {
      /* The offset is relative to the word following this instruction. */
      const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *);
      put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8);
      break;
    }
    case OP_CHECKDELIM:
    case OP_BRANCH: {
      uint32_t instruction = op;
      int label = va_arg(ap, int);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAG1:
    case OP_TAG2: {
      /* The (at most 16-bit) encoded tag lives in the upper half of the
       * instruction word; the short jump offset goes in the byte above the
       * opcode. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = (uint32_t)(op | (tag << 16));
      UPB_ASSERT(tag <= 0xffff);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      break;
    }
    case OP_TAGN: {
      /* The instruction word carries upb_value_size(tag) in its upper half;
       * the tag itself follows as two words, low word first. */
      int label = va_arg(ap, int);
      uint64_t tag = va_arg(ap, uint64_t);
      uint32_t instruction = op | (upb_value_size(tag) << 16);
      setofs(&instruction, labelref(c, label));
      put32(c, instruction);
      put32(c, (uint32_t)tag);
      put32(c, tag >> 32);
      break;
    }
  }

  va_end(ap);
}
6322
6323#if defined(UPB_DUMP_BYTECODE)
6324
6325const char *upb_pbdecoder_getopname(unsigned int op) {
6326#define QUOTE(x) #x
6327#define EXPAND_AND_QUOTE(x) QUOTE(x)
6328#define OPNAME(x) OP_##x
6329#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x));
6330#define T(x) OP(PARSE_##x)
6331  /* Keep in sync with list in decoder.int.h. */
6332  switch ((opcode)op) {
6333    T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32)
6334    T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64)
6335    OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG)
6336    OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET)
6337    OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM)
6338    OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP)
6339    OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT)
6340  }
6341  return "<unknown op>";
6342#undef OP
6343#undef T
6344}
6345
6346#endif
6347
6348#ifdef UPB_DUMP_BYTECODE
6349
6350static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) {
6351
6352  uint32_t *begin = p;
6353
6354  while (p < end) {
6355    fprintf(f, "%p  %8tx", p, p - begin);
6356    uint32_t instr = *p++;
6357    uint8_t op = getop(instr);
6358    fprintf(f, " %s", upb_pbdecoder_getopname(op));
6359    switch ((opcode)op) {
6360      case OP_SETDISPATCH: {
6361        const upb_inttable *dispatch;
6362        memcpy(&dispatch, p, sizeof(void*));
6363        p += ptr_words;
6364        const upb_pbdecodermethod *method =
6365            (void *)((char *)dispatch -
6366                     offsetof(upb_pbdecodermethod, dispatch));
6367        fprintf(f, " %s", upb_msgdef_fullname(
6368                              upb_handlers_msgdef(method->dest_handlers_)));
6369        break;
6370      }
6371      case OP_DISPATCH:
6372      case OP_STARTMSG:
6373      case OP_ENDMSG:
6374      case OP_PUSHLENDELIM:
6375      case OP_POP:
6376      case OP_SETDELIM:
6377      case OP_HALT:
6378      case OP_RET:
6379        break;
6380      case OP_PARSE_DOUBLE:
6381      case OP_PARSE_FLOAT:
6382      case OP_PARSE_INT64:
6383      case OP_PARSE_UINT64:
6384      case OP_PARSE_INT32:
6385      case OP_PARSE_FIXED64:
6386      case OP_PARSE_FIXED32:
6387      case OP_PARSE_BOOL:
6388      case OP_PARSE_UINT32:
6389      case OP_PARSE_SFIXED32:
6390      case OP_PARSE_SFIXED64:
6391      case OP_PARSE_SINT32:
6392      case OP_PARSE_SINT64:
6393      case OP_STARTSEQ:
6394      case OP_ENDSEQ:
6395      case OP_STARTSUBMSG:
6396      case OP_ENDSUBMSG:
6397      case OP_STARTSTR:
6398      case OP_STRING:
6399      case OP_ENDSTR:
6400      case OP_PUSHTAGDELIM:
6401        fprintf(f, " %d", instr >> 8);
6402        break;
6403      case OP_SETBIGGROUPNUM:
6404        fprintf(f, " %d", *p++);
6405        break;
6406      case OP_CHECKDELIM:
6407      case OP_CALL:
6408      case OP_BRANCH:
6409        fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6410        break;
6411      case OP_TAG1:
6412      case OP_TAG2: {
6413        fprintf(f, " tag:0x%x", instr >> 16);
6414        if (getofs(instr)) {
6415          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6416        }
6417        break;
6418      }
6419      case OP_TAGN: {
6420        uint64_t tag = *p++;
6421        tag |= (uint64_t)*p++ << 32;
6422        fprintf(f, " tag:0x%llx", (long long)tag);
6423        fprintf(f, " n:%d", instr >> 16);
6424        if (getofs(instr)) {
6425          fprintf(f, " =>0x%tx", p + getofs(instr) - begin);
6426        }
6427        break;
6428      }
6429    }
6430    fputs("\n", f);
6431  }
6432}
6433
6434#endif
6435
6436static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) {
6437  uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type;
6438  uint64_t encoded_tag = upb_vencode32(tag);
6439  /* No tag should be greater than 5 bytes. */
6440  UPB_ASSERT(encoded_tag <= 0xffffffffff);
6441  return encoded_tag;
6442}
6443
6444static void putchecktag(compiler *c, const upb_fielddef *f,
6445                        int wire_type, int dest) {
6446  uint64_t tag = get_encoded_tag(f, wire_type);
6447  switch (upb_value_size(tag)) {
6448    case 1:
6449      putop(c, OP_TAG1, dest, tag);
6450      break;
6451    case 2:
6452      putop(c, OP_TAG2, dest, tag);
6453      break;
6454    default:
6455      putop(c, OP_TAGN, dest, tag);
6456      break;
6457  }
6458}
6459
6460static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) {
6461  upb_selector_t selector;
6462  bool ok = upb_handlers_getselector(f, type, &selector);
6463  UPB_ASSERT(ok);
6464  return selector;
6465}
6466
6467/* Takes an existing, primary dispatch table entry and repacks it with a
6468 * different alternate wire type.  Called when we are inserting a secondary
6469 * dispatch table entry for an alternate wire type. */
6470static uint64_t repack(uint64_t dispatch, int new_wt2) {
6471  uint64_t ofs;
6472  uint8_t wt1;
6473  uint8_t old_wt2;
6474  upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2);
6475  UPB_ASSERT(old_wt2 == NO_WIRE_TYPE);  /* wt2 should not be set yet. */
6476  return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2);
6477}
6478
6479/* Marks the current bytecode position as the dispatch target for this message,
6480 * field, and wire type. */
6481static void dispatchtarget(compiler *c, upb_pbdecodermethod *method,
6482                           const upb_fielddef *f, int wire_type) {
6483  /* Offset is relative to msg base. */
6484  uint64_t ofs = pcofs(c) - method->code_base.ofs;
6485  uint32_t fn = upb_fielddef_number(f);
6486  upb_inttable *d = &method->dispatch;
6487  upb_value v;
6488  if (upb_inttable_remove(d, fn, &v)) {
6489    /* TODO: prioritize based on packed setting in .proto file. */
6490    uint64_t repacked = repack(upb_value_getuint64(v), wire_type);
6491    upb_inttable_insert(d, fn, upb_value_uint64(repacked));
6492    upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs));
6493  } else {
6494    uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE);
6495    upb_inttable_insert(d, fn, upb_value_uint64(val));
6496  }
6497}
6498
6499static void putpush(compiler *c, const upb_fielddef *f) {
6500  if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) {
6501    putop(c, OP_PUSHLENDELIM);
6502  } else {
6503    uint32_t fn = upb_fielddef_number(f);
6504    if (fn >= 1 << 24) {
6505      putop(c, OP_PUSHTAGDELIM, 0);
6506      putop(c, OP_SETBIGGROUPNUM, fn);
6507    } else {
6508      putop(c, OP_PUSHTAGDELIM, fn);
6509    }
6510  }
6511}
6512
6513static upb_pbdecodermethod *find_submethod(const compiler *c,
6514                                           const upb_pbdecodermethod *method,
6515                                           const upb_fielddef *f) {
6516  const upb_handlers *sub =
6517      upb_handlers_getsubhandlers(method->dest_handlers_, f);
6518  upb_value v;
6519  return upb_inttable_lookupptr(&c->group->methods, sub, &v)
6520             ? upb_value_getptr(v)
6521             : NULL;
6522}
6523
6524static void putsel(compiler *c, opcode op, upb_selector_t sel,
6525                   const upb_handlers *h) {
6526  if (upb_handlers_gethandler(h, sel, NULL)) {
6527    putop(c, op, sel);
6528  }
6529}
6530
6531/* Puts an opcode to call a callback, but only if a callback actually exists for
6532 * this field and handler type. */
6533static void maybeput(compiler *c, opcode op, const upb_handlers *h,
6534                     const upb_fielddef *f, upb_handlertype_t type) {
6535  putsel(c, op, getsel(f, type), h);
6536}
6537
6538static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) {
6539  if (!upb_fielddef_lazy(f))
6540    return false;
6541
6542  return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) ||
6543         upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) ||
6544         upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL);
6545}
6546
6547
6548/* bytecode compiler code generation ******************************************/
6549
6550/* Symbolic names for our local labels. */
6551#define LABEL_LOOPSTART 1  /* Top of a repeated field loop. */
6552#define LABEL_LOOPBREAK 2  /* To jump out of a repeated loop */
6553#define LABEL_FIELD     3  /* Jump backward to find the most recent field. */
6554#define LABEL_ENDMSG    4  /* To reach the OP_ENDMSG instr for this msg. */
6555
/* Generates bytecode to parse a single non-lazy message field.
 *
 * Nothing is emitted when the group has no decoder method for the field's
 * sub-handlers.  Otherwise the emitted code starts at LABEL_FIELD with a
 * delimiter check and a tag check, registers the position after those checks
 * as the field's dispatch target, and then pushes a frame, starts the
 * submessage, calls the sub-method and pops the frame again.  Repeated fields
 * wrap that sequence in a STARTSEQ/ENDSEQ pair and loop for as long as the
 * same tag follows.
 *
 * The lines that are outdented by one column (dispatchtarget / label) mark
 * positions in the instruction stream rather than emitting instructions. */
static void generate_msgfield(compiler *c, const upb_fielddef *f,
                              upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  const upb_pbdecodermethod *sub_m = find_submethod(c, method, f);
  int wire_type;

  if (!sub_m) {
    /* Don't emit any code for this field at all; it will be parsed as an
     * unknown field.
     *
     * TODO(haberman): we should change this to parse it as a string field
     * instead.  It will probably be faster, but more importantly, once we
     * start vending unknown fields, a field shouldn't be treated as unknown
     * just because it doesn't have subhandlers registered. */
    return;
  }

  label(c, LABEL_FIELD);

  /* MESSAGE-typed fields are length-delimited; all other submessage fields
   * use the START_GROUP wire type. */
  wire_type =
      (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE)
          ? UPB_WIRE_TYPE_DELIMITED
          : UPB_WIRE_TYPE_START_GROUP;

  if (upb_fielddef_isseq(f)) {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
   dispatchtarget(c, method, f, wire_type);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
   label(c, LABEL_LOOPSTART);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    /* ENDSUBMSG is only emitted if a handler is registered for it. */
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
    /* Leave the loop at a delimiter or at a different tag; otherwise go
     * around again for the next element. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  } else {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
   dispatchtarget(c, method, f, wire_type);
    putpush(c, f);
    putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG));
    putop(c, OP_CALL, sub_m);
    putop(c, OP_POP);
    /* ENDSUBMSG is only emitted if a handler is registered for it. */
    maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG);
    if (wire_type == UPB_WIRE_TYPE_DELIMITED) {
      putop(c, OP_SETDELIM);
    }
  }
}
6616
6617/* Generates bytecode to parse a single string or lazy submessage field. */
6618static void generate_delimfield(compiler *c, const upb_fielddef *f,
6619                                upb_pbdecodermethod *method) {
6620  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
6621
6622  label(c, LABEL_FIELD);
6623  if (upb_fielddef_isseq(f)) {
6624    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6625    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6626   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6627    putop(c, OP_PUSHTAGDELIM, 0);
6628    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
6629   label(c, LABEL_LOOPSTART);
6630    putop(c, OP_PUSHLENDELIM);
6631    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6632    /* Need to emit even if no handler to skip past the string. */
6633    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
6634    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
6635    putop(c, OP_POP);
6636    putop(c, OP_SETDELIM);
6637    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
6638    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
6639    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
6640   label(c, LABEL_LOOPBREAK);
6641    putop(c, OP_POP);
6642    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
6643  } else {
6644    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6645    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
6646   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
6647    putop(c, OP_PUSHLENDELIM);
6648    putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
6649    putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
6650    maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
6651    putop(c, OP_POP);
6652    putop(c, OP_SETDELIM);
6653  }
6654}
6655
/* Generates bytecode to parse a single primitive field.
 *
 * c:      compiler state; bytecode is appended at its current position.
 * f:      the primitive (non-string, non-message) field to parse.
 * method: decoder method being compiled; supplies the destination handlers
 *         and receives the dispatch targets for this field.
 *
 * For a repeated field two loops are emitted back to back -- one entered via
 * the DELIMITED (packed) tag and one entered via the field's native wire
 * type -- and both finish at the same LABEL_LOOPBREAK epilogue. */
static void generate_primitivefield(compiler *c, const upb_fielddef *f,
                                    upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f);
  opcode parse_type;
  upb_selector_t sel;
  int wire_type;

  label(c, LABEL_FIELD);

  /* From a decoding perspective, ENUM is the same as INT32. */
  if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM)
    descriptor_type = UPB_DESCRIPTOR_TYPE_INT32;

  /* The descriptor type value is used directly as the parsing opcode. */
  parse_type = (opcode)descriptor_type;

  /* TODO(haberman): generate packed or non-packed first depending on "packed"
   * setting in the fielddef.  This will favor (in speed) whichever was
   * specified. */

  UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX);
  sel = getsel(f, upb_handlers_getprimitivehandlertype(f));
  /* Note: looked up with the field's original descriptor type, not the
   * ENUM->INT32 adjusted value above. */
  wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];
  if (upb_fielddef_isseq(f)) {
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
   dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);
    putop(c, OP_PUSHLENDELIM);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Packed */
   label(c, LABEL_LOOPSTART);
    putop(c, parse_type, sel);
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    /* Second entry point: the same field arriving with its native wire type. */
   dispatchtarget(c, method, f, wire_type);
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));  /* Non-packed */
   label(c, LABEL_LOOPSTART);
    putop(c, parse_type, sel);
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, wire_type, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
   label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);  /* Packed and non-packed join. */
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
    putop(c, OP_SETDELIM);  /* Could remove for non-packed by dup ENDSEQ. */
  } else {
    /* Singular field: check for end-of-message, check the tag, parse once. */
    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
    putchecktag(c, f, wire_type, LABEL_DISPATCH);
   dispatchtarget(c, method, f, wire_type);
    putop(c, parse_type, sel);
  }
}
6709
6710/* Adds bytecode for parsing the given message to the given decoderplan,
6711 * while adding all dispatch targets to this message's dispatch table. */
6712static void compile_method(compiler *c, upb_pbdecodermethod *method) {
6713  const upb_handlers *h;
6714  const upb_msgdef *md;
6715  uint32_t* start_pc;
6716  upb_msg_field_iter i;
6717  upb_value val;
6718
6719  UPB_ASSERT(method);
6720
6721  /* Clear all entries in the dispatch table. */
6722  upb_inttable_uninit(&method->dispatch);
6723  upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64);
6724
6725  h = upb_pbdecodermethod_desthandlers(method);
6726  md = upb_handlers_msgdef(h);
6727
6728 method->code_base.ofs = pcofs(c);
6729  putop(c, OP_SETDISPATCH, &method->dispatch);
6730  putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h);
6731 label(c, LABEL_FIELD);
6732  start_pc = c->pc;
6733  for(upb_msg_field_begin(&i, md);
6734      !upb_msg_field_done(&i);
6735      upb_msg_field_next(&i)) {
6736    const upb_fielddef *f = upb_msg_iter_field(&i);
6737    upb_fieldtype_t type = upb_fielddef_type(f);
6738
6739    if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) {
6740      generate_msgfield(c, f, method);
6741    } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES ||
6742               type == UPB_TYPE_MESSAGE) {
6743      generate_delimfield(c, f, method);
6744    } else {
6745      generate_primitivefield(c, f, method);
6746    }
6747  }
6748
6749  /* If there were no fields, or if no handlers were defined, we need to
6750   * generate a non-empty loop body so that we can at least dispatch for unknown
6751   * fields and check for the end of the message. */
6752  if (c->pc == start_pc) {
6753    /* Check for end-of-message. */
6754    putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
6755    /* Unconditionally dispatch. */
6756    putop(c, OP_DISPATCH, 0);
6757  }
6758
6759  /* For now we just loop back to the last field of the message (or if none,
6760   * the DISPATCH opcode for the message). */
6761  putop(c, OP_BRANCH, -LABEL_FIELD);
6762
6763  /* Insert both a label and a dispatch table entry for this end-of-msg. */
6764 label(c, LABEL_ENDMSG);
6765  val = upb_value_uint64(pcofs(c) - method->code_base.ofs);
6766  upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val);
6767
6768  putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h);
6769  putop(c, OP_RET);
6770
6771  upb_inttable_compact(&method->dispatch);
6772}
6773
6774/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h".
6775 * Returns the method for these handlers.
6776 *
6777 * Generates a new method for every destination handlers reachable from "h". */
6778static void find_methods(compiler *c, const upb_handlers *h) {
6779  upb_value v;
6780  upb_msg_field_iter i;
6781  const upb_msgdef *md;
6782  upb_pbdecodermethod *method;
6783
6784  if (upb_inttable_lookupptr(&c->group->methods, h, &v))
6785    return;
6786
6787  method = newmethod(h, c->group);
6788  upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method));
6789
6790  /* Find submethods. */
6791  md = upb_handlers_msgdef(h);
6792  for(upb_msg_field_begin(&i, md);
6793      !upb_msg_field_done(&i);
6794      upb_msg_field_next(&i)) {
6795    const upb_fielddef *f = upb_msg_iter_field(&i);
6796    const upb_handlers *sub_h;
6797    if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE &&
6798        (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) {
6799      /* We only generate a decoder method for submessages with handlers.
6800       * Others will be parsed as unknown fields. */
6801      find_methods(c, sub_h);
6802    }
6803  }
6804}
6805
6806/* (Re-)compile bytecode for all messages in "msgs."
6807 * Overwrites any existing bytecode in "c". */
6808static void compile_methods(compiler *c) {
6809  upb_inttable_iter i;
6810
6811  /* Start over at the beginning of the bytecode. */
6812  c->pc = c->group->bytecode;
6813
6814  upb_inttable_begin(&i, &c->group->methods);
6815  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6816    upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i));
6817    compile_method(c, method);
6818  }
6819}
6820
6821static void set_bytecode_handlers(mgroup *g) {
6822  upb_inttable_iter i;
6823  upb_inttable_begin(&i, &g->methods);
6824  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6825    upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i));
6826    upb_byteshandler *h = &m->input_handler_;
6827
6828    m->code_base.ptr = g->bytecode + m->code_base.ofs;
6829
6830    upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr);
6831    upb_byteshandler_setstring(h, upb_pbdecoder_decode, g);
6832    upb_byteshandler_setendstr(h, upb_pbdecoder_end, m);
6833  }
6834}
6835
6836
6837/* TODO(haberman): allow this to be constructed for an arbitrary set of dest
6838 * handlers and other mgroups (but verify we have a transitive closure). */
6839const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) {
6840  mgroup *g;
6841  compiler *c;
6842
6843  g = newgroup();
6844  c = newcompiler(g, lazy);
6845  find_methods(c, dest);
6846
6847  /* We compile in two passes:
6848   * 1. all messages are assigned relative offsets from the beginning of the
6849   *    bytecode (saved in method->code_base).
6850   * 2. forwards OP_CALL instructions can be correctly linked since message
6851   *    offsets have been previously assigned.
6852   *
6853   * Could avoid the second pass by linking OP_CALL instructions somehow. */
6854  compile_methods(c);
6855  compile_methods(c);
6856  g->bytecode_end = c->pc;
6857  freecompiler(c);
6858
6859#ifdef UPB_DUMP_BYTECODE
6860  {
6861    FILE *f = fopen("/tmp/upb-bytecode", "w");
6862    UPB_ASSERT(f);
6863    dumpbc(g->bytecode, g->bytecode_end, stderr);
6864    dumpbc(g->bytecode, g->bytecode_end, f);
6865    fclose(f);
6866
6867    f = fopen("/tmp/upb-bytecode.bin", "wb");
6868    UPB_ASSERT(f);
6869    fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f);
6870    fclose(f);
6871  }
6872#endif
6873
6874  set_bytecode_handlers(g);
6875  return g;
6876}
6877
6878
6879/* upb_pbcodecache ************************************************************/
6880
6881upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) {
6882  upb_pbcodecache *c = upb_gmalloc(sizeof(*c));
6883
6884  if (!c) return NULL;
6885
6886  c->dest = dest;
6887  c->lazy = false;
6888
6889  c->arena = upb_arena_new();
6890  if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL;
6891
6892  return c;
6893}
6894
6895void upb_pbcodecache_free(upb_pbcodecache *c) {
6896  upb_inttable_iter i;
6897
6898  upb_inttable_begin(&i, &c->groups);
6899  for(; !upb_inttable_done(&i); upb_inttable_next(&i)) {
6900    upb_value val = upb_inttable_iter_value(&i);
6901    freegroup((void*)upb_value_getconstptr(val));
6902  }
6903
6904  upb_inttable_uninit(&c->groups);
6905  upb_arena_free(c->arena);
6906  upb_gfree(c);
6907}
6908
/* Sets the cache's "lazy" flag, which is passed to mgroup_new() for every
 * group compiled afterwards.  Asserts that no group has been compiled yet, so
 * it must be called before the first upb_pbcodecache_get(). */
void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) {
  UPB_ASSERT(upb_inttable_count(&c->groups) == 0);
  c->lazy = lazy;
}
6913
6914const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c,
6915                                               const upb_msgdef *md) {
6916  upb_value v;
6917  bool ok;
6918  const upb_handlers *h;
6919  const mgroup *g;
6920
6921  h = upb_handlercache_get(c->dest, md);
6922  if (upb_inttable_lookupptr(&c->groups, md, &v)) {
6923    g = upb_value_getconstptr(v);
6924  } else {
6925    g = mgroup_new(h, c->lazy);
6926    ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g));
6927    UPB_ASSUME(ok);
6928  }
6929
6930  ok = upb_inttable_lookupptr(&g->methods, h, &v);
6931  UPB_ASSUME(ok);
6932  return upb_value_getptr(v);
6933}
6934/*
6935** upb::Decoder (Bytecode Decoder VM)
6936**
6937** Bytecode must previously have been generated using the bytecode compiler in
6938** compile_decoder.c.  This decoder then walks through the bytecode op-by-op to
6939** parse the input.
6940**
6941** Decoding is fully resumable; we just keep a pointer to the current bytecode
6942** instruction and resume from there.  A fair amount of the logic here is to
** handle the fact that values can span buffer seams and we have to be capable
** of suspending/resuming from any byte in the stream.  This
6945** sometimes requires keeping a few trailing bytes from the last buffer around
6946** in the "residual" buffer.
6947*/
6948
6949#include <inttypes.h>
6950#include <stddef.h>
6951
6952#ifdef UPB_DUMP_BYTECODE
6953#include <stdio.h>
6954#endif
6955
6956
/* Returns upb_pbdecoder_suspend(d) from the enclosing function when (x) is
 * false.  Requires a variable named "d" to be in scope at the use site.
 * Note that this expands to a bare "if" statement that already ends in a
 * semicolon, so take care when using it inside an unbraced if/else. */
#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d);
6958
/* Error messages that are shared between the bytecode and JIT decoders.
 * These are deliberately not "static", so they have external linkage. */
const char *kPbDecoderStackOverflow = "Nesting too deep.";
const char *kPbDecoderSubmessageTooLong =
    "Submessage end extends past enclosing submessage.";

/* Error messages shared within this file. */
static const char *kUnterminatedVarint = "Unterminated varint.";
6966
6967/* upb_pbdecoder **************************************************************/
6968
/* A single OP_HALT instruction with static storage.
 * NOTE(review): presumably a target for the decoder's pc once decoding has
 * halted; its users are not in this part of the file -- confirm. */
static opcode halt = OP_HALT;

/* A dummy character we can point to when the user passes us a NULL buffer.
 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined
 * behavior, which would invalidate functions like curbufleft(). */
static const char dummy_char;
6975
6976/* Whether an op consumes any of the input buffer. */
6977static bool consumes_input(opcode op) {
6978  switch (op) {
6979    case OP_SETDISPATCH:
6980    case OP_STARTMSG:
6981    case OP_ENDMSG:
6982    case OP_STARTSEQ:
6983    case OP_ENDSEQ:
6984    case OP_STARTSUBMSG:
6985    case OP_ENDSUBMSG:
6986    case OP_STARTSTR:
6987    case OP_ENDSTR:
6988    case OP_PUSHTAGDELIM:
6989    case OP_POP:
6990    case OP_SETDELIM:
6991    case OP_SETBIGGROUPNUM:
6992    case OP_CHECKDELIM:
6993    case OP_CALL:
6994    case OP_RET:
6995    case OP_BRANCH:
6996      return false;
6997    default:
6998      return true;
6999  }
7000}
7001
7002static size_t stacksize(upb_pbdecoder *d, size_t entries) {
7003  UPB_UNUSED(d);
7004  return entries * sizeof(upb_pbdecoder_frame);
7005}
7006
7007static size_t callstacksize(upb_pbdecoder *d, size_t entries) {
7008  UPB_UNUSED(d);
7009
7010  return entries * sizeof(uint32_t*);
7011}
7012
7013
7014static bool in_residual_buf(const upb_pbdecoder *d, const char *p);
7015
7016/* It's unfortunate that we have to micro-manage the compiler with
7017 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily
7018 * specific to one hardware configuration.  But empirically on a Core i7,
7019 * performance increases 30-50% with these annotations.  Every instance where
7020 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in
7021 * benchmarks. */
7022
/* Records "msg" as the error message on the decoder's status object. */
static void seterr(upb_pbdecoder *d, const char *msg) {
  upb_status_seterrmsg(d->status, msg);
}
7026
7027void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) {
7028  seterr(d, msg);
7029}
7030
7031
7032/* Buffering ******************************************************************/
7033
7034/* We operate on one buffer at a time, which is either the user's buffer passed
7035 * to our "decode" callback or some residual bytes from the previous buffer. */
7036
7037/* How many bytes can be safely read from d->ptr without reading past end-of-buf
7038 * or past the current delimited end. */
7039static size_t curbufleft(const upb_pbdecoder *d) {
7040  UPB_ASSERT(d->data_end >= d->ptr);
7041  return d->data_end - d->ptr;
7042}
7043
7044/* How many bytes are available before end-of-buffer. */
7045static size_t bufleft(const upb_pbdecoder *d) {
7046  return d->end - d->ptr;
7047}
7048
7049/* Overall stream offset of d->ptr. */
7050uint64_t offset(const upb_pbdecoder *d) {
7051  return d->bufstart_ofs + (d->ptr - d->buf);
7052}
7053
7054/* How many bytes are available before the end of this delimited region. */
7055size_t delim_remaining(const upb_pbdecoder *d) {
7056  return d->top->end_ofs - offset(d);
7057}
7058
7059/* Advances d->ptr. */
7060static void advance(upb_pbdecoder *d, size_t len) {
7061  UPB_ASSERT(curbufleft(d) >= len);
7062  d->ptr += len;
7063}
7064
/* True if "p" lies in the closed range [buf, end].  Note that "end" itself
 * counts as inside. */
static bool in_buf(const char *p, const char *buf, const char *end) {
  return !(p < buf || p > end);
}
7068
7069static bool in_residual_buf(const upb_pbdecoder *d, const char *p) {
7070  return in_buf(p, d->residual, d->residual_end);
7071}
7072
7073/* Calculates the delim_end value, which is affected by both the current buffer
7074 * and the parsing stack, so must be called whenever either is updated. */
7075static void set_delim_end(upb_pbdecoder *d) {
7076  size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs;
7077  if (delim_ofs <= (size_t)(d->end - d->buf)) {
7078    d->delim_end = d->buf + delim_ofs;
7079    d->data_end = d->delim_end;
7080  } else {
7081    d->data_end = d->end;
7082    d->delim_end = NULL;
7083  }
7084}
7085
7086static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) {
7087  d->ptr = buf;
7088  d->buf = buf;
7089  d->end = end;
7090  set_delim_end(d);
7091}
7092
7093static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) {
7094  UPB_ASSERT(curbufleft(d) == 0);
7095  d->bufstart_ofs += (d->end - d->buf);
7096  switchtobuf(d, buf, buf + len);
7097}
7098
/* Records the current read position as the checkpoint, i.e. the position the
 * suspend functions in this file fall back to. */
static void checkpoint(upb_pbdecoder *d) {
  /* The assertion here is in the interests of efficiency, not correctness.
   * We are trying to ensure that we don't checkpoint() more often than
   * necessary. */
  UPB_ASSERT(d->checkpoint != d->ptr);
  d->checkpoint = d->ptr;
}
7106
7107/* Skips "bytes" bytes in the stream, which may be more than available.  If we
7108 * skip more bytes than are available, we return a long read count to the caller
7109 * indicating how many bytes can be skipped over before passing actual data
7110 * again.  Skipped bytes can pass a NULL buffer and the decoder guarantees they
7111 * won't actually be read.
7112 */
7113static int32_t skip(upb_pbdecoder *d, size_t bytes) {
7114  UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0);
7115  UPB_ASSERT(d->skip == 0);
7116  if (bytes > delim_remaining(d)) {
7117    seterr(d, "Skipped value extended beyond enclosing submessage.");
7118    return (int32_t)upb_pbdecoder_suspend(d);
7119  } else if (bufleft(d) >= bytes) {
7120    /* Skipped data is all in current buffer, and more is still available. */
7121    advance(d, bytes);
7122    d->skip = 0;
7123    return DECODE_OK;
7124  } else {
7125    /* Skipped data extends beyond currently available buffers. */
7126    d->pc = d->last;
7127    d->skip = bytes - curbufleft(d);
7128    d->bufstart_ofs += (d->end - d->buf);
7129    d->residual_end = d->residual;
7130    switchtobuf(d, d->residual, d->residual_end);
7131    return (int32_t)(d->size_param + d->skip);
7132  }
7133}
7134
7135
/* Resumes the decoder from an initial state or from a previous suspend.
 *
 * d:      the decoder.
 * p:      unused here.
 * buf:    the new input buffer; may be NULL only if the pending skip covers
 *         all "size" bytes.
 * size:   number of bytes in "buf".
 * handle: buffer handle, stored in the decoder.
 *
 * Returns DECODE_OK when decoding can proceed with the new buffer.  Any other
 * value comes from a failed or suspended step (NULL-buffer error, skip(), or
 * skipping inside an unknown group) and is the value to hand back to the
 * caller. */
int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf,
                             size_t size, const upb_bufhandle *handle) {
  UPB_UNUSED(p);  /* Useless; just for the benefit of the JIT. */

  /* d->skip and d->residual_end could probably elegantly be represented
   * as a single variable, to more easily represent this invariant. */
  UPB_ASSERT(!(d->skip && d->residual_end > d->residual));

  /* We need to remember the original size_param, so that the value we return
   * is relative to it, even if we do some skipping first. */
  d->size_param = size;
  d->handle = handle;

  /* Have to handle this case specially (ie. not with skip()) because the user
   * is allowed to pass a NULL buffer here, which won't allow us to safely
   * calculate a d->end or use our normal functions like curbufleft(). */
  if (d->skip && d->skip >= size) {
    d->skip -= size;
    d->bufstart_ofs += size;
    /* Substitute a valid, empty buffer for the (possibly NULL) skipped one. */
    buf = &dummy_char;
    size = 0;

    /* We can't just return now, because we might need to execute some ops
     * like CHECKDELIM, which could call some callbacks and pop the stack. */
  }

  /* We need to pretend that this was the actual buffer param, since some of the
   * calculations assume that d->ptr/d->buf is relative to this. */
  d->buf_param = buf;

  if (!buf) {
    /* NULL buf is ok if its entire span is covered by the "skip" above, but
     * by this point we know that "skip" doesn't cover the buffer. */
    seterr(d, "Passed NULL buffer over non-skippable region.");
    return (int32_t)upb_pbdecoder_suspend(d);
  }

  if (d->residual_end > d->residual) {
    /* We have residual bytes from the last buffer. */
    UPB_ASSERT(d->ptr == d->residual);
  } else {
    /* No residual bytes: read directly from the user's buffer. */
    switchtobuf(d, buf, buf + size);
  }

  d->checkpoint = d->ptr;

  /* Handle skips that don't cover the whole buffer (as above). */
  if (d->skip) {
    size_t skip_bytes = d->skip;
    /* skip() asserts that d->skip is zero on entry. */
    d->skip = 0;
    CHECK_RETURN(skip(d, skip_bytes));
    checkpoint(d);
  }

  /* If we're inside an unknown group, continue to parse unknown values. */
  if (d->top->groupnum < 0) {
    CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0));
    checkpoint(d);
  }

  return DECODE_OK;
}
7199
7200/* Suspends the decoder at the last checkpoint, without saving any residual
7201 * bytes.  If there are any unconsumed bytes, returns a short byte count. */
7202size_t upb_pbdecoder_suspend(upb_pbdecoder *d) {
7203  d->pc = d->last;
7204  if (d->checkpoint == d->residual) {
7205    /* Checkpoint was in residual buf; no user bytes were consumed. */
7206    d->ptr = d->residual;
7207    return 0;
7208  } else {
7209    size_t ret = d->size_param - (d->end - d->checkpoint);
7210    UPB_ASSERT(!in_residual_buf(d, d->checkpoint));
7211    UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char);
7212
7213    d->bufstart_ofs += (d->checkpoint - d->buf);
7214    d->residual_end = d->residual;
7215    switchtobuf(d, d->residual, d->residual_end);
7216    return ret;
7217  }
7218}
7219
/* Suspends the decoder at the last checkpoint, and saves any unconsumed
 * bytes in our residual buffer.  This is necessary if we need more user
 * bytes to form a complete value, which might not be contiguous in the
 * user's buffers.  Always consumes all user bytes.
 *
 * Returns d->size_param, i.e. the full size of the user's buffer.  On return
 * the residual buffer is the decoder's current buffer. */
static size_t suspend_save(upb_pbdecoder *d) {
  /* We hit end-of-buffer before we could parse a full value.
   * Save any unconsumed bytes (if any) to the residual buffer. */
  d->pc = d->last;

  if (d->checkpoint == d->residual) {
    /* Checkpoint was in residual buf; append user byte(s) to residual buf. */
    UPB_ASSERT((d->residual_end - d->residual) + d->size_param <=
           sizeof(d->residual));
    if (!in_residual_buf(d, d->ptr)) {
      /* We had already moved on to the user's buffer; rewind the buffer start
       * offset by the number of residual bytes. */
      d->bufstart_ofs -= (d->residual_end - d->residual);
    }
    memcpy(d->residual_end, d->buf_param, d->size_param);
    d->residual_end += d->size_param;
  } else {
    /* Checkpoint was in user buf; old residual bytes not needed. */
    size_t save;
    UPB_ASSERT(!in_residual_buf(d, d->checkpoint));

    /* Rewind to the checkpoint and keep everything from there on. */
    d->ptr = d->checkpoint;
    save = curbufleft(d);
    UPB_ASSERT(save <= sizeof(d->residual));
    memcpy(d->residual, d->ptr, save);
    d->residual_end = d->residual + save;
    /* The residual buffer now starts at the checkpoint's stream offset. */
    d->bufstart_ofs = offset(d);
  }

  switchtobuf(d, d->residual, d->residual_end);
  return d->size_param;
}
7254
7255/* Copies the next "bytes" bytes into "buf" and advances the stream.
7256 * Requires that this many bytes are available in the current buffer. */
7257UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf,
7258                                         size_t bytes) {
7259  UPB_ASSERT(bytes <= curbufleft(d));
7260  memcpy(buf, d->ptr, bytes);
7261  advance(d, bytes);
7262}
7263
7264/* Slow path for getting the next "bytes" bytes, regardless of whether they are
7265 * available in the current buffer or not.  Returns a status code as described
7266 * in decoder.int.h. */
7267UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf,
7268                                          size_t bytes) {
7269  const size_t avail = curbufleft(d);
7270  consumebytes(d, buf, avail);
7271  bytes -= avail;
7272  UPB_ASSERT(bytes > 0);
7273  if (in_residual_buf(d, d->ptr)) {
7274    advancetobuf(d, d->buf_param, d->size_param);
7275  }
7276  if (curbufleft(d) >= bytes) {
7277    consumebytes(d, (char *)buf + avail, bytes);
7278    return DECODE_OK;
7279  } else if (d->data_end == d->delim_end) {
7280    seterr(d, "Submessage ended in the middle of a value or group");
7281    return (int32_t)upb_pbdecoder_suspend(d);
7282  } else {
7283    return (int32_t)suspend_save(d);
7284  }
7285}
7286
7287/* Gets the next "bytes" bytes, regardless of whether they are available in the
7288 * current buffer or not.  Returns a status code as described in decoder.int.h.
7289 */
7290UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf,
7291                                        size_t bytes) {
7292  if (curbufleft(d) >= bytes) {
7293    /* Buffer has enough data to satisfy. */
7294    consumebytes(d, buf, bytes);
7295    return DECODE_OK;
7296  } else {
7297    return getbytes_slow(d, buf, bytes);
7298  }
7299}
7300
7301UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf,
7302                                          size_t bytes) {
7303  size_t ret = curbufleft(d);
7304  memcpy(buf, d->ptr, ret);
7305  if (in_residual_buf(d, d->ptr)) {
7306    size_t copy = UPB_MIN(bytes - ret, d->size_param);
7307    memcpy((char *)buf + ret, d->buf_param, copy);
7308    ret += copy;
7309  }
7310  return ret;
7311}
7312
7313UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf,
7314                                        size_t bytes) {
7315  if (curbufleft(d) >= bytes) {
7316    memcpy(buf, d->ptr, bytes);
7317    return bytes;
7318  } else {
7319    return peekbytes_slow(d, buf, bytes);
7320  }
7321}
7322
7323
7324/* Decoding of wire types *****************************************************/
7325
7326/* Slow path for decoding a varint from the current buffer position.
7327 * Returns a status code as described in decoder.int.h. */
7328UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d,
7329                                                      uint64_t *u64) {
7330  uint8_t byte = 0x80;
7331  int bitpos;
7332  *u64 = 0;
7333  for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) {
7334    CHECK_RETURN(getbytes(d, &byte, 1));
7335    *u64 |= (uint64_t)(byte & 0x7F) << bitpos;
7336  }
7337  if(bitpos == 70 && (byte & 0x80)) {
7338    seterr(d, kUnterminatedVarint);
7339    return (int32_t)upb_pbdecoder_suspend(d);
7340  }
7341  return DECODE_OK;
7342}
7343
7344/* Decodes a varint from the current buffer position.
7345 * Returns a status code as described in decoder.int.h. */
7346UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) {
7347  if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) {
7348    *u64 = *d->ptr;
7349    advance(d, 1);
7350    return DECODE_OK;
7351  } else if (curbufleft(d) >= 10) {
7352    /* Fast case. */
7353    upb_decoderet r = upb_vdecode_fast(d->ptr);
7354    if (r.p == NULL) {
7355      seterr(d, kUnterminatedVarint);
7356      return (int32_t)upb_pbdecoder_suspend(d);
7357    }
7358    advance(d, r.p - d->ptr);
7359    *u64 = r.val;
7360    return DECODE_OK;
7361  } else {
7362    /* Slow case -- varint spans buffer seam. */
7363    return upb_pbdecoder_decode_varint_slow(d, u64);
7364  }
7365}
7366
7367/* Decodes a 32-bit varint from the current buffer position.
7368 * Returns a status code as described in decoder.int.h. */
7369UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) {
7370  uint64_t u64;
7371  int32_t ret = decode_varint(d, &u64);
7372  if (ret >= 0) return ret;
7373  if (u64 > UINT32_MAX) {
7374    seterr(d, "Unterminated 32-bit varint");
7375    /* TODO(haberman) guarantee that this function return is >= 0 somehow,
7376     * so we know this path will always be treated as error by our caller.
7377     * Right now the size_t -> int32_t can overflow and produce negative values.
7378     */
7379    *u32 = 0;
7380    return (int32_t)upb_pbdecoder_suspend(d);
7381  }
7382  *u32 = (uint32_t)u64;
7383  return DECODE_OK;
7384}
7385
7386/* Decodes a fixed32 from the current buffer position.
7387 * Returns a status code as described in decoder.int.h.
7388 * TODO: proper byte swapping for big-endian machines. */
7389UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) {
7390  return getbytes(d, u32, 4);
7391}
7392
7393/* Decodes a fixed64 from the current buffer position.
7394 * Returns a status code as described in decoder.int.h.
7395 * TODO: proper byte swapping for big-endian machines. */
7396UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) {
7397  return getbytes(d, u64, 8);
7398}
7399
7400/* Non-static versions of the above functions.
7401 * These are called by the JIT for fallback paths. */
7402int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) {
7403  return decode_fixed32(d, u32);
7404}
7405
7406int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) {
7407  return decode_fixed64(d, u64);
7408}
7409
/* Reinterprets the 8 bytes of "n" as a double (bit copy, no conversion). */
static double as_double(uint64_t n) {
  double result;
  memcpy(&result, &n, 8);
  return result;
}

/* Reinterprets the 4 bytes of "n" as a float (bit copy, no conversion). */
static float as_float(uint32_t n) {
  float result;
  memcpy(&result, &n, 4);
  return result;
}
7412
7413/* Pushes a frame onto the decoder stack. */
7414static bool decoder_push(upb_pbdecoder *d, uint64_t end) {
7415  upb_pbdecoder_frame *fr = d->top;
7416
7417  if (end > fr->end_ofs) {
7418    seterr(d, kPbDecoderSubmessageTooLong);
7419    return false;
7420  } else if (fr == d->limit) {
7421    seterr(d, kPbDecoderStackOverflow);
7422    return false;
7423  }
7424
7425  fr++;
7426  fr->end_ofs = end;
7427  fr->dispatch = NULL;
7428  fr->groupnum = 0;
7429  d->top = fr;
7430  return true;
7431}
7432
7433static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) {
7434  /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence
7435   * field number) prior to hitting any enclosing submessage end, pushing our
7436   * existing delim end prevents us from continuing to parse values from a
7437   * corrupt proto that doesn't give us an END tag in time. */
7438  if (!decoder_push(d, d->top->end_ofs))
7439    return false;
7440  d->top->groupnum = arg;
7441  return true;
7442}
7443
7444/* Pops a frame from the decoder stack. */
7445static void decoder_pop(upb_pbdecoder *d) { d->top--; }
7446
7447UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d,
7448                                                 uint64_t expected) {
7449  uint64_t data = 0;
7450  size_t bytes = upb_value_size(expected);
7451  size_t read = peekbytes(d, &data, bytes);
7452  if (read == bytes && data == expected) {
7453    /* Advance past matched bytes. */
7454    int32_t ok = getbytes(d, &data, read);
7455    UPB_ASSERT(ok < 0);
7456    return DECODE_OK;
7457  } else if (read < bytes && memcmp(&data, &expected, read) == 0) {
7458    return (int32_t)suspend_save(d);
7459  } else {
7460    return DECODE_MISMATCH;
7461  }
7462}
7463
/* Skips over one unknown field (or one whole unknown group) and reports the
 * skipped bytes to the current frame's sink as unknown data.
 *
 * If fieldnum >= 0 the caller has already decoded the tag, and fieldnum and
 * wire_type describe it.  If fieldnum is negative, the incoming wire_type is
 * not used and the tag is read from the input instead.
 *
 * Unknown groups are tracked by pushing a frame whose group number is the
 * negated field number; while the top frame's group number is negative we keep
 * looping over the fields nested inside that unknown group.
 *
 * Returns DECODE_OK once a field has been skipped and the top frame is not an
 * unknown group, DECODE_ENDGROUP when an ENDGROUP tag matching the top frame's
 * (non-negated) group number is seen, and the result of
 * upb_pbdecoder_suspend() for malformed input.  CHECK_RETURN() is defined
 * elsewhere in this file; presumably it returns early with the helper's status
 * when that status is not DECODE_OK. */
int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum,
                                  uint8_t wire_type) {
  /* The caller already has a tag: jump straight into the loop body. */
  if (fieldnum >= 0)
    goto have_tag;

  while (true) {
    uint32_t tag;
    CHECK_RETURN(decode_v32(d, &tag));
    /* Low three bits are the wire type, the rest is the field number. */
    wire_type = tag & 0x7;
    fieldnum = tag >> 3;

have_tag:
    if (fieldnum == 0) {
      seterr(d, "Saw invalid field number (0)");
      return (int32_t)upb_pbdecoder_suspend(d);
    }

    switch (wire_type) {
      case UPB_WIRE_TYPE_32BIT:
        CHECK_RETURN(skip(d, 4));
        break;
      case UPB_WIRE_TYPE_64BIT:
        CHECK_RETURN(skip(d, 8));
        break;
      case UPB_WIRE_TYPE_VARINT: {
        /* The value is decoded only to find where it ends. */
        uint64_t u64;
        CHECK_RETURN(decode_varint(d, &u64));
        break;
      }
      case UPB_WIRE_TYPE_DELIMITED: {
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_RETURN(skip(d, len));
        break;
      }
      case UPB_WIRE_TYPE_START_GROUP:
        /* Negated group number marks the new frame as an unknown group. */
        if (!pushtagdelim(d, -fieldnum)) {
          return (int32_t)upb_pbdecoder_suspend(d);
        }
        break;
      case UPB_WIRE_TYPE_END_GROUP:
        if (fieldnum == -d->top->groupnum) {
          /* End of the unknown group we pushed above. */
          decoder_pop(d);
        } else if (fieldnum == d->top->groupnum) {
          /* End of the group the top frame belongs to; the caller handles
           * it. */
          return DECODE_ENDGROUP;
        } else {
          seterr(d, "Unmatched ENDGROUP tag.");
          return (int32_t)upb_pbdecoder_suspend(d);
        }
        break;
      default:
        seterr(d, "Invalid wire type");
        return (int32_t)upb_pbdecoder_suspend(d);
    }

    if (d->top->groupnum >= 0) {
      /* Not inside an unknown group: hand everything between the last
       * checkpoint and the current position to the sink and finish. */
      /* TODO: More code needed for handling unknown groups. */
      upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint);
      return DECODE_OK;
    }

    /* Unknown group -- continue looping over unknown fields. */
    checkpoint(d);
  }
}
7529
7530static void goto_endmsg(upb_pbdecoder *d) {
7531  upb_value v;
7532  bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v);
7533  UPB_ASSERT(found);
7534  d->pc = d->top->base + upb_value_getuint64(v);
7535}
7536
7537/* Parses a tag and jumps to the corresponding bytecode instruction for this
7538 * field.
7539 *
7540 * If the tag is unknown (or the wire type doesn't match), parses the field as
7541 * unknown.  If the tag is a valid ENDGROUP tag, jumps to the bytecode
7542 * instruction for the end of message. */
7543static int32_t dispatch(upb_pbdecoder *d) {
7544  upb_inttable *dispatch = d->top->dispatch;
7545  uint32_t tag;
7546  uint8_t wire_type;
7547  uint32_t fieldnum;
7548  upb_value val;
7549  int32_t retval;
7550
7551  /* Decode tag. */
7552  CHECK_RETURN(decode_v32(d, &tag));
7553  wire_type = tag & 0x7;
7554  fieldnum = tag >> 3;
7555
7556  /* Lookup tag.  Because of packed/non-packed compatibility, we have to
7557   * check the wire type against two possibilities. */
7558  if (fieldnum != DISPATCH_ENDMSG &&
7559      upb_inttable_lookup32(dispatch, fieldnum, &val)) {
7560    uint64_t v = upb_value_getuint64(val);
7561    if (wire_type == (v & 0xff)) {
7562      d->pc = d->top->base + (v >> 16);
7563      return DECODE_OK;
7564    } else if (wire_type == ((v >> 8) & 0xff)) {
7565      bool found =
7566          upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val);
7567      UPB_ASSERT(found);
7568      d->pc = d->top->base + upb_value_getuint64(val);
7569      return DECODE_OK;
7570    }
7571  }
7572
7573  /* We have some unknown fields (or ENDGROUP) to parse.  The DISPATCH or TAG
7574   * bytecode that triggered this is preceded by a CHECKDELIM bytecode which
7575   * we need to back up to, so that when we're done skipping unknown data we
7576   * can re-check the delimited end. */
7577  d->last--;  /* Necessary if we get suspended */
7578  d->pc = d->last;
7579  UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM);
7580
7581  /* Unknown field or ENDGROUP. */
7582  retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type);
7583
7584  CHECK_RETURN(retval);
7585
7586  if (retval == DECODE_ENDGROUP) {
7587    goto_endmsg(d);
7588    return DECODE_OK;
7589  }
7590
7591  return DECODE_OK;
7592}
7593
7594/* Callers know that the stack is more than one deep because the opcodes that
7595 * call this only occur after PUSH operations. */
7596upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) {
7597  UPB_ASSERT(d->top != d->stack);
7598  return d->top - 1;
7599}
7600
7601
7602/* The main decoding loop *****************************************************/
7603
/* The main decoder VM function.  Uses traditional bytecode dispatch loop with a
 * switch() statement.
 *
 * Each iteration records the current instruction's address in d->last, fetches
 * the instruction word at d->pc, and splits it into an opcode (via getop())
 * and an argument (the bits above the low 8).
 *
 * The loop is only left through a return: OP_HALT returns d->size_param, and
 * every other exit returns a value produced by CHECK_RETURN(),
 * CHECK_SUSPEND(), skip() or upb_pbdecoder_suspend(), all of which are defined
 * elsewhere in this file.
 *
 * "group" is only read by the optional UPB_DUMP_BYTECODE trace; "handle" is
 * forwarded to upb_sink_putstring(). */
size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group,
                      const upb_bufhandle* handle) {

/* Wraps the code for one opcode in a case label.  After the code runs, a
 * checkpoint is taken if consumes_input(op) says the opcode consumes input. */
#define VMCASE(op, code) \
  case op: { code; if (consumes_input(op)) checkpoint(d); break; }
/* Generates the case for OP_PARSE_<type>: decodes one value of C type "ctype"
 * with decode_<wt>(), converts it with "convfunc" (a function or a type name
 * used as a cast), and passes it to upb_sink_put<name>(). */
#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \
  VMCASE(OP_PARSE_ ## type, { \
    ctype val; \
    CHECK_RETURN(decode_ ## wt(d, &val)); \
    upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \
  })

  while(1) {
    int32_t instruction;
    opcode op;
    uint32_t arg;
    int32_t longofs;

    /* Remember where this instruction started before stepping past it. */
    d->last = d->pc;
    instruction = *d->pc++;
    op = getop(instruction);
    arg = instruction >> 8;
    /* The same argument bits, as a signed value for relative jumps. */
    longofs = arg;
    UPB_ASSERT(d->ptr != d->residual_end);
    UPB_UNUSED(group);
#ifdef UPB_DUMP_BYTECODE
    fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d "
                    "%x %s (%d)\n",
            (int)offset(d),
            (int)(d->ptr - d->buf),
            (int)(d->data_end - d->ptr),
            (int)(d->end - d->ptr),
            (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)),
            (int)(d->pc - 1 - group->bytecode),
            upb_pbdecoder_getopname(op),
            arg);
#endif
    switch (op) {
      /* Technically, we are losing data if we see a 32-bit varint that is not
       * properly sign-extended.  We could detect this and error about the data
       * loss, but proto2 does not do this, so we pass. */
      PRIMITIVE_OP(INT32,    varint,  int32,  int32_t,      uint64_t)
      PRIMITIVE_OP(INT64,    varint,  int64,  int64_t,      uint64_t)
      PRIMITIVE_OP(UINT32,   varint,  uint32, uint32_t,     uint64_t)
      PRIMITIVE_OP(UINT64,   varint,  uint64, uint64_t,     uint64_t)
      PRIMITIVE_OP(FIXED32,  fixed32, uint32, uint32_t,     uint32_t)
      PRIMITIVE_OP(FIXED64,  fixed64, uint64, uint64_t,     uint64_t)
      PRIMITIVE_OP(SFIXED32, fixed32, int32,  int32_t,      uint32_t)
      PRIMITIVE_OP(SFIXED64, fixed64, int64,  int64_t,      uint64_t)
      PRIMITIVE_OP(BOOL,     varint,  bool,   bool,         uint64_t)
      PRIMITIVE_OP(DOUBLE,   fixed64, double, as_double,    uint64_t)
      PRIMITIVE_OP(FLOAT,    fixed32, float,  as_float,     uint32_t)
      PRIMITIVE_OP(SINT32,   varint,  int32,  upb_zzdec_32, uint64_t)
      PRIMITIVE_OP(SINT64,   varint,  int64,  upb_zzdec_64, uint64_t)

      /* The dispatch table pointer is stored inline in the bytecode, right
       * after the instruction; copy it out and step over it.  The frame's
       * base is the address of this instruction. */
      VMCASE(OP_SETDISPATCH,
        d->top->base = d->pc - 1;
        memcpy(&d->top->dispatch, d->pc, sizeof(void*));
        d->pc += sizeof(void*) / sizeof(uint32_t);
      )
      VMCASE(OP_STARTMSG,
        CHECK_SUSPEND(upb_sink_startmsg(d->top->sink));
      )
      VMCASE(OP_ENDMSG,
        CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status));
      )
      /* The start* opcodes call the handler on the enclosing frame's sink and
       * store the resulting sub-sink in the current (already pushed) frame. */
      VMCASE(OP_STARTSEQ,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink));
      )
      VMCASE(OP_ENDSEQ,
        CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg));
      )
      VMCASE(OP_STARTSUBMSG,
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink));
      )
      /* Reads the sub-sink from the frame just above the current top. */
      VMCASE(OP_ENDSUBMSG,
        upb_sink subsink = (d->top + 1)->sink;
        CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, subsink, arg));
      )
      VMCASE(OP_STARTSTR,
        uint32_t len = (uint32_t)delim_remaining(d);
        upb_pbdecoder_frame *outer = outer_frame(d);
        CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, &d->top->sink));
        if (len == 0) {
          d->pc++;  /* Skip OP_STRING. */
        }
      )
      /* Offers everything left in the current buffer to the string handler.
       * n is the number of bytes the handler reports back; a value larger
       * than what was offered is treated as a request to skip ahead. */
      VMCASE(OP_STRING,
        uint32_t len = (uint32_t)curbufleft(d);
        size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle);
        if (n > len) {
          if (n > delim_remaining(d)) {
            seterr(d, "Tried to skip past end of string.");
            return upb_pbdecoder_suspend(d);
          } else {
            int32_t ret = skip(d, n);
            /* This shouldn't return DECODE_OK, because n > len. */
            UPB_ASSERT(ret >= 0);
            return ret;
          }
        }
        advance(d, n);
        if (n < len || d->delim_end == NULL) {
          /* We aren't finished with this string yet. */
          d->pc--;  /* Repeat OP_STRING. */
          if (n > 0) checkpoint(d);
          return upb_pbdecoder_suspend(d);
        }
      )
      VMCASE(OP_ENDSTR,
        CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg));
      )
      VMCASE(OP_PUSHTAGDELIM,
        CHECK_SUSPEND(pushtagdelim(d, arg));
      )
      /* The group number is stored in the bytecode word that follows. */
      VMCASE(OP_SETBIGGROUPNUM,
        d->top->groupnum = *d->pc++;
      )
      VMCASE(OP_POP,
        UPB_ASSERT(d->top > d->stack);
        decoder_pop(d);
      )
      /* Reads a length prefix and pushes a frame ending that many bytes past
       * the current stream offset. */
      VMCASE(OP_PUSHLENDELIM,
        uint32_t len;
        CHECK_RETURN(decode_v32(d, &len));
        CHECK_SUSPEND(decoder_push(d, offset(d) + len));
        set_delim_end(d);
      )
      VMCASE(OP_SETDELIM,
        set_delim_end(d);
      )
      /* Branches by longofs when the read position has reached the end of the
       * current delimited region. */
      VMCASE(OP_CHECKDELIM,
        /* We are guaranteed of this assert because we never allow ourselves to
         * consume bytes beyond data_end, which covers delim_end when non-NULL.
         */
        UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end));
        if (d->ptr == d->delim_end)
          d->pc += longofs;
      )
      /* Saves the return address (the instruction after this one) on the
       * call stack, then jumps. */
      VMCASE(OP_CALL,
        d->callstack[d->call_len++] = d->pc;
        d->pc += longofs;
      )
      VMCASE(OP_RET,
        UPB_ASSERT(d->call_len > 0);
        d->pc = d->callstack[--d->call_len];
      )
      VMCASE(OP_BRANCH,
        d->pc += longofs;
      )
      /* OP_TAG1/OP_TAG2/OP_TAGN compare the upcoming input against an expected
       * pre-encoded tag of one, two, or more bytes.  On a mismatch they all
       * share the "badtag" code inside OP_TAG1: the low byte of arg is a
       * signed short offset, where LABEL_DISPATCH means "run dispatch()"
       * instead of branching. */
      VMCASE(OP_TAG1,
        uint8_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xff;
        if (*d->ptr == expected) {
          advance(d, 1);
        } else {
          int8_t shortofs;
         badtag:
          shortofs = arg;
          if (shortofs == LABEL_DISPATCH) {
            CHECK_RETURN(dispatch(d));
          } else {
            d->pc += shortofs;
            break; /* Avoid checkpoint(). */
          }
        }
      )
      VMCASE(OP_TAG2,
        uint16_t expected;
        CHECK_SUSPEND(curbufleft(d) > 0);
        expected = (arg >> 8) & 0xffff;
        if (curbufleft(d) >= 2) {
          uint16_t actual;
          memcpy(&actual, d->ptr, 2);
          if (expected == actual) {
            advance(d, 2);
          } else {
            goto badtag;
          }
        } else {
          /* Only one byte is buffered; let the slow path decide. */
          int32_t result = upb_pbdecoder_checktag_slow(d, expected);
          if (result == DECODE_MISMATCH) goto badtag;
          if (result >= 0) return result;
        }
      )
      /* The expected tag occupies the two bytecode words that follow. */
      VMCASE(OP_TAGN, {
        uint64_t expected;
        int32_t result;
        memcpy(&expected, d->pc, 8);
        d->pc += 2;
        result = upb_pbdecoder_checktag_slow(d, expected);
        if (result == DECODE_MISMATCH) goto badtag;
        if (result >= 0) return result;
      })
      VMCASE(OP_DISPATCH, {
        CHECK_RETURN(dispatch(d));
      })
      VMCASE(OP_HALT, {
        return d->size_param;
      })
    }
  }
}
7812
7813
7814/* BytesHandler handlers ******************************************************/
7815
7816void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) {
7817  upb_pbdecoder *d = closure;
7818  UPB_UNUSED(size_hint);
7819  d->top->end_ofs = UINT64_MAX;
7820  d->bufstart_ofs = 0;
7821  d->call_len = 1;
7822  d->callstack[0] = &halt;
7823  d->pc = pc;
7824  d->skip = 0;
7825  return d;
7826}
7827
7828bool upb_pbdecoder_end(void *closure, const void *handler_data) {
7829  upb_pbdecoder *d = closure;
7830  const upb_pbdecodermethod *method = handler_data;
7831  uint64_t end;
7832  char dummy;
7833
7834  if (d->residual_end > d->residual) {
7835    seterr(d, "Unexpected EOF: decoder still has buffered unparsed data");
7836    return false;
7837  }
7838
7839  if (d->skip) {
7840    seterr(d, "Unexpected EOF inside skipped data");
7841    return false;
7842  }
7843
7844  if (d->top->end_ofs != UINT64_MAX) {
7845    seterr(d, "Unexpected EOF inside delimited string");
7846    return false;
7847  }
7848
7849  /* The user's end() call indicates that the message ends here. */
7850  end = offset(d);
7851  d->top->end_ofs = end;
7852
7853  {
7854    const uint32_t *p = d->pc;
7855    d->stack->end_ofs = end;
7856    /* Check the previous bytecode, but guard against beginning. */
7857    if (p != method->code_base.ptr) p--;
7858    if (getop(*p) == OP_CHECKDELIM) {
7859      /* Rewind from OP_TAG* to OP_CHECKDELIM. */
7860      UPB_ASSERT(getop(*d->pc) == OP_TAG1 ||
7861             getop(*d->pc) == OP_TAG2 ||
7862             getop(*d->pc) == OP_TAGN ||
7863             getop(*d->pc) == OP_DISPATCH);
7864      d->pc = p;
7865    }
7866    upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL);
7867  }
7868
7869  if (d->call_len != 0) {
7870    seterr(d, "Unexpected EOF inside submessage or group");
7871    return false;
7872  }
7873
7874  return true;
7875}
7876
7877size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf,
7878                            size_t size, const upb_bufhandle *handle) {
7879  int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle);
7880
7881  if (result == DECODE_ENDGROUP) goto_endmsg(decoder);
7882  CHECK_RETURN(result);
7883
7884  return run_decoder_vm(decoder, group, handle);
7885}
7886
7887
7888/* Public API *****************************************************************/
7889
7890void upb_pbdecoder_reset(upb_pbdecoder *d) {
7891  d->top = d->stack;
7892  d->top->groupnum = 0;
7893  d->ptr = d->residual;
7894  d->buf = d->residual;
7895  d->end = d->residual;
7896  d->residual_end = d->residual;
7897}
7898
7899upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m,
7900                                    upb_sink sink, upb_status *status) {
7901  const size_t default_max_nesting = 64;
7902#ifndef NDEBUG
7903  size_t size_before = upb_arena_bytesallocated(a);
7904#endif
7905
7906  upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder));
7907  if (!d) return NULL;
7908
7909  d->method_ = m;
7910  d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting));
7911  d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting));
7912  if (!d->stack || !d->callstack) {
7913    return NULL;
7914  }
7915
7916  d->arena = a;
7917  d->limit = d->stack + default_max_nesting - 1;
7918  d->stack_size = default_max_nesting;
7919  d->status = status;
7920
7921  upb_pbdecoder_reset(d);
7922  upb_bytessink_reset(&d->input_, &m->input_handler_, d);
7923
7924  if (d->method_->dest_handlers_) {
7925    if (sink.handlers != d->method_->dest_handlers_)
7926      return NULL;
7927  }
7928  d->top->sink = sink;
7929
7930  /* If this fails, increase the value in decoder.h. */
7931  UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
7932                      UPB_PB_DECODER_SIZE);
7933  return d;
7934}
7935
7936uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) {
7937  return offset(d);
7938}
7939
7940const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) {
7941  return d->method_;
7942}
7943
7944upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) {
7945  return d->input_;
7946}
7947
7948size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) {
7949  return d->stack_size;
7950}
7951
7952bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) {
7953  UPB_ASSERT(d->top >= d->stack);
7954
7955  if (max < (size_t)(d->top - d->stack)) {
7956    /* Can't set a limit smaller than what we are currently at. */
7957    return false;
7958  }
7959
7960  if (max > d->stack_size) {
7961    /* Need to reallocate stack and callstack to accommodate. */
7962    size_t old_size = stacksize(d, d->stack_size);
7963    size_t new_size = stacksize(d, max);
7964    void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size);
7965    if (!p) {
7966      return false;
7967    }
7968    d->stack = p;
7969
7970    old_size = callstacksize(d, d->stack_size);
7971    new_size = callstacksize(d, max);
7972    p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size);
7973    if (!p) {
7974      return false;
7975    }
7976    d->callstack = p;
7977
7978    d->stack_size = max;
7979  }
7980
7981  d->limit = d->stack + max - 1;
7982  return true;
7983}
7984/*
7985** upb::Encoder
7986**
7987** Since we are implementing pure handlers (ie. without any out-of-band access
7988** to pre-computed lengths), we have to buffer all submessages before we can
7989** emit even their first byte.
7990**
7991** Not knowing the size of submessages also means we can't write a perfect
7992** zero-copy implementation, even with buffering.  Lengths are stored as
7993** varints, which means that we don't know how many bytes to reserve for the
7994** length until we know what the length is.
7995**
7996** This leaves us with three main choices:
7997**
7998** 1. buffer all submessage data in a temporary buffer, then copy it exactly
7999**    once into the output buffer.
8000**
8001** 2. attempt to buffer data directly into the output buffer, estimating how
8002**    many bytes each length will take.  When our guesses are wrong, use
8003**    memmove() to grow or shrink the allotted space.
8004**
8005** 3. buffer directly into the output buffer, allocating a max length
8006**    ahead-of-time for each submessage length.  If we overallocated, we waste
8007**    space, but no memcpy() or memmove() is required.  This approach requires
8008**    defining a maximum size for submessages and rejecting submessages that
8009**    exceed that size.
8010**
8011** (2) and (3) have the potential to have better performance, but they are more
8012** complicated and subtle to implement:
8013**
8014**   (3) requires making an arbitrary choice of the maximum message size; it
8015**       wastes space when submessages are shorter than this and fails
8016**       completely when they are longer.  This makes it more finicky and
8017**       requires configuration based on the input.  It also makes it impossible
8018**       to perfectly match the output of reference encoders that always use the
8019**       optimal amount of space for each length.
8020**
**   (2) requires guessing the size upfront, and if multiple lengths are
8022**       guessed wrong the minimum required number of memmove() operations may
8023**       be complicated to compute correctly.  Implemented properly, it may have
8024**       a useful amortized or average cost, but more investigation is required
8025**       to determine this and what the optimal algorithm is to achieve it.
8026**
8027**   (1) makes you always pay for exactly one copy, but its implementation is
8028**       the simplest and its performance is predictable.
8029**
8030** So for now, we implement (1) only.  If we wish to optimize later, we should
8031** be able to do it without affecting users.
8032**
8033** The strategy is to buffer the segments of data that do *not* depend on
8034** unknown lengths in one buffer, and keep a separate buffer of segment pointers
8035** and lengths.  When the top-level submessage ends, we can go beginning to end,
8036** alternating the writing of lengths with memcpy() of the rest of the data.
8037** At the top level though, no buffering is required.
8038*/
8039
8040
8041
8042/* The output buffer is divided into segments; a segment is a string of data
8043 * that is "ready to go" -- it does not need any varint lengths inserted into
8044 * the middle.  The seams between segments are where varints will be inserted
8045 * once they are known.
8046 *
8047 * We also use the concept of a "run", which is a range of encoded bytes that
8048 * occur at a single submessage level.  Every segment contains one or more runs.
8049 *
8050 * A segment can span messages.  Consider:
8051 *
8052 *                  .--Submessage lengths---------.
8053 *                  |       |                     |
8054 *                  |       V                     V
8055 *                  V      | |---------------    | |-----------------
8056 * Submessages:    | |-----------------------------------------------
8057 * Top-level msg: ------------------------------------------------------------
8058 *
8059 * Segments:          -----   -------------------   -----------------
8060 * Runs:              *----   *--------------*---   *----------------
8061 * (* marks the start)
8062 *
 * Note that the top-level message is not in any segment because it does not
8064 * have any length preceding it.
8065 *
8066 * A segment is only interrupted when another length needs to be inserted.  So
8067 * observe how the second segment spans both the inner submessage and part of
8068 * the next enclosing message. */
/* Bookkeeping for one segment of buffered output.  The bytes themselves live
 * in the encoder's output buffer; when the outermost delimited region ends,
 * the segments are emitted in order, each preceded by its varint length. */
typedef struct {
  /* The length to varint-encode before this segment.  Grows as runs are
   * accumulated for the submessage, and as nested submessages end and add
   * their own length plus the size of their length prefix. */
  uint32_t msglen;
  /* Length of the segment: the number of buffered bytes to emit right after
   * the varint length. */
  uint32_t seglen;
} upb_pb_encoder_segment;
8073
/* State of a protobuf encoder.  Field data is written into "buf"; while we
 * are inside a delimited region (top != NULL) it is kept there until all
 * lengths are known, otherwise it is flushed to the output after every
 * handler. */
struct upb_pb_encoder {
  /* Arena used for growing the buffers below. */
  upb_arena *arena;

  /* Our input and output. */
  upb_sink input_;
  upb_bytessink output_;

  /* The "subclosure" -- used as the inner closure as part of the bytessink
   * protocol. */
  void *subc;

  /* The output buffer and limit, and our current write position.  "buf"
   * initially points to "initbuf", but is dynamically allocated if we need to
   * grow beyond the initial size. */
  char *buf, *ptr, *limit;

  /* The beginning of the current run, or undefined if we are at the top
   * level. */
  char *runbegin;

  /* The list of segments we are accumulating.  "segptr" is the segment
   * currently being filled; "seglimit" is one past the end of the allocated
   * array. */
  upb_pb_encoder_segment *segbuf, *segptr, *seglimit;

  /* The stack of enclosing submessages.  Each entry in the stack holds the
   * index (into segbuf) of the segment where this submessage's length is
   * being accumulated.  "top" is NULL while we are not inside any delimited
   * region. */
  int *stack, *top, *stacklimit;

  /* Depth of startmsg/endmsg calls. */
  int depth;
};
8104
8105/* low-level buffering ********************************************************/
8106
8107/* Low-level functions for interacting with the output buffer. */
8108
8109/* TODO(haberman): handle pushback */
8110static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) {
8111  size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL);
8112  UPB_ASSERT(n == len);
8113}
8114
8115static upb_pb_encoder_segment *top(upb_pb_encoder *e) {
8116  return &e->segbuf[*e->top];
8117}
8118
8119/* Call to ensure that at least "bytes" bytes are available for writing at
8120 * e->ptr.  Returns false if the bytes could not be allocated. */
8121static bool reserve(upb_pb_encoder *e, size_t bytes) {
8122  if ((size_t)(e->limit - e->ptr) < bytes) {
8123    /* Grow buffer. */
8124    char *new_buf;
8125    size_t needed = bytes + (e->ptr - e->buf);
8126    size_t old_size = e->limit - e->buf;
8127
8128    size_t new_size = old_size;
8129
8130    while (new_size < needed) {
8131      new_size *= 2;
8132    }
8133
8134    new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size);
8135
8136    if (new_buf == NULL) {
8137      return false;
8138    }
8139
8140    e->ptr = new_buf + (e->ptr - e->buf);
8141    e->runbegin = new_buf + (e->runbegin - e->buf);
8142    e->limit = new_buf + new_size;
8143    e->buf = new_buf;
8144  }
8145
8146  return true;
8147}
8148
8149/* Call when "bytes" bytes have been writte at e->ptr.  The caller *must* have
8150 * previously called reserve() with at least this many bytes. */
8151static void encoder_advance(upb_pb_encoder *e, size_t bytes) {
8152  UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes);
8153  e->ptr += bytes;
8154}
8155
8156/* Call when all of the bytes for a handler have been written.  Flushes the
8157 * bytes if possible and necessary, returning false if this failed. */
8158static bool commit(upb_pb_encoder *e) {
8159  if (!e->top) {
8160    /* We aren't inside a delimited region.  Flush our accumulated bytes to
8161     * the output.
8162     *
8163     * TODO(haberman): in the future we may want to delay flushing for
8164     * efficiency reasons. */
8165    putbuf(e, e->buf, e->ptr - e->buf);
8166    e->ptr = e->buf;
8167  }
8168
8169  return true;
8170}
8171
8172/* Writes the given bytes to the buffer, handling reserve/advance. */
8173static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) {
8174  if (!reserve(e, len)) {
8175    return false;
8176  }
8177
8178  memcpy(e->ptr, data, len);
8179  encoder_advance(e, len);
8180  return true;
8181}
8182
8183/* Finish the current run by adding the run totals to the segment and message
8184 * length. */
8185static void accumulate(upb_pb_encoder *e) {
8186  size_t run_len;
8187  UPB_ASSERT(e->ptr >= e->runbegin);
8188  run_len = e->ptr - e->runbegin;
8189  e->segptr->seglen += run_len;
8190  top(e)->msglen += run_len;
8191  e->runbegin = e->ptr;
8192}
8193
8194/* Call to indicate the start of delimited region for which the full length is
8195 * not yet known.  All data will be buffered until the length is known.
8196 * Delimited regions may be nested; their lengths will all be tracked properly. */
8197static bool start_delim(upb_pb_encoder *e) {
8198  if (e->top) {
8199    /* We are already buffering, advance to the next segment and push it on the
8200     * stack. */
8201    accumulate(e);
8202
8203    if (++e->top == e->stacklimit) {
8204      /* TODO(haberman): grow stack? */
8205      return false;
8206    }
8207
8208    if (++e->segptr == e->seglimit) {
8209      /* Grow segment buffer. */
8210      size_t old_size =
8211          (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment);
8212      size_t new_size = old_size * 2;
8213      upb_pb_encoder_segment *new_buf =
8214          upb_arena_realloc(e->arena, e->segbuf, old_size, new_size);
8215
8216      if (new_buf == NULL) {
8217        return false;
8218      }
8219
8220      e->segptr = new_buf + (e->segptr - e->segbuf);
8221      e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment));
8222      e->segbuf = new_buf;
8223    }
8224  } else {
8225    /* We were previously at the top level, start buffering. */
8226    e->segptr = e->segbuf;
8227    e->top = e->stack;
8228    e->runbegin = e->ptr;
8229  }
8230
8231  *e->top = (int)(e->segptr - e->segbuf);
8232  e->segptr->seglen = 0;
8233  e->segptr->msglen = 0;
8234
8235  return true;
8236}
8237
8238/* Call to indicate the end of a delimited region.  We now know the length of
8239 * the delimited region.  If we are not nested inside any other delimited
8240 * regions, we can now emit all of the buffered data we accumulated. */
8241static bool end_delim(upb_pb_encoder *e) {
8242  size_t msglen;
8243  accumulate(e);
8244  msglen = top(e)->msglen;
8245
8246  if (e->top == e->stack) {
8247    /* All lengths are now available, emit all buffered data. */
8248    char buf[UPB_PB_VARINT_MAX_LEN];
8249    upb_pb_encoder_segment *s;
8250    const char *ptr = e->buf;
8251    for (s = e->segbuf; s <= e->segptr; s++) {
8252      size_t lenbytes = upb_vencode64(s->msglen, buf);
8253      putbuf(e, buf, lenbytes);
8254      putbuf(e, ptr, s->seglen);
8255      ptr += s->seglen;
8256    }
8257
8258    e->ptr = e->buf;
8259    e->top = NULL;
8260  } else {
8261    /* Need to keep buffering; propagate length info into enclosing
8262     * submessages. */
8263    --e->top;
8264    top(e)->msglen += msglen + upb_varint_size(msglen);
8265  }
8266
8267  return true;
8268}
8269
8270
8271/* tag_t **********************************************************************/
8272
/* A precomputed (pre-encoded) tag and length.  Built once per field by
 * new_tag() and written to the output by encode_tag(). */

typedef struct {
  /* Number of bytes of "tag" that are in use. */
  uint8_t bytes;
  /* The varint encoding of (field number << 3) | wire type. */
  char tag[7];
} tag_t;
8279
8280/* Allocates a new tag for this field, and sets it in these handlerattr. */
8281static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt,
8282                    upb_handlerattr *attr) {
8283  uint32_t n = upb_fielddef_number(f);
8284
8285  tag_t *tag = upb_gmalloc(sizeof(tag_t));
8286  tag->bytes = upb_vencode64((n << 3) | wt, tag->tag);
8287
8288  attr->handler_data = tag;
8289  upb_handlers_addcleanup(h, tag, upb_gfree);
8290}
8291
8292static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) {
8293  return encode_bytes(e, tag->tag, tag->bytes);
8294}
8295
8296
8297/* encoding of wire types *****************************************************/
8298
8299static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) {
8300  /* TODO(haberman): byte-swap for big endian. */
8301  return encode_bytes(e, &val, sizeof(uint64_t));
8302}
8303
8304static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) {
8305  /* TODO(haberman): byte-swap for big endian. */
8306  return encode_bytes(e, &val, sizeof(uint32_t));
8307}
8308
8309static bool encode_varint(upb_pb_encoder *e, uint64_t val) {
8310  if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) {
8311    return false;
8312  }
8313
8314  encoder_advance(e, upb_vencode64(val, e->ptr));
8315  return true;
8316}
8317
/* Returns the object representation of "d" as a 64-bit integer.  The bits are
 * copied with memcpy(); no numeric conversion takes place. */
static uint64_t dbl2uint64(double d) {
  uint64_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8323
/* Returns the object representation of "d" as a 32-bit integer.  The bits are
 * copied with memcpy(); no numeric conversion takes place. */
static uint32_t flt2uint32(float d) {
  uint32_t bits;
  memcpy(&bits, &d, sizeof(bits));
  return bits;
}
8329
8330
8331/* encoding of proto types ****************************************************/
8332
8333static bool startmsg(void *c, const void *hd) {
8334  upb_pb_encoder *e = c;
8335  UPB_UNUSED(hd);
8336  if (e->depth++ == 0) {
8337    upb_bytessink_start(e->output_, 0, &e->subc);
8338  }
8339  return true;
8340}
8341
8342static bool endmsg(void *c, const void *hd, upb_status *status) {
8343  upb_pb_encoder *e = c;
8344  UPB_UNUSED(hd);
8345  UPB_UNUSED(status);
8346  if (--e->depth == 0) {
8347    upb_bytessink_end(e->output_);
8348  }
8349  return true;
8350}
8351
8352static void *encode_startdelimfield(void *c, const void *hd) {
8353  bool ok = encode_tag(c, hd) && commit(c) && start_delim(c);
8354  return ok ? c : UPB_BREAK;
8355}
8356
/* Handler for unknown data: copies the "len" bytes at "buf" to the output
 * unchanged and commits them.  Returns false if either step fails. */
static bool encode_unknown(void *c, const void *hd, const char *buf,
                           size_t len) {
  UPB_UNUSED(hd);

  if (!encode_bytes(c, buf, len)) {
    return false;
  }
  return commit(c);
}
8362
/* End handler for a length-delimited field: closes the delimited region that
 * the matching start handler opened. */
static bool encode_enddelimfield(void *c, const void *hd) {
  bool ended;
  UPB_UNUSED(hd);

  ended = end_delim(c);
  return ended;
}
8367
8368static void *encode_startgroup(void *c, const void *hd) {
8369  return (encode_tag(c, hd) && commit(c)) ? c : UPB_BREAK;
8370}
8371
/* End-of-group handler: emits the tag carried by |hd| and commits.
 * commit() is skipped if the tag could not be written. */
static bool encode_endgroup(void *c, const void *hd) {
  if (!encode_tag(c, hd)) {
    return false;
  }
  return commit(c);
}
8375
/* Start-of-string handler.  A string is encoded as an ordinary delimited
 * field, so this simply forwards to encode_startdelimfield(); the size hint
 * is ignored. */
static void *encode_startstr(void *c, const void *hd, size_t size_hint) {
  void *subclosure = encode_startdelimfield(c, hd);
  UPB_UNUSED(size_hint);
  return subclosure;
}
8380
8381static size_t encode_strbuf(void *c, const void *hd, const char *buf,
8382                            size_t len, const upb_bufhandle *h) {
8383  UPB_UNUSED(hd);
8384  UPB_UNUSED(h);
8385  return encode_bytes(c, buf, len) ? len : 0;
8386}
8387
/* Generates the two value handlers for one scalar proto type:
 *
 *   encode_scalar_<type>: calls encode_tag(e, hd), then encodes the converted
 *                         value, then commit(e); stops at the first failure.
 *   encode_packed_<type>: encodes only the converted value -- no tag and no
 *                         commit.
 *
 * Parameters:
 *   type    - suffix used in the generated function names.
 *   ctype   - C type of the value the handler receives.
 *   convert - applied as (convert)(val), so it may be either a type name
 *             (the expression is then a cast) or the name of a conversion
 *             function such as dbl2uint64 (the expression is then a call).
 *   encode  - wire-level encoder to call: encode_varint, encode_fixed32 or
 *             encode_fixed64.
 */
#define T(type, ctype, convert, encode)                                  \
  static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \
    return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e);  \
  }                                                                      \
  static bool encode_packed_##type(void *e, const void *hd, ctype val) { \
    UPB_UNUSED(hd);                                                      \
    return encode(e, (convert)(val));                                    \
  }

/* One instantiation per scalar proto type.  Note that int32 is converted via
 * int64_t (so negative values are sign-extended before varint encoding),
 * whereas enum is converted via uint32_t. */
T(double,   double,   dbl2uint64,   encode_fixed64)
T(float,    float,    flt2uint32,   encode_fixed32)
T(int64,    int64_t,  uint64_t,     encode_varint)
T(int32,    int32_t,  int64_t,      encode_varint)
T(fixed64,  uint64_t, uint64_t,     encode_fixed64)
T(fixed32,  uint32_t, uint32_t,     encode_fixed32)
T(bool,     bool,     bool,         encode_varint)
T(uint32,   uint32_t, uint32_t,     encode_varint)
T(uint64,   uint64_t, uint64_t,     encode_varint)
T(enum,     int32_t,  uint32_t,     encode_varint)
T(sfixed32, int32_t,  uint32_t,     encode_fixed32)
T(sfixed64, int64_t,  uint64_t,     encode_fixed64)
T(sint32,   int32_t,  upb_zzenc_32, encode_varint)
T(sint64,   int64_t,  upb_zzenc_64, encode_varint)

#undef T
8413
8414
8415/* code to build the handlers *************************************************/
8416
8417#include <stdio.h>
/* Callback passed to upb_handlercache_new() (see upb_pb_encoder_newcache()).
 * Registers the binary-encoder handlers on |h|: the message-level
 * start/end/unknown handlers, plus per-field handlers chosen from each field's
 * descriptor type.  |closure| is unused. */
static void newhandlers_callback(const void *closure, upb_handlers *h) {
  const upb_msgdef *m;
  upb_msg_field_iter i;

  UPB_UNUSED(closure);

  upb_handlers_setstartmsg(h, startmsg, NULL);
  upb_handlers_setendmsg(h, endmsg, NULL);
  upb_handlers_setunknown(h, encode_unknown, NULL);

  m = upb_handlers_msgdef(h);
  for(upb_msg_field_begin(&i, m);
      !upb_msg_field_done(&i);
      upb_msg_field_next(&i)) {
    const upb_fielddef *f = upb_msg_iter_field(&i);
    /* Only repeated primitive fields that are marked packed use the packed
     * encoding. */
    bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) &&
                  upb_fielddef_packed(f);
    upb_handlerattr attr = UPB_HANDLERATTR_INIT;
    /* A packed field is written as one delimited blob; every other field uses
     * the wire type looked up from its descriptor type. */
    upb_wiretype_t wt =
        packed ? UPB_WIRE_TYPE_DELIMITED
               : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)];

    /* Pre-encode the tag for this field. */
    new_tag(h, f, wt, &attr);

    if (packed) {
      /* The sequence start/end handlers write the tag and open/close the
       * delimited region; the per-value handlers below then write bare
       * values. */
      upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr);
      upb_handlers_setendseq(h, f, encode_enddelimfield, &attr);
    }

/* Expands to a switch case for descriptor type |upper| that installs either
 * encode_packed_<lower> or encode_scalar_<lower> through
 * upb_handlers_set<upbtype>(). */
#define T(upper, lower, upbtype)                                     \
  case UPB_DESCRIPTOR_TYPE_##upper:                                  \
    if (packed) {                                                    \
      upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \
    } else {                                                         \
      upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \
    }                                                                \
    break;

    /* There is no default case: a descriptor type not listed here gets no
     * value handlers. */
    switch (upb_fielddef_descriptortype(f)) {
      T(DOUBLE,   double,   double);
      T(FLOAT,    float,    float);
      T(INT64,    int64,    int64);
      T(INT32,    int32,    int32);
      T(FIXED64,  fixed64,  uint64);
      T(FIXED32,  fixed32,  uint32);
      T(BOOL,     bool,     bool);
      T(UINT32,   uint32,   uint32);
      T(UINT64,   uint64,   uint64);
      T(ENUM,     enum,     int32);
      T(SFIXED32, sfixed32, int32);
      T(SFIXED64, sfixed64, int64);
      T(SINT32,   sint32,   int32);
      T(SINT64,   sint64,   int64);
      case UPB_DESCRIPTOR_TYPE_STRING:
      case UPB_DESCRIPTOR_TYPE_BYTES:
        upb_handlers_setstartstr(h, f, encode_startstr, &attr);
        upb_handlers_setendstr(h, f, encode_enddelimfield, &attr);
        upb_handlers_setstring(h, f, encode_strbuf, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_MESSAGE:
        upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr);
        upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr);
        break;
      case UPB_DESCRIPTOR_TYPE_GROUP: {
        /* Endgroup takes a different tag (wire_type = END_GROUP). */
        upb_handlerattr attr2 = UPB_HANDLERATTR_INIT;
        new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2);

        upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr);
        upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2);

        break;
      }
    }

#undef T
  }
}
8497
8498void upb_pb_encoder_reset(upb_pb_encoder *e) {
8499  e->segptr = NULL;
8500  e->top = NULL;
8501  e->depth = 0;
8502}
8503
8504
8505/* public API *****************************************************************/
8506
8507upb_handlercache *upb_pb_encoder_newcache(void) {
8508  return upb_handlercache_new(newhandlers_callback, NULL);
8509}
8510
8511upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h,
8512                                      upb_bytessink output) {
8513  const size_t initial_bufsize = 256;
8514  const size_t initial_segbufsize = 16;
8515  /* TODO(haberman): make this configurable. */
8516  const size_t stack_size = 64;
8517#ifndef NDEBUG
8518  const size_t size_before = upb_arena_bytesallocated(arena);
8519#endif
8520
8521  upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder));
8522  if (!e) return NULL;
8523
8524  e->buf = upb_arena_malloc(arena, initial_bufsize);
8525  e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf));
8526  e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack));
8527
8528  if (!e->buf || !e->segbuf || !e->stack) {
8529    return NULL;
8530  }
8531
8532  e->limit = e->buf + initial_bufsize;
8533  e->seglimit = e->segbuf + initial_segbufsize;
8534  e->stacklimit = e->stack + stack_size;
8535
8536  upb_pb_encoder_reset(e);
8537  upb_sink_reset(&e->input_, h, e);
8538
8539  e->arena = arena;
8540  e->output_ = output;
8541  e->subc = output.closure;
8542  e->ptr = e->buf;
8543
8544  /* If this fails, increase the value in encoder.h. */
8545  UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
8546                      UPB_PB_ENCODER_SIZE);
8547  return e;
8548}
8549
8550upb_sink upb_pb_encoder_input(upb_pb_encoder *e) { return e->input_; }
8551/*
8552 * upb::pb::TextPrinter
8553 *
8554 * OPT: This is not optimized at all.  It uses printf() which parses the format
8555 * string every time, and it allocates memory for every put.
8556 */
8557
8558
8559#include <ctype.h>
8560#include <float.h>
8561#include <inttypes.h>
8562#include <stdarg.h>
8563#include <stdio.h>
8564#include <string.h>
8565
8566
8567
/* State of one text-format printer. */
struct upb_textprinter {
  /* Sink returned by upb_textprinter_input(); bound to this printer in
   * upb_textprinter_create(). */
  upb_sink input_;
  /* Bytes sink that every piece of printed text is written to. */
  upb_bytessink output_;
  /* Current submessage nesting level; indent() emits two spaces per level. */
  int indent_depth_;
  /* When true, indentation is suppressed and fields are separated by a space
   * instead of a newline. */
  bool single_line_;
  /* Sub-closure filled in by upb_bytessink_start() and passed to every
   * upb_bytessink_putbuf() call. */
  void *subc;
};
8575
/* Jumps to the enclosing function's `err` label when |x| evaluates to a
 * negative value.  The expansion is a bare `if` that already ends in a
 * semicolon, so do not use it as the unbraced body of an if/else. */
#define CHECK(x) if ((x) < 0) goto err;
8577
/* Returns the part of |longname| after its last '.', or |longname| itself if
 * it contains no '.'.  The result points into |longname|; nothing is copied. */
static const char *shortname(const char *longname) {
  const char *dot = strrchr(longname, '.');
  if (dot == NULL) {
    return longname;
  }
  return dot + 1;
}
8582
8583static int indent(upb_textprinter *p) {
8584  int i;
8585  if (!p->single_line_)
8586    for (i = 0; i < p->indent_depth_; i++)
8587      upb_bytessink_putbuf(p->output_, p->subc, "  ", 2, NULL);
8588  return 0;
8589}
8590
8591static int endfield(upb_textprinter *p) {
8592  const char ch = (p->single_line_ ? ' ' : '\n');
8593  upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL);
8594  return 0;
8595}
8596
8597static int putescaped(upb_textprinter *p, const char *buf, size_t len,
8598                      bool preserve_utf8) {
8599  /* Based on CEscapeInternal() from Google's protobuf release. */
8600  char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf);
8601  const char *end = buf + len;
8602
8603  /* I think hex is prettier and more useful, but proto2 uses octal; should
8604   * investigate whether it can parse hex also. */
8605  const bool use_hex = false;
8606  bool last_hex_escape = false; /* true if last output char was \xNN */
8607
8608  for (; buf < end; buf++) {
8609    bool is_hex_escape;
8610
8611    if (dstend - dst < 4) {
8612      upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
8613      dst = dstbuf;
8614    }
8615
8616    is_hex_escape = false;
8617    switch (*buf) {
8618      case '\n': *(dst++) = '\\'; *(dst++) = 'n';  break;
8619      case '\r': *(dst++) = '\\'; *(dst++) = 'r';  break;
8620      case '\t': *(dst++) = '\\'; *(dst++) = 't';  break;
8621      case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break;
8622      case '\'': *(dst++) = '\\'; *(dst++) = '\''; break;
8623      case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break;
8624      default:
8625        /* Note that if we emit \xNN and the buf character after that is a hex
8626         * digit then that digit must be escaped too to prevent it being
8627         * interpreted as part of the character code by C. */
8628        if ((!preserve_utf8 || (uint8_t)*buf < 0x80) &&
8629            (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) {
8630          sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf);
8631          is_hex_escape = use_hex;
8632          dst += 4;
8633        } else {
8634          *(dst++) = *buf; break;
8635        }
8636    }
8637    last_hex_escape = is_hex_escape;
8638  }
8639  /* Flush remaining data. */
8640  upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL);
8641  return 0;
8642}
8643
8644bool putf(upb_textprinter *p, const char *fmt, ...) {
8645  va_list args;
8646  va_list args_copy;
8647  char *str;
8648  int written;
8649  int len;
8650  bool ok;
8651
8652  va_start(args, fmt);
8653
8654  /* Run once to get the length of the string. */
8655  _upb_va_copy(args_copy, args);
8656  len = _upb_vsnprintf(NULL, 0, fmt, args_copy);
8657  va_end(args_copy);
8658
8659  /* + 1 for NULL terminator (vsprintf() requires it even if we don't). */
8660  str = upb_gmalloc(len + 1);
8661  if (!str) return false;
8662  written = vsprintf(str, fmt, args);
8663  va_end(args);
8664  UPB_ASSERT(written == len);
8665
8666  ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL);
8667  upb_gfree(str);
8668  return ok;
8669}
8670
8671
8672/* handlers *******************************************************************/
8673
8674static bool textprinter_startmsg(void *c, const void *hd) {
8675  upb_textprinter *p = c;
8676  UPB_UNUSED(hd);
8677  if (p->indent_depth_ == 0) {
8678    upb_bytessink_start(p->output_, 0, &p->subc);
8679  }
8680  return true;
8681}
8682
8683static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) {
8684  upb_textprinter *p = c;
8685  UPB_UNUSED(hd);
8686  UPB_UNUSED(s);
8687  if (p->indent_depth_ == 0) {
8688    upb_bytessink_end(p->output_);
8689  }
8690  return true;
8691}
8692
/* Generates textprinter_put<name>(), the value handler for one numeric type.
 * The handler data is the field's upb_fielddef; the output is the indentation,
 * then "<field name>: <value formatted with fmt>", then the field terminator.
 *
 *   name  - suffix of the generated function name.
 *   ctype - C type of the value argument.
 *   fmt   - printf conversion (a string literal) used for the value.
 *
 * The result of putf() is not checked; only indent()/endfield() can route to
 * the `err` path that returns false. */
#define TYPE(name, ctype, fmt) \
  static bool textprinter_put ## name(void *closure, const void *handler_data, \
                                      ctype val) {                             \
    upb_textprinter *p = closure;                                              \
    const upb_fielddef *f = handler_data;                                      \
    CHECK(indent(p));                                                          \
    putf(p, "%s: " fmt, upb_fielddef_name(f), val);                            \
    CHECK(endfield(p));                                                        \
    return true;                                                               \
  err:                                                                         \
    return false;                                                              \
}
8705
8706static bool textprinter_putbool(void *closure, const void *handler_data,
8707                                bool val) {
8708  upb_textprinter *p = closure;
8709  const upb_fielddef *f = handler_data;
8710  CHECK(indent(p));
8711  putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false");
8712  CHECK(endfield(p));
8713  return true;
8714err:
8715  return false;
8716}
8717
/* Two-level stringification: STRINGIFY_MACROVAL(X) first expands the macro X
 * and then turns the result into a string literal, so FLT_DIG / DBL_DIG below
 * become their numeric values rather than the text "FLT_DIG". */
#define STRINGIFY_HELPER(x) #x
#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x)

/* Numeric value handlers.  Floats and doubles are printed with %g using
 * FLT_DIG / DBL_DIG significant digits respectively. */
TYPE(int32,  int32_t,  "%" PRId32)
TYPE(int64,  int64_t,  "%" PRId64)
TYPE(uint32, uint32_t, "%" PRIu32)
TYPE(uint64, uint64_t, "%" PRIu64)
TYPE(float,  float,    "%." STRINGIFY_MACROVAL(FLT_DIG) "g")
TYPE(double, double,   "%." STRINGIFY_MACROVAL(DBL_DIG) "g")

#undef TYPE
8729
8730/* Output a symbolic value from the enum if found, else just print as int32. */
8731static bool textprinter_putenum(void *closure, const void *handler_data,
8732                                int32_t val) {
8733  upb_textprinter *p = closure;
8734  const upb_fielddef *f = handler_data;
8735  const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f);
8736  const char *label = upb_enumdef_iton(enum_def, val);
8737  if (label) {
8738    indent(p);
8739    putf(p, "%s: %s", upb_fielddef_name(f), label);
8740    endfield(p);
8741  } else {
8742    if (!textprinter_putint32(closure, handler_data, val))
8743      return false;
8744  }
8745  return true;
8746}
8747
8748static void *textprinter_startstr(void *closure, const void *handler_data,
8749                      size_t size_hint) {
8750  upb_textprinter *p = closure;
8751  const upb_fielddef *f = handler_data;
8752  UPB_UNUSED(size_hint);
8753  indent(p);
8754  putf(p, "%s: \"", upb_fielddef_name(f));
8755  return p;
8756}
8757
8758static bool textprinter_endstr(void *closure, const void *handler_data) {
8759  upb_textprinter *p = closure;
8760  UPB_UNUSED(handler_data);
8761  putf(p, "\"");
8762  endfield(p);
8763  return true;
8764}
8765
8766static size_t textprinter_putstr(void *closure, const void *hd, const char *buf,
8767                                 size_t len, const upb_bufhandle *handle) {
8768  upb_textprinter *p = closure;
8769  const upb_fielddef *f = hd;
8770  UPB_UNUSED(handle);
8771  CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING));
8772  return len;
8773err:
8774  return 0;
8775}
8776
8777static void *textprinter_startsubmsg(void *closure, const void *handler_data) {
8778  upb_textprinter *p = closure;
8779  const char *name = handler_data;
8780  CHECK(indent(p));
8781  putf(p, "%s {%c", name, p->single_line_ ? ' ' : '\n');
8782  p->indent_depth_++;
8783  return p;
8784err:
8785  return UPB_BREAK;
8786}
8787
8788static bool textprinter_endsubmsg(void *closure, const void *handler_data) {
8789  upb_textprinter *p = closure;
8790  UPB_UNUSED(handler_data);
8791  p->indent_depth_--;
8792  CHECK(indent(p));
8793  upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL);
8794  CHECK(endfield(p));
8795  return true;
8796err:
8797  return false;
8798}
8799
8800static void onmreg(const void *c, upb_handlers *h) {
8801  const upb_msgdef *m = upb_handlers_msgdef(h);
8802  upb_msg_field_iter i;
8803  UPB_UNUSED(c);
8804
8805  upb_handlers_setstartmsg(h, textprinter_startmsg, NULL);
8806  upb_handlers_setendmsg(h, textprinter_endmsg, NULL);
8807
8808  for(upb_msg_field_begin(&i, m);
8809      !upb_msg_field_done(&i);
8810      upb_msg_field_next(&i)) {
8811    upb_fielddef *f = upb_msg_iter_field(&i);
8812    upb_handlerattr attr = UPB_HANDLERATTR_INIT;
8813    attr.handler_data = f;
8814    switch (upb_fielddef_type(f)) {
8815      case UPB_TYPE_INT32:
8816        upb_handlers_setint32(h, f, textprinter_putint32, &attr);
8817        break;
8818      case UPB_TYPE_INT64:
8819        upb_handlers_setint64(h, f, textprinter_putint64, &attr);
8820        break;
8821      case UPB_TYPE_UINT32:
8822        upb_handlers_setuint32(h, f, textprinter_putuint32, &attr);
8823        break;
8824      case UPB_TYPE_UINT64:
8825        upb_handlers_setuint64(h, f, textprinter_putuint64, &attr);
8826        break;
8827      case UPB_TYPE_FLOAT:
8828        upb_handlers_setfloat(h, f, textprinter_putfloat, &attr);
8829        break;
8830      case UPB_TYPE_DOUBLE:
8831        upb_handlers_setdouble(h, f, textprinter_putdouble, &attr);
8832        break;
8833      case UPB_TYPE_BOOL:
8834        upb_handlers_setbool(h, f, textprinter_putbool, &attr);
8835        break;
8836      case UPB_TYPE_STRING:
8837      case UPB_TYPE_BYTES:
8838        upb_handlers_setstartstr(h, f, textprinter_startstr, &attr);
8839        upb_handlers_setstring(h, f, textprinter_putstr, &attr);
8840        upb_handlers_setendstr(h, f, textprinter_endstr, &attr);
8841        break;
8842      case UPB_TYPE_MESSAGE: {
8843        const char *name =
8844            upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_GROUP
8845                ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f)))
8846                : upb_fielddef_name(f);
8847        attr.handler_data = name;
8848        upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr);
8849        upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr);
8850        break;
8851      }
8852      case UPB_TYPE_ENUM:
8853        upb_handlers_setint32(h, f, textprinter_putenum, &attr);
8854        break;
8855    }
8856  }
8857}
8858
8859static void textprinter_reset(upb_textprinter *p, bool single_line) {
8860  p->single_line_ = single_line;
8861  p->indent_depth_ = 0;
8862}
8863
8864
8865/* Public API *****************************************************************/
8866
8867upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h,
8868                                        upb_bytessink output) {
8869  upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter));
8870  if (!p) return NULL;
8871
8872  p->output_ = output;
8873  upb_sink_reset(&p->input_, h, p);
8874  textprinter_reset(p, false);
8875
8876  return p;
8877}
8878
8879upb_handlercache *upb_textprinter_newcache(void) {
8880  return upb_handlercache_new(&onmreg, NULL);
8881}
8882
8883upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; }
8884
8885void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) {
8886  p->single_line_ = single_line;
8887}
8888
8889
/* Maps a descriptor type to the wire type used for its non-packed encoding.
 * Index is descriptor type.  The binary encoder consults this table for every
 * field that is not packed.
 * NOTE(review): entry 0 is labelled ENDGROUP; presumably it is a placeholder
 * because descriptor type numbering starts at 1 -- confirm. */
const uint8_t upb_pb_native_wire_types[] = {
  UPB_WIRE_TYPE_END_GROUP,     /* ENDGROUP */
  UPB_WIRE_TYPE_64BIT,         /* DOUBLE */
  UPB_WIRE_TYPE_32BIT,         /* FLOAT */
  UPB_WIRE_TYPE_VARINT,        /* INT64 */
  UPB_WIRE_TYPE_VARINT,        /* UINT64 */
  UPB_WIRE_TYPE_VARINT,        /* INT32 */
  UPB_WIRE_TYPE_64BIT,         /* FIXED64 */
  UPB_WIRE_TYPE_32BIT,         /* FIXED32 */
  UPB_WIRE_TYPE_VARINT,        /* BOOL */
  UPB_WIRE_TYPE_DELIMITED,     /* STRING */
  UPB_WIRE_TYPE_START_GROUP,   /* GROUP */
  UPB_WIRE_TYPE_DELIMITED,     /* MESSAGE */
  UPB_WIRE_TYPE_DELIMITED,     /* BYTES */
  UPB_WIRE_TYPE_VARINT,        /* UINT32 */
  UPB_WIRE_TYPE_VARINT,        /* ENUM */
  UPB_WIRE_TYPE_32BIT,         /* SFIXED32 */
  UPB_WIRE_TYPE_64BIT,         /* SFIXED64 */
  UPB_WIRE_TYPE_VARINT,        /* SINT32 */
  UPB_WIRE_TYPE_VARINT,        /* SINT64 */
};
8912
/* A basic branch-based decoder, uses 32-bit values to get good performance
 * on 32-bit architectures (but performs well on 64-bits also).
 * This scheme comes from the original Google Protobuf implementation
 * (proto2).
 *
 * Continues decoding a varint: |r.val| supplies the bits gathered so far and
 * |r.p| points at the next byte.  The first byte read here lands at bit 14,
 * so the caller is expected to have consumed the bytes carrying bits 0-13
 * already.  Up to eight further bytes are read.
 *
 * Returns the completed value with |p| advanced past the last byte, or
 * {NULL, 0} if all eight bytes have their continuation bit (0x80) set. */
upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) {
  upb_decoderet err = {NULL, 0};
  const char *p = r.p;
  /* The value is accumulated as two 32-bit halves. */
  uint32_t low = (uint32_t)r.val;
  uint32_t high = 0;
  uint32_t b;
  b = *(p++); low  |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); low  |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  /* This byte straddles the 32-bit boundary: its low 4 payload bits go to the
   * top of |low| and its upper 3 payload bits start |high|. */
  b = *(p++); low  |= (b & 0x7fU) << 28;
              high  = (b & 0x7fU) >>  4; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) <<  3; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done;
  b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done;
  /* Only the lowest payload bit of this byte fits (bit 63 of the result). */
  b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = ((uint64_t)high << 32) | low;
  r.p = p;
  return r;
}
8939
/* Like the previous, but uses 64-bit values.
 *
 * Continues decoding a varint: |r.val| supplies the bits gathered so far and
 * |r.p| points at the next byte; the first byte read here lands at bit 14.
 * Up to eight further bytes are read.
 *
 * Returns the completed value with |p| advanced past the last byte, or
 * {NULL, 0} if all eight bytes have their continuation bit (0x80) set. */
upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) {
  const char *p = r.p;
  uint64_t val = r.val;
  /* 64-bit so that the shifts below are performed at full width. */
  uint64_t b;
  upb_decoderet err = {NULL, 0};
  b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done;
  b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done;
  /* Only the lowest payload bit of this byte fits (bit 63 of the result). */
  b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done;
  return err;

done:
  r.val = val;
  r.p = p;
  return r;
}
8961
8962#line 1 "upb/json/parser.rl"
8963/*
8964** upb::json::Parser (upb_json_parser)
8965**
8966** A parser that uses the Ragel State Machine Compiler to generate
8967** the finite automata.
8968**
8969** Ragel only natively handles regular languages, but we can manually
8970** program it a bit to handle context-free languages like JSON, by using
8971** the "fcall" and "fret" constructs.
8972**
8973** This parser can handle the basics, but needs several things to be fleshed
8974** out:
8975**
8976** - handling of unicode escape sequences (including high surrogate pairs).
8977** - properly check and report errors for unknown fields, stack overflow,
8978**   improper array nesting (or lack of nesting).
8979** - handling of base64 sequences with padding characters.
8980** - handling of push-back (non-success returns from sink functions).
8981** - handling of keys/escape-sequences/etc that span input buffers.
8982*/
8983
8984#include <ctype.h>
8985#include <errno.h>
8986#include <float.h>
8987#include <math.h>
8988#include <stdint.h>
8989#include <stdio.h>
8990#include <stdlib.h>
8991#include <string.h>
8992
8993#include <time.h>
8994
8995
8996
/* Number of entries in both the parser's frame stack and the state-machine
 * call stack (see struct upb_json_parser). */
#define UPB_JSON_MAX_DEPTH 64

/* Type of value message */
/* Passed as the |value_type| argument of start_value_object(). */
enum {
  VALUE_NULLVALUE   = 0,
  VALUE_NUMBERVALUE = 1,
  VALUE_STRINGVALUE = 2,
  VALUE_BOOLVALUE   = 3,
  VALUE_STRUCTVALUE = 4,
  VALUE_LISTVALUE   = 5
};
9008
/* Forward declarations of parser helpers, so they can be referenced before
 * their definitions. */

/* Predicates about the parser's current context. */
static bool is_top_level(upb_json_parser *p);
static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type);
static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type);

/* Number wrappers. */
static bool is_number_wrapper_object(upb_json_parser *p);
static bool does_number_wrapper_start(upb_json_parser *p);
static bool does_number_wrapper_end(upb_json_parser *p);

/* String wrappers. */
static bool is_string_wrapper_object(upb_json_parser *p);
static bool does_string_wrapper_start(upb_json_parser *p);
static bool does_string_wrapper_end(upb_json_parser *p);

/* Field masks. */
static bool does_fieldmask_start(upb_json_parser *p);
static bool does_fieldmask_end(upb_json_parser *p);
static void start_fieldmask_object(upb_json_parser *p);
static void end_fieldmask_object(upb_json_parser *p);

static void start_wrapper_object(upb_json_parser *p);
static void end_wrapper_object(upb_json_parser *p);

/* |value_type| is one of the VALUE_* constants. */
static void start_value_object(upb_json_parser *p, int value_type);
static void end_value_object(upb_json_parser *p);

static void start_listvalue_object(upb_json_parser *p);
static void end_listvalue_object(upb_json_parser *p);

static void start_structvalue_object(upb_json_parser *p);
static void end_structvalue_object(upb_json_parser *p);

/* Generic objects, Any objects and sub-objects. */
static void start_object(upb_json_parser *p);
static void end_object(upb_json_parser *p);

static void start_any_object(upb_json_parser *p, const char *ptr);
static bool end_any_object(upb_json_parser *p, const char *ptr);

static bool start_subobject(upb_json_parser *p);
static void end_subobject(upb_json_parser *p);

/* Object members. */
static void start_member(upb_json_parser *p);
static void end_member(upb_json_parser *p);
static bool end_membername(upb_json_parser *p);

static void start_any_member(upb_json_parser *p, const char *ptr);
static void end_any_member(upb_json_parser *p, const char *ptr);
static bool end_any_membername(upb_json_parser *p);

/* Bytes-handler entry points.  Note that parse() is declared with external
 * linkage while end() is static. */
size_t parse(void *closure, const void *hd, const char *buf, size_t size,
             const upb_bufhandle *handle);
static bool end(void *closure, const void *hd);

/* NOTE(review): presumably a sentinel object whose address marks end of input
 * -- confirm against its uses. */
static const char eof_ch = 'e';
9061
/* stringsink */
/* A bytes sink that accumulates everything written to it in a growable heap
 * buffer.  Set up with upb_stringsink_init(), released with
 * upb_stringsink_uninit(). */
typedef struct {
  /* Handler table holding stringsink_start / stringsink_string. */
  upb_byteshandler handler;
  /* Sink bound to |handler| with this object as its closure. */
  upb_bytessink sink;
  /* Heap buffer holding the accumulated bytes. */
  char *ptr;
  /* |len| is the number of bytes currently stored; |size| is the allocated
   * capacity of |ptr|. */
  size_t len, size;
} upb_stringsink;
9069
9070
9071static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) {
9072  upb_stringsink *sink = _sink;
9073  sink->len = 0;
9074  UPB_UNUSED(hd);
9075  UPB_UNUSED(size_hint);
9076  return sink;
9077}
9078
9079static size_t stringsink_string(void *_sink, const void *hd, const char *ptr,
9080                                size_t len, const upb_bufhandle *handle) {
9081  upb_stringsink *sink = _sink;
9082  size_t new_size = sink->size;
9083
9084  UPB_UNUSED(hd);
9085  UPB_UNUSED(handle);
9086
9087  while (sink->len + len > new_size) {
9088    new_size *= 2;
9089  }
9090
9091  if (new_size != sink->size) {
9092    sink->ptr = realloc(sink->ptr, new_size);
9093    sink->size = new_size;
9094  }
9095
9096  memcpy(sink->ptr + sink->len, ptr, len);
9097  sink->len += len;
9098
9099  return len;
9100}
9101
9102void upb_stringsink_init(upb_stringsink *sink) {
9103  upb_byteshandler_init(&sink->handler);
9104  upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL);
9105  upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL);
9106
9107  upb_bytessink_reset(&sink->sink, &sink->handler, sink);
9108
9109  sink->size = 32;
9110  sink->ptr = malloc(sink->size);
9111  sink->len = 0;
9112}
9113
9114void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); }
9115
/* Extra per-frame state used while parsing a google.protobuf.Any message.
 * Created by json_parser_any_frame_new() and released by
 * json_parser_any_frame_free(). */
typedef struct {
  /* For encoding Any value field in binary format. */
  upb_handlercache *encoder_handlercache;
  upb_stringsink stringsink;

  /* For decoding Any value field in json format. */
  upb_json_codecache *parser_codecache;
  upb_sink sink;
  /* NULL until json_parser_any_frame_set_payload_type() has been called. */
  upb_json_parser *parser;

  /* Mark the range of uninterpreted values in json input before type url. */
  const char *before_type_url_start;
  const char *before_type_url_end;

  /* Mark the range of uninterpreted values in json input after type url. */
  const char *after_type_url_start;
} upb_jsonparser_any_frame;
9133
/* One entry of the parser's scope stack (upb_json_parser.stack).  Every field
 * except |sink| is given a default value by init_frame(). */
typedef struct {
  /* NOTE(review): presumably the sink that receives the events for this
   * frame's message -- confirm.  Not touched by init_frame(). */
  upb_sink sink;

  /* The current message in which we're parsing, and the field whose value we're
   * expecting next. */
  const upb_msgdef *m;
  const upb_fielddef *f;

  /* The table mapping json name to fielddef for this message. */
  const upb_strtable *name_table;

  /* We are in a repeated-field context. We need this flag to decide whether to
   * handle the array as a normal repeated field or a
   * google.protobuf.ListValue/google.protobuf.Value. */
  bool is_repeated;

  /* We are in a repeated-field context, ready to emit mapentries as
   * submessages. This flag alters the start-of-object (open-brace) behavior to
   * begin a sequence of mapentry messages rather than a single submessage. */
  bool is_map;

  /* We are in a map-entry message context. This flag is set when parsing the
   * value field of a single map entry and indicates to all value-field parsers
   * (subobjects, strings, numbers, and bools) that the map-entry submessage
   * should end as soon as the value is parsed. */
  bool is_mapentry;

  /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent
   * message's map field that we're currently parsing. This differs from |f|
   * because |f| is the field in the *current* message (i.e., the map-entry
   * message itself), not the parent's field that leads to this map. */
  const upb_fielddef *mapfield;

  /* We are in an Any message context. This flag is set when parsing the Any
   * message and indicates to all field parsers (subobjects, strings, numbers,
   * and bools) that the parsed field should be serialized as binary data or
   * cached (type url not found yet). */
  bool is_any;

  /* The type of packed message in Any. */
  upb_jsonparser_any_frame *any_frame;

  /* True if the field to be parsed is unknown. */
  bool is_unknown_field;
} upb_jsonparser_frame;
9179
9180static void init_frame(upb_jsonparser_frame* frame) {
9181  frame->m = NULL;
9182  frame->f = NULL;
9183  frame->name_table = NULL;
9184  frame->is_repeated = false;
9185  frame->is_map = false;
9186  frame->is_mapentry = false;
9187  frame->mapfield = NULL;
9188  frame->is_any = false;
9189  frame->any_frame = NULL;
9190  frame->is_unknown_field = false;
9191}
9192
/* Complete state of one JSON parser instance. */
struct upb_json_parser {
  upb_arena *arena;
  const upb_json_parsermethod *method;
  upb_bytessink input_;

  /* Stack to track the JSON scopes we are in. */
  upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH];
  /* |top| is the frame currently in use.
   * NOTE(review): |limit| presumably marks the end of |stack| -- confirm. */
  upb_jsonparser_frame *top;
  upb_jsonparser_frame *limit;

  upb_status *status;

  /* Ragel's internal parsing stack for the parsing state machine. */
  int current_state;
  int parser_stack[UPB_JSON_MAX_DEPTH];
  int parser_top;

  /* The handle for the current buffer. */
  const upb_bufhandle *handle;

  /* Accumulate buffer.  See details in parser.rl. */
  const char *accumulated;
  size_t accumulated_len;
  char *accumulate_buf;
  size_t accumulate_buf_size;

  /* Multi-part text data.  See details in parser.rl. */
  int multipart_state;
  upb_selector_t string_selector;

  /* Input capture.  See details in parser.rl. */
  const char *capture;

  /* Intermediate result of parsing a unicode escape sequence. */
  uint32_t digit;

  /* For resolve type url in Any. */
  const upb_symtab *symtab;

  /* Whether to proceed if unknown field is met. */
  bool ignore_json_unknown;

  /* Cache for parsing timestamp due to base and zone are handled in different
   * handlers. */
  struct tm tm;
};
9239
9240static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) {
9241  upb_jsonparser_frame *inner;
9242  inner = p->top + 1;
9243  init_frame(inner);
9244  return inner;
9245}
9246
/* Cache of JSON parser methods, keyed by message definition. */
struct upb_json_codecache {
  upb_arena *arena;
  upb_inttable methods;   /* upb_msgdef* -> upb_json_parsermethod* */
};
9251
/* Per-message-type parser data held in a upb_json_codecache. */
struct upb_json_parsermethod {
  /* The cache this method belongs to; set_name_table() uses it to look up the
   * methods of other message types. */
  const upb_json_codecache *cache;
  upb_byteshandler input_handler_;

  /* Maps json_name -> fielddef */
  upb_strtable name_table;
};
9259
/* Returns false from the enclosing function when |x| evaluates to false.
 * NOTE(review): this expands to a bare `if` statement, so placing it directly
 * before an `else` would capture that `else`. */
#define PARSER_CHECK_RETURN(x) if (!(x)) return false
9261
9262static upb_jsonparser_any_frame *json_parser_any_frame_new(
9263    upb_json_parser *p) {
9264  upb_jsonparser_any_frame *frame;
9265
9266  frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame));
9267
9268  frame->encoder_handlercache = upb_pb_encoder_newcache();
9269  frame->parser_codecache = upb_json_codecache_new();
9270  frame->parser = NULL;
9271  frame->before_type_url_start = NULL;
9272  frame->before_type_url_end = NULL;
9273  frame->after_type_url_start = NULL;
9274
9275  upb_stringsink_init(&frame->stringsink);
9276
9277  return frame;
9278}
9279
9280static void json_parser_any_frame_set_payload_type(
9281    upb_json_parser *p,
9282    upb_jsonparser_any_frame *frame,
9283    const upb_msgdef *payload_type) {
9284  const upb_handlers *h;
9285  const upb_json_parsermethod *parser_method;
9286  upb_pb_encoder *encoder;
9287
9288  /* Initialize encoder. */
9289  h = upb_handlercache_get(frame->encoder_handlercache, payload_type);
9290  encoder = upb_pb_encoder_create(p->arena, h, frame->stringsink.sink);
9291
9292  /* Initialize parser. */
9293  parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type);
9294  upb_sink_reset(&frame->sink, h, encoder);
9295  frame->parser =
9296      upb_json_parser_create(p->arena, parser_method, p->symtab, frame->sink,
9297                             p->status, p->ignore_json_unknown);
9298}
9299
9300static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) {
9301  upb_handlercache_free(frame->encoder_handlercache);
9302  upb_json_codecache_free(frame->parser_codecache);
9303  upb_stringsink_uninit(&frame->stringsink);
9304}
9305
9306static bool json_parser_any_frame_has_type_url(
9307  upb_jsonparser_any_frame *frame) {
9308  return frame->parser != NULL;
9309}
9310
9311static bool json_parser_any_frame_has_value_before_type_url(
9312  upb_jsonparser_any_frame *frame) {
9313  return frame->before_type_url_start != frame->before_type_url_end;
9314}
9315
9316static bool json_parser_any_frame_has_value_after_type_url(
9317  upb_jsonparser_any_frame *frame) {
9318  return frame->after_type_url_start != NULL;
9319}
9320
9321static bool json_parser_any_frame_has_value(
9322  upb_jsonparser_any_frame *frame) {
9323  return json_parser_any_frame_has_value_before_type_url(frame) ||
9324         json_parser_any_frame_has_value_after_type_url(frame);
9325}
9326
9327static void json_parser_any_frame_set_before_type_url_end(
9328    upb_jsonparser_any_frame *frame,
9329    const char *ptr) {
9330  if (frame->parser == NULL) {
9331    frame->before_type_url_end = ptr;
9332  }
9333}
9334
9335static void json_parser_any_frame_set_after_type_url_start_once(
9336    upb_jsonparser_any_frame *frame,
9337    const char *ptr) {
9338  if (json_parser_any_frame_has_type_url(frame) &&
9339      frame->after_type_url_start == NULL) {
9340    frame->after_type_url_start = ptr;
9341  }
9342}
9343
/* Used to signal that a capture has been suspended.  Only the address of this
 * object is used: capture_suspend() stores it in p->capture and
 * capture_resume() expects to find it there. */
static char suspend_capture;
9346
9347static upb_selector_t getsel_for_handlertype(upb_json_parser *p,
9348                                             upb_handlertype_t type) {
9349  upb_selector_t sel;
9350  bool ok = upb_handlers_getselector(p->top->f, type, &sel);
9351  UPB_ASSUME(ok);
9352  return sel;
9353}
9354
9355static upb_selector_t parser_getsel(upb_json_parser *p) {
9356  return getsel_for_handlertype(
9357      p, upb_handlers_getprimitivehandlertype(p->top->f));
9358}
9359
9360static bool check_stack(upb_json_parser *p) {
9361  if ((p->top + 1) == p->limit) {
9362    upb_status_seterrmsg(p->status, "Nesting too deep");
9363    return false;
9364  }
9365
9366  return true;
9367}
9368
9369static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) {
9370  upb_value v;
9371  const upb_json_codecache *cache = p->method->cache;
9372  bool ok;
9373  const upb_json_parsermethod *method;
9374
9375  ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v);
9376  UPB_ASSUME(ok);
9377  method = upb_value_getconstptr(v);
9378
9379  frame->name_table = &method->name_table;
9380}
9381
9382/* There are GCC/Clang built-ins for overflow checking which we could start
9383 * using if there was any performance benefit to it. */
9384
/* Adds |a| and |b|.  On success stores the sum in |*c| and returns true; if
 * the sum would not fit in a size_t, returns false and leaves |*c| alone. */
static bool checked_add(size_t a, size_t b, size_t *c) {
  const size_t headroom = SIZE_MAX - a;
  if (b <= headroom) {
    *c = a + b;
    return true;
  }
  return false;
}
9390
/* Returns a * b, or SIZE_MAX if the product does not fit in a size_t. */
static size_t saturating_multiply(size_t a, size_t b) {
  /* Unsigned arithmetic wraps, so computing the product first is well
   * defined; overflow is detected by dividing it back. */
  const size_t product = a * b;
  if (b == 0) {
    return product;
  }
  return (product / b == a) ? product : SIZE_MAX;
}
9399
9400
9401/* Base64 decoding ************************************************************/
9402
9403/* TODO(haberman): make this streaming. */
9404
/* Maps each byte value (0-255) to its 6-bit base64 value, or -1 for bytes
 * outside the base64 alphabet.  The padding character '=' also maps to -1;
 * nonbase64() tells it apart from genuinely invalid characters. */
static const signed char b64table[] = {
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      62/*+*/, -1,      -1,      -1,      63/*/ */,
  52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/,
  60/*8*/, 61/*9*/, -1,      -1,      -1,      -1,      -1,      -1,
  -1,       0/*A*/,  1/*B*/,  2/*C*/,  3/*D*/,  4/*E*/,  5/*F*/,  6/*G*/,
  07/*H*/,  8/*I*/,  9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/,
  15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/,
  23/*X*/, 24/*Y*/, 25/*Z*/, -1,      -1,      -1,      -1,      -1,
  -1,      26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/,
  33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/,
  41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/,
  49/*x*/, 50/*y*/, 51/*z*/, -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1,
  -1,      -1,      -1,      -1,      -1,      -1,      -1,      -1
};
9439
9440/* Returns the table value sign-extended to 32 bits.  Knowing that the upper
9441 * bits will be 1 for unrecognized characters makes it easier to check for
9442 * this error condition later (see below). */
9443int32_t b64lookup(unsigned char ch) { return b64table[ch]; }
9444
/* Returns true if |ch| is neither a base64 alphabet character nor the '='
 * padding character. */
bool nonbase64(unsigned char ch) {
  if (ch == '=') {
    return false;
  }
  return b64lookup(ch) == -1;
}
9448
9449static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr,
9450                        size_t len) {
9451  const char *limit = ptr + len;
9452  for (; ptr < limit; ptr += 4) {
9453    uint32_t val;
9454    char output[3];
9455
9456    if (limit - ptr < 4) {
9457      upb_status_seterrf(p->status,
9458                         "Base64 input for bytes field not a multiple of 4: %s",
9459                         upb_fielddef_name(p->top->f));
9460      return false;
9461    }
9462
9463    val = b64lookup(ptr[0]) << 18 |
9464          b64lookup(ptr[1]) << 12 |
9465          b64lookup(ptr[2]) << 6  |
9466          b64lookup(ptr[3]);
9467
9468    /* Test the upper bit; returns true if any of the characters returned -1. */
9469    if (val & 0x80000000) {
9470      goto otherchar;
9471    }
9472
9473    output[0] = val >> 16;
9474    output[1] = (val >> 8) & 0xff;
9475    output[2] = val & 0xff;
9476    upb_sink_putstring(p->top->sink, sel, output, 3, NULL);
9477  }
9478  return true;
9479
9480otherchar:
9481  if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) ||
9482      nonbase64(ptr[3]) ) {
9483    upb_status_seterrf(p->status,
9484                       "Non-base64 characters in bytes field: %s",
9485                       upb_fielddef_name(p->top->f));
9486    return false;
9487  } if (ptr[2] == '=') {
9488    uint32_t val;
9489    char output;
9490
9491    /* Last group contains only two input bytes, one output byte. */
9492    if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') {
9493      goto badpadding;
9494    }
9495
9496    val = b64lookup(ptr[0]) << 18 |
9497          b64lookup(ptr[1]) << 12;
9498
9499    UPB_ASSERT(!(val & 0x80000000));
9500    output = val >> 16;
9501    upb_sink_putstring(p->top->sink, sel, &output, 1, NULL);
9502    return true;
9503  } else {
9504    uint32_t val;
9505    char output[2];
9506
9507    /* Last group contains only three input bytes, two output bytes. */
9508    if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') {
9509      goto badpadding;
9510    }
9511
9512    val = b64lookup(ptr[0]) << 18 |
9513          b64lookup(ptr[1]) << 12 |
9514          b64lookup(ptr[2]) << 6;
9515
9516    output[0] = val >> 16;
9517    output[1] = (val >> 8) & 0xff;
9518    upb_sink_putstring(p->top->sink, sel, output, 2, NULL);
9519    return true;
9520  }
9521
9522badpadding:
9523  upb_status_seterrf(p->status,
9524                     "Incorrect base64 padding for field: %s (%.*s)",
9525                     upb_fielddef_name(p->top->f),
9526                     4, ptr);
9527  return false;
9528}
9529
9530
9531/* Accumulate buffer **********************************************************/
9532
9533/* Functionality for accumulating a buffer.
9534 *
9535 * Some parts of the parser need an entire value as a contiguous string.  For
9536 * example, to look up a member name in a hash table, or to turn a string into
9537 * a number, the relevant library routines need the input string to be in
9538 * contiguous memory, even if the value spanned two or more buffers in the
9539 * input.  These routines handle that.
9540 *
9541 * In the common case we can just point to the input buffer to get this
9542 * contiguous string and avoid any actual copy.  So we optimistically begin
9543 * this way.  But there are a few cases where we must instead copy into a
9544 * separate buffer:
9545 *
9546 *   1. The string was not contiguous in the input (it spanned buffers).
9547 *
9548 *   2. The string included escape sequences that need to be interpreted to get
9549 *      the true value in a contiguous buffer. */
9550
9551static void assert_accumulate_empty(upb_json_parser *p) {
9552  UPB_ASSERT(p->accumulated == NULL);
9553  UPB_ASSERT(p->accumulated_len == 0);
9554}
9555
9556static void accumulate_clear(upb_json_parser *p) {
9557  p->accumulated = NULL;
9558  p->accumulated_len = 0;
9559}
9560
9561/* Used internally by accumulate_append(). */
9562static bool accumulate_realloc(upb_json_parser *p, size_t need) {
9563  void *mem;
9564  size_t old_size = p->accumulate_buf_size;
9565  size_t new_size = UPB_MAX(old_size, 128);
9566  while (new_size < need) {
9567    new_size = saturating_multiply(new_size, 2);
9568  }
9569
9570  mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size);
9571  if (!mem) {
9572    upb_status_seterrmsg(p->status, "Out of memory allocating buffer.");
9573    return false;
9574  }
9575
9576  p->accumulate_buf = mem;
9577  p->accumulate_buf_size = new_size;
9578  return true;
9579}
9580
9581/* Logically appends the given data to the append buffer.
9582 * If "can_alias" is true, we will try to avoid actually copying, but the buffer
9583 * must be valid until the next accumulate_append() call (if any). */
9584static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len,
9585                              bool can_alias) {
9586  size_t need;
9587
9588  if (!p->accumulated && can_alias) {
9589    p->accumulated = buf;
9590    p->accumulated_len = len;
9591    return true;
9592  }
9593
9594  if (!checked_add(p->accumulated_len, len, &need)) {
9595    upb_status_seterrmsg(p->status, "Integer overflow.");
9596    return false;
9597  }
9598
9599  if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) {
9600    return false;
9601  }
9602
9603  if (p->accumulated != p->accumulate_buf) {
9604    memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len);
9605    p->accumulated = p->accumulate_buf;
9606  }
9607
9608  memcpy(p->accumulate_buf + p->accumulated_len, buf, len);
9609  p->accumulated_len += len;
9610  return true;
9611}
9612
9613/* Returns a pointer to the data accumulated since the last accumulate_clear()
9614 * call, and writes the length to *len.  This with point either to the input
9615 * buffer or a temporary accumulate buffer. */
9616static const char *accumulate_getptr(upb_json_parser *p, size_t *len) {
9617  UPB_ASSERT(p->accumulated);
9618  *len = p->accumulated_len;
9619  return p->accumulated;
9620}
9621
9622
/* Multi-part text data *******************************************************/
9624
9625/* When we have text data in the input, it can often come in multiple segments.
9626 * For example, there may be some raw string data followed by an escape
9627 * sequence.  The two segments are processed with different logic.  Also buffer
9628 * seams in the input can cause multiple segments.
9629 *
9630 * As we see segments, there are two main cases for how we want to process them:
9631 *
9632 *  1. we want to push the captured input directly to string handlers.
9633 *
9634 *  2. we need to accumulate all the parts into a contiguous buffer for further
9635 *     processing (field name lookup, string->number conversion, etc). */
9636
/* This is the set of states for p->multipart_state.  multipart_startaccum()
 * and multipart_start() enter the two active states; multipart_end() returns
 * to MULTIPART_INACTIVE. */
enum {
  /* We are not currently processing multipart data. */
  MULTIPART_INACTIVE = 0,

  /* We are processing multipart data by accumulating it into a contiguous
   * buffer. */
  MULTIPART_ACCUMULATE = 1,

  /* We are processing multipart data by pushing each part directly to the
   * current string handlers. */
  MULTIPART_PUSHEAGERLY = 2
};
9650
9651/* Start a multi-part text value where we accumulate the data for processing at
9652 * the end. */
9653static void multipart_startaccum(upb_json_parser *p) {
9654  assert_accumulate_empty(p);
9655  UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
9656  p->multipart_state = MULTIPART_ACCUMULATE;
9657}
9658
9659/* Start a multi-part text value where we immediately push text data to a string
9660 * value with the given selector. */
9661static void multipart_start(upb_json_parser *p, upb_selector_t sel) {
9662  assert_accumulate_empty(p);
9663  UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE);
9664  p->multipart_state = MULTIPART_PUSHEAGERLY;
9665  p->string_selector = sel;
9666}
9667
9668static bool multipart_text(upb_json_parser *p, const char *buf, size_t len,
9669                           bool can_alias) {
9670  switch (p->multipart_state) {
9671    case MULTIPART_INACTIVE:
9672      upb_status_seterrmsg(
9673          p->status, "Internal error: unexpected state MULTIPART_INACTIVE");
9674      return false;
9675
9676    case MULTIPART_ACCUMULATE:
9677      if (!accumulate_append(p, buf, len, can_alias)) {
9678        return false;
9679      }
9680      break;
9681
9682    case MULTIPART_PUSHEAGERLY: {
9683      const upb_bufhandle *handle = can_alias ? p->handle : NULL;
9684      upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle);
9685      break;
9686    }
9687  }
9688
9689  return true;
9690}
9691
9692/* Note: this invalidates the accumulate buffer!  Call only after reading its
9693 * contents. */
9694static void multipart_end(upb_json_parser *p) {
9695  /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */
9696  p->multipart_state = MULTIPART_INACTIVE;
9697  accumulate_clear(p);
9698}
9699
9700
9701/* Input capture **************************************************************/
9702
9703/* Functionality for capturing a region of the input as text.  Gracefully
9704 * handles the case where a buffer seam occurs in the middle of the captured
9705 * region. */
9706
9707static void capture_begin(upb_json_parser *p, const char *ptr) {
9708  UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE);
9709  UPB_ASSERT(p->capture == NULL);
9710  p->capture = ptr;
9711}
9712
9713static bool capture_end(upb_json_parser *p, const char *ptr) {
9714  UPB_ASSERT(p->capture);
9715  if (multipart_text(p, p->capture, ptr - p->capture, true)) {
9716    p->capture = NULL;
9717    return true;
9718  } else {
9719    return false;
9720  }
9721}
9722
9723/* This is called at the end of each input buffer (ie. when we have hit a
9724 * buffer seam).  If we are in the middle of capturing the input, this
9725 * processes the unprocessed capture region. */
9726static void capture_suspend(upb_json_parser *p, const char **ptr) {
9727  if (!p->capture) return;
9728
9729  if (multipart_text(p, p->capture, *ptr - p->capture, false)) {
9730    /* We use this as a signal that we were in the middle of capturing, and
9731     * that capturing should resume at the beginning of the next buffer.
9732     *
9733     * We can't use *ptr here, because we have no guarantee that this pointer
9734     * will be valid when we resume (if the underlying memory is freed, then
9735     * using the pointer at all, even to compare to NULL, is likely undefined
9736     * behavior). */
9737    p->capture = &suspend_capture;
9738  } else {
9739    /* Need to back up the pointer to the beginning of the capture, since
9740     * we were not able to actually preserve it. */
9741    *ptr = p->capture;
9742  }
9743}
9744
9745static void capture_resume(upb_json_parser *p, const char *ptr) {
9746  if (p->capture) {
9747    UPB_ASSERT(p->capture == &suspend_capture);
9748    p->capture = ptr;
9749  }
9750}
9751
9752
9753/* Callbacks from the parser **************************************************/
9754
9755/* These are the functions called directly from the parser itself.
9756 * We define these in the same order as their declarations in the parser. */
9757
/* Maps the character following a backslash in a JSON string to the character
 * it stands for.  Any other input trips an assertion (and yields 'x'). */
static char escape_char(char in) {
  switch (in) {
    /* These escapes stand for themselves. */
    case '"':
    case '/':
    case '\\':
      return in;

    /* Control-character escapes. */
    case 'b': return '\b';
    case 'f': return '\f';
    case 'n': return '\n';
    case 'r': return '\r';
    case 't': return '\t';

    default:
      UPB_ASSERT(0);
      return 'x';
  }
}
9773
9774static bool escape(upb_json_parser *p, const char *ptr) {
9775  char ch = escape_char(*ptr);
9776  return multipart_text(p, &ch, 1, false);
9777}
9778
9779static void start_hex(upb_json_parser *p) {
9780  p->digit = 0;
9781}
9782
9783static void hexdigit(upb_json_parser *p, const char *ptr) {
9784  char ch = *ptr;
9785
9786  p->digit <<= 4;
9787
9788  if (ch >= '0' && ch <= '9') {
9789    p->digit += (ch - '0');
9790  } else if (ch >= 'a' && ch <= 'f') {
9791    p->digit += ((ch - 'a') + 10);
9792  } else {
9793    UPB_ASSERT(ch >= 'A' && ch <= 'F');
9794    p->digit += ((ch - 'A') + 10);
9795  }
9796}
9797
9798static bool end_hex(upb_json_parser *p) {
9799  uint32_t codepoint = p->digit;
9800
9801  /* emit the codepoint as UTF-8. */
9802  char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */
9803  int length = 0;
9804  if (codepoint <= 0x7F) {
9805    utf8[0] = codepoint;
9806    length = 1;
9807  } else if (codepoint <= 0x07FF) {
9808    utf8[1] = (codepoint & 0x3F) | 0x80;
9809    codepoint >>= 6;
9810    utf8[0] = (codepoint & 0x1F) | 0xC0;
9811    length = 2;
9812  } else /* codepoint <= 0xFFFF */ {
9813    utf8[2] = (codepoint & 0x3F) | 0x80;
9814    codepoint >>= 6;
9815    utf8[1] = (codepoint & 0x3F) | 0x80;
9816    codepoint >>= 6;
9817    utf8[0] = (codepoint & 0x0F) | 0xE0;
9818    length = 3;
9819  }
9820  /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate
9821   * we have to wait for the next escape to get the full code point). */
9822
9823  return multipart_text(p, utf8, length, false);
9824}
9825
9826static void start_text(upb_json_parser *p, const char *ptr) {
9827  capture_begin(p, ptr);
9828}
9829
9830static bool end_text(upb_json_parser *p, const char *ptr) {
9831  return capture_end(p, ptr);
9832}
9833
9834static bool start_number(upb_json_parser *p, const char *ptr) {
9835  if (is_top_level(p)) {
9836    if (is_number_wrapper_object(p)) {
9837      start_wrapper_object(p);
9838    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9839      start_value_object(p, VALUE_NUMBERVALUE);
9840    } else {
9841      return false;
9842    }
9843  } else if (does_number_wrapper_start(p)) {
9844    if (!start_subobject(p)) {
9845      return false;
9846    }
9847    start_wrapper_object(p);
9848  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
9849    if (!start_subobject(p)) {
9850      return false;
9851    }
9852    start_value_object(p, VALUE_NUMBERVALUE);
9853  }
9854
9855  multipart_startaccum(p);
9856  capture_begin(p, ptr);
9857  return true;
9858}
9859
9860static bool parse_number(upb_json_parser *p, bool is_quoted);
9861
9862static bool end_number_nontop(upb_json_parser *p, const char *ptr) {
9863  if (!capture_end(p, ptr)) {
9864    return false;
9865  }
9866
9867  if (p->top->f == NULL) {
9868    multipart_end(p);
9869    return true;
9870  }
9871
9872  return parse_number(p, false);
9873}
9874
9875static bool end_number(upb_json_parser *p, const char *ptr) {
9876  if (!end_number_nontop(p, ptr)) {
9877    return false;
9878  }
9879
9880  if (does_number_wrapper_end(p)) {
9881    end_wrapper_object(p);
9882    if (!is_top_level(p)) {
9883      end_subobject(p);
9884    }
9885    return true;
9886  }
9887
9888  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
9889    end_value_object(p);
9890    if (!is_top_level(p)) {
9891      end_subobject(p);
9892    }
9893    return true;
9894  }
9895
9896  return true;
9897}
9898
9899/* |buf| is NULL-terminated. |buf| itself will never include quotes;
9900 * |is_quoted| tells us whether this text originally appeared inside quotes. */
9901static bool parse_number_from_buffer(upb_json_parser *p, const char *buf,
9902                                     bool is_quoted) {
9903  size_t len = strlen(buf);
9904  const char *bufend = buf + len;
9905  char *end;
9906  upb_fieldtype_t type = upb_fielddef_type(p->top->f);
9907  double val;
9908  double dummy;
9909  double inf = UPB_INFINITY;
9910
9911  errno = 0;
9912
9913  if (len == 0 || buf[0] == ' ') {
9914    return false;
9915  }
9916
9917  /* For integer types, first try parsing with integer-specific routines.
9918   * If these succeed, they will be more accurate for int64/uint64 than
9919   * strtod().
9920   */
9921  switch (type) {
9922    case UPB_TYPE_ENUM:
9923    case UPB_TYPE_INT32: {
9924      long val = strtol(buf, &end, 0);
9925      if (errno == ERANGE || end != bufend) {
9926        break;
9927      } else if (val > INT32_MAX || val < INT32_MIN) {
9928        return false;
9929      } else {
9930        upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val);
9931        return true;
9932      }
9933    }
9934    case UPB_TYPE_UINT32: {
9935      unsigned long val = strtoul(buf, &end, 0);
9936      if (end != bufend) {
9937        break;
9938      } else if (val > UINT32_MAX || errno == ERANGE) {
9939        return false;
9940      } else {
9941        upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val);
9942        return true;
9943      }
9944    }
9945    /* XXX: We can't handle [u]int64 properly on 32-bit machines because
9946     * strto[u]ll isn't in C89. */
9947    case UPB_TYPE_INT64: {
9948      long val = strtol(buf, &end, 0);
9949      if (errno == ERANGE || end != bufend) {
9950        break;
9951      } else {
9952        upb_sink_putint64(p->top->sink, parser_getsel(p), val);
9953        return true;
9954      }
9955    }
9956    case UPB_TYPE_UINT64: {
9957      unsigned long val = strtoul(p->accumulated, &end, 0);
9958      if (end != bufend) {
9959        break;
9960      } else if (errno == ERANGE) {
9961        return false;
9962      } else {
9963        upb_sink_putuint64(p->top->sink, parser_getsel(p), val);
9964        return true;
9965      }
9966    }
9967    default:
9968      break;
9969  }
9970
9971  if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) {
9972    /* Quoted numbers for integer types are not allowed to be in double form. */
9973    return false;
9974  }
9975
9976  if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) {
9977    /* C89 does not have an INFINITY macro. */
9978    val = inf;
9979  } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) {
9980    val = -inf;
9981  } else {
9982    val = strtod(buf, &end);
9983    if (errno == ERANGE || end != bufend) {
9984      return false;
9985    }
9986  }
9987
9988  switch (type) {
9989#define CASE(capitaltype, smalltype, ctype, min, max)                     \
9990    case UPB_TYPE_ ## capitaltype: {                                      \
9991      if (modf(val, &dummy) != 0 || val > max || val < min) {             \
9992        return false;                                                     \
9993      } else {                                                            \
9994        upb_sink_put ## smalltype(p->top->sink, parser_getsel(p),        \
9995                                  (ctype)val);                            \
9996        return true;                                                      \
9997      }                                                                   \
9998      break;                                                              \
9999    }
10000    case UPB_TYPE_ENUM:
10001    CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX);
10002    CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX);
10003    CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX);
10004    CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX);
10005#undef CASE
10006
10007    case UPB_TYPE_DOUBLE:
10008      upb_sink_putdouble(p->top->sink, parser_getsel(p), val);
10009      return true;
10010    case UPB_TYPE_FLOAT:
10011      if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) {
10012        return false;
10013      } else {
10014        upb_sink_putfloat(p->top->sink, parser_getsel(p), val);
10015        return true;
10016      }
10017    default:
10018      return false;
10019  }
10020}
10021
10022static bool parse_number(upb_json_parser *p, bool is_quoted) {
10023  size_t len;
10024  const char *buf;
10025
10026  /* strtol() and friends unfortunately do not support specifying the length of
10027   * the input string, so we need to force a copy into a NULL-terminated buffer. */
10028  if (!multipart_text(p, "\0", 1, false)) {
10029    return false;
10030  }
10031
10032  buf = accumulate_getptr(p, &len);
10033
10034  if (parse_number_from_buffer(p, buf, is_quoted)) {
10035    multipart_end(p);
10036    return true;
10037  } else {
10038    upb_status_seterrf(p->status, "error parsing number: %s", buf);
10039    multipart_end(p);
10040    return false;
10041  }
10042}
10043
10044static bool parser_putbool(upb_json_parser *p, bool val) {
10045  bool ok;
10046
10047  if (p->top->f == NULL) {
10048    return true;
10049  }
10050
10051  if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) {
10052    upb_status_seterrf(p->status,
10053                       "Boolean value specified for non-bool field: %s",
10054                       upb_fielddef_name(p->top->f));
10055    return false;
10056  }
10057
10058  ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val);
10059  UPB_ASSERT(ok);
10060
10061  return true;
10062}
10063
10064static bool end_bool(upb_json_parser *p, bool val) {
10065  if (is_top_level(p)) {
10066    if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
10067      start_wrapper_object(p);
10068    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10069      start_value_object(p, VALUE_BOOLVALUE);
10070    } else {
10071      return false;
10072    }
10073  } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) {
10074    if (!start_subobject(p)) {
10075      return false;
10076    }
10077    start_wrapper_object(p);
10078  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
10079    if (!start_subobject(p)) {
10080      return false;
10081    }
10082    start_value_object(p, VALUE_BOOLVALUE);
10083  }
10084
10085  if (p->top->is_unknown_field) {
10086    return true;
10087  }
10088
10089  if (!parser_putbool(p, val)) {
10090    return false;
10091  }
10092
10093  if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) {
10094    end_wrapper_object(p);
10095    if (!is_top_level(p)) {
10096      end_subobject(p);
10097    }
10098    return true;
10099  }
10100
10101  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10102    end_value_object(p);
10103    if (!is_top_level(p)) {
10104      end_subobject(p);
10105    }
10106    return true;
10107  }
10108
10109  return true;
10110}
10111
10112static bool end_null(upb_json_parser *p) {
10113  const char *zero_ptr = "0";
10114
10115  if (is_top_level(p)) {
10116    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10117      start_value_object(p, VALUE_NULLVALUE);
10118    } else {
10119      return true;
10120    }
10121  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
10122    if (!start_subobject(p)) {
10123      return false;
10124    }
10125    start_value_object(p, VALUE_NULLVALUE);
10126  } else {
10127    return true;
10128  }
10129
10130  /* Fill null_value field. */
10131  multipart_startaccum(p);
10132  capture_begin(p, zero_ptr);
10133  capture_end(p, zero_ptr + 1);
10134  parse_number(p, false);
10135
10136  end_value_object(p);
10137  if (!is_top_level(p)) {
10138    end_subobject(p);
10139  }
10140
10141  return true;
10142}
10143
10144static bool start_any_stringval(upb_json_parser *p) {
10145  multipart_startaccum(p);
10146  return true;
10147}
10148
/* Begins a JSON string value.
 *
 * Depending on whether the string appears at top level or as a field value,
 * and on which well-known type is involved, this first opens the wrapper,
 * FieldMask, Timestamp/Duration or Value object the string stands for, and
 * then prepares the parser to receive the string's text.
 *
 * Returns false if a required frame could not be started, if a top-level
 * string is not one of the handled well-known types, or if the current field
 * is a bool or submessage field (status is set in the latter case). */
static bool start_stringval(upb_json_parser *p) {
  if (is_top_level(p)) {
    /* A bare string at top level is only accepted for the well-known types
     * checked below. */
    if (is_string_wrapper_object(p) ||
        is_number_wrapper_object(p)) {
      start_wrapper_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
      /* FieldMask needs no further string setup here. */
      start_fieldmask_object(p);
      return true;
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
               is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
      start_object(p);
    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
      start_value_object(p, VALUE_STRINGVALUE);
    } else {
      return false;
    }
  } else if (does_string_wrapper_start(p) ||
             does_number_wrapper_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_wrapper_object(p);
  } else if (does_fieldmask_start(p)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_fieldmask_object(p);
    return true;
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) ||
             is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_object(p);
  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
    if (!start_subobject(p)) {
      return false;
    }
    start_value_object(p, VALUE_STRINGVALUE);
  }

  /* No current field: just accumulate the text. */
  if (p->top->f == NULL) {
    multipart_startaccum(p);
    return true;
  }

  if (p->top->is_any) {
    return start_any_stringval(p);
  }

  if (upb_fielddef_isstring(p->top->f)) {
    upb_jsonparser_frame *inner;
    upb_selector_t sel;

    if (!check_stack(p)) return false;

    /* Start a new parser frame: parser frames correspond one-to-one with
     * handler frames, and string events occur in a sub-frame. */
    inner = start_jsonparser_frame(p);
    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
    upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
    inner->m = p->top->m;
    inner->f = p->top->f;
    p->top = inner;

    if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) {
      /* For STRING fields we push data directly to the handlers as it is
       * parsed.  We don't do this yet for BYTES fields, because our base64
       * decoder is not streaming.
       *
       * TODO(haberman): make base64 decoding streaming also. */
      multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING));
      return true;
    } else {
      multipart_startaccum(p);
      return true;
    }
  } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL &&
             upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) {
    /* No need to push a frame -- numeric values in quotes remain in the
     * current parser frame.  These values must accumulate so we can convert
     * them all at once at the end. */
    multipart_startaccum(p);
    return true;
  } else {
    upb_status_seterrf(p->status,
                       "String specified for bool or submessage field: %s",
                       upb_fielddef_name(p->top->f));
    return false;
  }
}
10240
10241static bool end_any_stringval(upb_json_parser *p) {
10242  size_t len;
10243  const char *buf = accumulate_getptr(p, &len);
10244
10245  /* Set type_url */
10246  upb_selector_t sel;
10247  upb_jsonparser_frame *inner;
10248  if (!check_stack(p)) return false;
10249  inner = p->top + 1;
10250
10251  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10252  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
10253  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10254  upb_sink_putstring(inner->sink, sel, buf, len, NULL);
10255  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10256  upb_sink_endstr(inner->sink, sel);
10257
10258  multipart_end(p);
10259
10260  /* Resolve type url */
10261  if (strncmp(buf, "type.googleapis.com/", 20) == 0 && len > 20) {
10262    const upb_msgdef *payload_type = NULL;
10263    buf += 20;
10264    len -= 20;
10265
10266    payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len);
10267    if (payload_type == NULL) {
10268      upb_status_seterrf(
10269          p->status, "Cannot find packed type: %.*s\n", (int)len, buf);
10270      return false;
10271    }
10272
10273    json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type);
10274
10275    return true;
10276  } else {
10277    upb_status_seterrf(
10278        p->status, "Invalid type url: %.*s\n", (int)len, buf);
10279    return false;
10280  }
10281}
10282
10283static bool end_stringval_nontop(upb_json_parser *p) {
10284  bool ok = true;
10285
10286  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
10287      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) {
10288    multipart_end(p);
10289    return true;
10290  }
10291
10292  if (p->top->f == NULL) {
10293    multipart_end(p);
10294    return true;
10295  }
10296
10297  if (p->top->is_any) {
10298    return end_any_stringval(p);
10299  }
10300
10301  switch (upb_fielddef_type(p->top->f)) {
10302    case UPB_TYPE_BYTES:
10303      if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
10304                       p->accumulated, p->accumulated_len)) {
10305        return false;
10306      }
10307      /* Fall through. */
10308
10309    case UPB_TYPE_STRING: {
10310      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10311      upb_sink_endstr(p->top->sink, sel);
10312      p->top--;
10313      break;
10314    }
10315
10316    case UPB_TYPE_ENUM: {
10317      /* Resolve enum symbolic name to integer value. */
10318      const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f);
10319
10320      size_t len;
10321      const char *buf = accumulate_getptr(p, &len);
10322
10323      int32_t int_val = 0;
10324      ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val);
10325
10326      if (ok) {
10327        upb_selector_t sel = parser_getsel(p);
10328        upb_sink_putint32(p->top->sink, sel, int_val);
10329      } else {
10330        upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf);
10331      }
10332
10333      break;
10334    }
10335
10336    case UPB_TYPE_INT32:
10337    case UPB_TYPE_INT64:
10338    case UPB_TYPE_UINT32:
10339    case UPB_TYPE_UINT64:
10340    case UPB_TYPE_DOUBLE:
10341    case UPB_TYPE_FLOAT:
10342      ok = parse_number(p, true);
10343      break;
10344
10345    default:
10346      UPB_ASSERT(false);
10347      upb_status_seterrmsg(p->status, "Internal error in JSON decoder");
10348      ok = false;
10349      break;
10350  }
10351
10352  multipart_end(p);
10353
10354  return ok;
10355}
10356
10357static bool end_stringval(upb_json_parser *p) {
10358  /* FieldMask's stringvals have been ended when handling them. Only need to
10359   * close FieldMask here.*/
10360  if (does_fieldmask_end(p)) {
10361    end_fieldmask_object(p);
10362    if (!is_top_level(p)) {
10363      end_subobject(p);
10364    }
10365    return true;
10366  }
10367
10368  if (!end_stringval_nontop(p)) {
10369    return false;
10370  }
10371
10372  if (does_string_wrapper_end(p) ||
10373      does_number_wrapper_end(p)) {
10374    end_wrapper_object(p);
10375    if (!is_top_level(p)) {
10376      end_subobject(p);
10377    }
10378    return true;
10379  }
10380
10381  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
10382    end_value_object(p);
10383    if (!is_top_level(p)) {
10384      end_subobject(p);
10385    }
10386    return true;
10387  }
10388
10389  if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) ||
10390      is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) ||
10391      is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) {
10392    end_object(p);
10393    if (!is_top_level(p)) {
10394      end_subobject(p);
10395    }
10396    return true;
10397  }
10398
10399  return true;
10400}
10401
/* Marks |ptr| as the start of the captured text of a duration's numeric
 * part. */
static void start_duration_base(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10405
10406static bool end_duration_base(upb_json_parser *p, const char *ptr) {
10407  size_t len;
10408  const char *buf;
10409  char seconds_buf[14];
10410  char nanos_buf[12];
10411  char *end;
10412  int64_t seconds = 0;
10413  int32_t nanos = 0;
10414  double val = 0.0;
10415  const char *seconds_membername = "seconds";
10416  const char *nanos_membername = "nanos";
10417  size_t fraction_start;
10418
10419  if (!capture_end(p, ptr)) {
10420    return false;
10421  }
10422
10423  buf = accumulate_getptr(p, &len);
10424
10425  memset(seconds_buf, 0, 14);
10426  memset(nanos_buf, 0, 12);
10427
10428  /* Find out base end. The maximus duration is 315576000000, which cannot be
10429   * represented by double without losing precision. Thus, we need to handle
10430   * fraction and base separately. */
10431  for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.';
10432       fraction_start++);
10433
10434  /* Parse base */
10435  memcpy(seconds_buf, buf, fraction_start);
10436  seconds = strtol(seconds_buf, &end, 10);
10437  if (errno == ERANGE || end != seconds_buf + fraction_start) {
10438    upb_status_seterrf(p->status, "error parsing duration: %s",
10439                       seconds_buf);
10440    return false;
10441  }
10442
10443  if (seconds > 315576000000) {
10444    upb_status_seterrf(p->status, "error parsing duration: "
10445                                   "maximum acceptable value is "
10446                                   "315576000000");
10447    return false;
10448  }
10449
10450  if (seconds < -315576000000) {
10451    upb_status_seterrf(p->status, "error parsing duration: "
10452                                   "minimum acceptable value is "
10453                                   "-315576000000");
10454    return false;
10455  }
10456
10457  /* Parse fraction */
10458  nanos_buf[0] = '0';
10459  memcpy(nanos_buf + 1, buf + fraction_start, len - fraction_start);
10460  val = strtod(nanos_buf, &end);
10461  if (errno == ERANGE || end != nanos_buf + len - fraction_start + 1) {
10462    upb_status_seterrf(p->status, "error parsing duration: %s",
10463                       nanos_buf);
10464    return false;
10465  }
10466
10467  nanos = val * 1000000000;
10468  if (seconds < 0) nanos = -nanos;
10469
10470  /* Clean up buffer */
10471  multipart_end(p);
10472
10473  /* Set seconds */
10474  start_member(p);
10475  capture_begin(p, seconds_membername);
10476  capture_end(p, seconds_membername + 7);
10477  end_membername(p);
10478  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
10479  end_member(p);
10480
10481  /* Set nanos */
10482  start_member(p);
10483  capture_begin(p, nanos_membername);
10484  capture_end(p, nanos_membername + 5);
10485  end_membername(p);
10486  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
10487  end_member(p);
10488
10489  /* Continue previous arena */
10490  multipart_startaccum(p);
10491
10492  return true;
10493}
10494
10495static int parse_timestamp_number(upb_json_parser *p) {
10496  size_t len;
10497  const char *buf;
10498  int val;
10499
10500  /* atoi() and friends unfortunately do not support specifying the length of
10501   * the input string, so we need to force a copy into a NULL-terminated buffer. */
10502  multipart_text(p, "\0", 1, false);
10503
10504  buf = accumulate_getptr(p, &len);
10505  val = atoi(buf);
10506  multipart_end(p);
10507  multipart_startaccum(p);
10508
10509  return val;
10510}
10511
10512static void start_year(upb_json_parser *p, const char *ptr) {
10513  capture_begin(p, ptr);
10514}
10515
10516static bool end_year(upb_json_parser *p, const char *ptr) {
10517  if (!capture_end(p, ptr)) {
10518    return false;
10519  }
10520  p->tm.tm_year = parse_timestamp_number(p) - 1900;
10521  return true;
10522}
10523
10524static void start_month(upb_json_parser *p, const char *ptr) {
10525  capture_begin(p, ptr);
10526}
10527
10528static bool end_month(upb_json_parser *p, const char *ptr) {
10529  if (!capture_end(p, ptr)) {
10530    return false;
10531  }
10532  p->tm.tm_mon = parse_timestamp_number(p) - 1;
10533  return true;
10534}
10535
10536static void start_day(upb_json_parser *p, const char *ptr) {
10537  capture_begin(p, ptr);
10538}
10539
10540static bool end_day(upb_json_parser *p, const char *ptr) {
10541  if (!capture_end(p, ptr)) {
10542    return false;
10543  }
10544  p->tm.tm_mday = parse_timestamp_number(p);
10545  return true;
10546}
10547
10548static void start_hour(upb_json_parser *p, const char *ptr) {
10549  capture_begin(p, ptr);
10550}
10551
10552static bool end_hour(upb_json_parser *p, const char *ptr) {
10553  if (!capture_end(p, ptr)) {
10554    return false;
10555  }
10556  p->tm.tm_hour = parse_timestamp_number(p);
10557  return true;
10558}
10559
10560static void start_minute(upb_json_parser *p, const char *ptr) {
10561  capture_begin(p, ptr);
10562}
10563
10564static bool end_minute(upb_json_parser *p, const char *ptr) {
10565  if (!capture_end(p, ptr)) {
10566    return false;
10567  }
10568  p->tm.tm_min = parse_timestamp_number(p);
10569  return true;
10570}
10571
10572static void start_second(upb_json_parser *p, const char *ptr) {
10573  capture_begin(p, ptr);
10574}
10575
10576static bool end_second(upb_json_parser *p, const char *ptr) {
10577  if (!capture_end(p, ptr)) {
10578    return false;
10579  }
10580  p->tm.tm_sec = parse_timestamp_number(p);
10581  return true;
10582}
10583
10584static void start_timestamp_base(upb_json_parser *p) {
10585  memset(&p->tm, 0, sizeof(struct tm));
10586}
10587
/* Marks |ptr| as the start of the captured fractional-seconds text of a
 * timestamp. */
static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10591
10592static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) {
10593  size_t len;
10594  const char *buf;
10595  char nanos_buf[12];
10596  char *end;
10597  double val = 0.0;
10598  int32_t nanos;
10599  const char *nanos_membername = "nanos";
10600
10601  memset(nanos_buf, 0, 12);
10602
10603  if (!capture_end(p, ptr)) {
10604    return false;
10605  }
10606
10607  buf = accumulate_getptr(p, &len);
10608
10609  if (len > 10) {
10610    upb_status_seterrf(p->status,
10611        "error parsing timestamp: at most 9-digit fraction.");
10612    return false;
10613  }
10614
10615  /* Parse nanos */
10616  nanos_buf[0] = '0';
10617  memcpy(nanos_buf + 1, buf, len);
10618  val = strtod(nanos_buf, &end);
10619
10620  if (errno == ERANGE || end != nanos_buf + len + 1) {
10621    upb_status_seterrf(p->status, "error parsing timestamp nanos: %s",
10622                       nanos_buf);
10623    return false;
10624  }
10625
10626  nanos = val * 1000000000;
10627
10628  /* Clean up previous environment */
10629  multipart_end(p);
10630
10631  /* Set nanos */
10632  start_member(p);
10633  capture_begin(p, nanos_membername);
10634  capture_end(p, nanos_membername + 5);
10635  end_membername(p);
10636  upb_sink_putint32(p->top->sink, parser_getsel(p), nanos);
10637  end_member(p);
10638
10639  /* Continue previous environment */
10640  multipart_startaccum(p);
10641
10642  return true;
10643}
10644
/* Marks |ptr| as the start of the captured time-zone text of a timestamp. */
static void start_timestamp_zone(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}
10648
/* Returns n / d rounded up; intended for non-negative n and positive d. */
static int div_round_up2(int n, int d) {
  return (n + d - 1) / d;
}

/* epoch_days(1970, 1, 1) == 1970-01-01 == 0.
 *
 * Returns the number of days from the Unix epoch to the given date.  |month|
 * is nominally 1..12; values outside that range are carried into |year|
 * (month 13 is January of the following year, month 0 is December of the
 * previous one) instead of indexing outside the month table. */
static int epoch_days(int year, int month, int day) {
  static const uint16_t month_yday[12] = {0,   31,  59,  90,  120, 151,
                                          181, 212, 243, 273, 304, 334};
  int month0 = month - 1; /* zero-based month */
  int febs_since_0;
  int leap_days_since_0;
  int days_since_0;

  /* Normalize month0 into 0..11.  C division truncates toward zero, so a
   * negative remainder needs one more borrow from the year. */
  year += month0 / 12;
  month0 %= 12;
  if (month0 < 0) {
    month0 += 12;
    year -= 1;
  }

  /* From March onwards, this year's February has already passed. */
  febs_since_0 = month0 >= 2 ? year + 1 : year;
  leap_days_since_0 = div_round_up2(febs_since_0, 4) -
                      div_round_up2(febs_since_0, 100) +
                      div_round_up2(febs_since_0, 400);
  days_since_0 =
      365 * year + month_yday[month0] + (day - 1) + leap_days_since_0;

  /* Convert from 0-epoch (0001-01-01 BC) to Unix Epoch (1970-01-01 AD).
   * Since the "BC" system does not have a year zero, 1 BC == year zero. */
  return days_since_0 - 719528;
}

/* Converts a broken-down UTC time to seconds since the Unix epoch.  Only
 * tm_year, tm_mon, tm_mday, tm_hour, tm_min and tm_sec are read.  Hours,
 * minutes and seconds are combined arithmetically, so out-of-range values in
 * those fields carry naturally. */
static int64_t upb_timegm(const struct tm *tp) {
  int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday);
  ret = (ret * 24) + tp->tm_hour;
  ret = (ret * 60) + tp->tm_min;
  ret = (ret * 60) + tp->tm_sec;
  return ret;
}
10676
10677static bool end_timestamp_zone(upb_json_parser *p, const char *ptr) {
10678  size_t len;
10679  const char *buf;
10680  int hours;
10681  int64_t seconds;
10682  const char *seconds_membername = "seconds";
10683
10684  if (!capture_end(p, ptr)) {
10685    return false;
10686  }
10687
10688  buf = accumulate_getptr(p, &len);
10689
10690  if (buf[0] != 'Z') {
10691    if (sscanf(buf + 1, "%2d:00", &hours) != 1) {
10692      upb_status_seterrf(p->status, "error parsing timestamp offset");
10693      return false;
10694    }
10695
10696    if (buf[0] == '+') {
10697      hours = -hours;
10698    }
10699
10700    p->tm.tm_hour += hours;
10701  }
10702
10703  /* Normalize tm */
10704  seconds = upb_timegm(&p->tm);
10705
10706  /* Check timestamp boundary */
10707  if (seconds < -62135596800) {
10708    upb_status_seterrf(p->status, "error parsing timestamp: "
10709                                   "minimum acceptable value is "
10710                                   "0001-01-01T00:00:00Z");
10711    return false;
10712  }
10713
10714  /* Clean up previous environment */
10715  multipart_end(p);
10716
10717  /* Set seconds */
10718  start_member(p);
10719  capture_begin(p, seconds_membername);
10720  capture_end(p, seconds_membername + 7);
10721  end_membername(p);
10722  upb_sink_putint64(p->top->sink, parser_getsel(p), seconds);
10723  end_member(p);
10724
10725  /* Continue previous environment */
10726  multipart_startaccum(p);
10727
10728  return true;
10729}
10730
/* Marks |ptr| as the start of a captured run of FieldMask path text. */
static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  capture_begin(p, ptr);
}

/* Ends the FieldMask path text capture at |ptr|; returns capture_end()'s
 * result. */
static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) {
  return capture_end(p, ptr);
}
10738
10739static bool start_fieldmask_path(upb_json_parser *p) {
10740  upb_jsonparser_frame *inner;
10741  upb_selector_t sel;
10742
10743  if (!check_stack(p)) return false;
10744
10745  /* Start a new parser frame: parser frames correspond one-to-one with
10746   * handler frames, and string events occur in a sub-frame. */
10747  inner = start_jsonparser_frame(p);
10748  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10749  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
10750  inner->m = p->top->m;
10751  inner->f = p->top->f;
10752  p->top = inner;
10753
10754  multipart_startaccum(p);
10755  return true;
10756}
10757
10758static bool lower_camel_push(
10759    upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) {
10760  const char *limit = ptr + len;
10761  bool first = true;
10762  for (;ptr < limit; ptr++) {
10763    if (*ptr >= 'A' && *ptr <= 'Z' && !first) {
10764      char lower = tolower(*ptr);
10765      upb_sink_putstring(p->top->sink, sel, "_", 1, NULL);
10766      upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL);
10767    } else {
10768      upb_sink_putstring(p->top->sink, sel, ptr, 1, NULL);
10769    }
10770    first = false;
10771  }
10772  return true;
10773}
10774
10775static bool end_fieldmask_path(upb_json_parser *p) {
10776  upb_selector_t sel;
10777
10778  if (!lower_camel_push(
10779           p, getsel_for_handlertype(p, UPB_HANDLER_STRING),
10780           p->accumulated, p->accumulated_len)) {
10781    return false;
10782  }
10783
10784  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10785  upb_sink_endstr(p->top->sink, sel);
10786  p->top--;
10787
10788  multipart_end(p);
10789  return true;
10790}
10791
/* Begins an object member: asserts that the current frame has no field
 * selected yet and starts accumulating the member name. */
static void start_member(upb_json_parser *p) {
  UPB_ASSERT(!p->top->f);
  multipart_startaccum(p);
}
10796
10797/* Helper: invoked during parse_mapentry() to emit the mapentry message's key
10798 * field based on the current contents of the accumulate buffer. */
10799static bool parse_mapentry_key(upb_json_parser *p) {
10800
10801  size_t len;
10802  const char *buf = accumulate_getptr(p, &len);
10803
10804  /* Emit the key field. We do a bit of ad-hoc parsing here because the
10805   * parser state machine has already decided that this is a string field
10806   * name, and we are reinterpreting it as some arbitrary key type. In
10807   * particular, integer and bool keys are quoted, so we need to parse the
10808   * quoted string contents here. */
10809
10810  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY);
10811  if (p->top->f == NULL) {
10812    upb_status_seterrmsg(p->status, "mapentry message has no key");
10813    return false;
10814  }
10815  switch (upb_fielddef_type(p->top->f)) {
10816    case UPB_TYPE_INT32:
10817    case UPB_TYPE_INT64:
10818    case UPB_TYPE_UINT32:
10819    case UPB_TYPE_UINT64:
10820      /* Invoke end_number. The accum buffer has the number's text already. */
10821      if (!parse_number(p, true)) {
10822        return false;
10823      }
10824      break;
10825    case UPB_TYPE_BOOL:
10826      if (len == 4 && !strncmp(buf, "true", 4)) {
10827        if (!parser_putbool(p, true)) {
10828          return false;
10829        }
10830      } else if (len == 5 && !strncmp(buf, "false", 5)) {
10831        if (!parser_putbool(p, false)) {
10832          return false;
10833        }
10834      } else {
10835        upb_status_seterrmsg(p->status,
10836                             "Map bool key not 'true' or 'false'");
10837        return false;
10838      }
10839      multipart_end(p);
10840      break;
10841    case UPB_TYPE_STRING:
10842    case UPB_TYPE_BYTES: {
10843      upb_sink subsink;
10844      upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
10845      upb_sink_startstr(p->top->sink, sel, len, &subsink);
10846      sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
10847      upb_sink_putstring(subsink, sel, buf, len, NULL);
10848      sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
10849      upb_sink_endstr(subsink, sel);
10850      multipart_end(p);
10851      break;
10852    }
10853    default:
10854      upb_status_seterrmsg(p->status, "Invalid field type for map key");
10855      return false;
10856  }
10857
10858  return true;
10859}
10860
10861/* Helper: emit one map entry (as a submessage in the map field sequence). This
10862 * is invoked from end_membername(), at the end of the map entry's key string,
10863 * with the map key in the accumulate buffer. It parses the key from that
10864 * buffer, emits the handler calls to start the mapentry submessage (setting up
10865 * its subframe in the process), and sets up state in the subframe so that the
10866 * value parser (invoked next) will emit the mapentry's value field and then
10867 * end the mapentry message. */
10868
10869static bool handle_mapentry(upb_json_parser *p) {
10870  const upb_fielddef *mapfield;
10871  const upb_msgdef *mapentrymsg;
10872  upb_jsonparser_frame *inner;
10873  upb_selector_t sel;
10874
10875  /* Map entry: p->top->sink is the seq frame, so we need to start a frame
10876   * for the mapentry itself, and then set |f| in that frame so that the map
10877   * value field is parsed, and also set a flag to end the frame after the
10878   * map-entry value is parsed. */
10879  if (!check_stack(p)) return false;
10880
10881  mapfield = p->top->mapfield;
10882  mapentrymsg = upb_fielddef_msgsubdef(mapfield);
10883
10884  inner = start_jsonparser_frame(p);
10885  p->top->f = mapfield;
10886  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
10887  upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
10888  inner->m = mapentrymsg;
10889  inner->mapfield = mapfield;
10890
10891  /* Don't set this to true *yet* -- we reuse parsing handlers below to push
10892   * the key field value to the sink, and these handlers will pop the frame
10893   * if they see is_mapentry (when invoked by the parser state machine, they
10894   * would have just seen the map-entry value, not key). */
10895  inner->is_mapentry = false;
10896  p->top = inner;
10897
10898  /* send STARTMSG in submsg frame. */
10899  upb_sink_startmsg(p->top->sink);
10900
10901  parse_mapentry_key(p);
10902
10903  /* Set up the value field to receive the map-entry value. */
10904  p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE);
10905  p->top->is_mapentry = true;  /* set up to pop frame after value is parsed. */
10906  p->top->mapfield = mapfield;
10907  if (p->top->f == NULL) {
10908    upb_status_seterrmsg(p->status, "mapentry message has no value");
10909    return false;
10910  }
10911
10912  return true;
10913}
10914
10915static bool end_membername(upb_json_parser *p) {
10916  UPB_ASSERT(!p->top->f);
10917
10918  if (!p->top->m) {
10919    p->top->is_unknown_field = true;
10920    multipart_end(p);
10921    return true;
10922  }
10923
10924  if (p->top->is_any) {
10925    return end_any_membername(p);
10926  } else if (p->top->is_map) {
10927    return handle_mapentry(p);
10928  } else {
10929    size_t len;
10930    const char *buf = accumulate_getptr(p, &len);
10931    upb_value v;
10932
10933    if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) {
10934      p->top->f = upb_value_getconstptr(v);
10935      multipart_end(p);
10936
10937      return true;
10938    } else if (p->ignore_json_unknown) {
10939      p->top->is_unknown_field = true;
10940      multipart_end(p);
10941      return true;
10942    } else {
10943      upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf);
10944      return false;
10945    }
10946  }
10947}
10948
10949static bool end_any_membername(upb_json_parser *p) {
10950  size_t len;
10951  const char *buf = accumulate_getptr(p, &len);
10952  upb_value v;
10953
10954  if (len == 5 && strncmp(buf, "@type", len) == 0) {
10955    upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v);
10956    p->top->f = upb_value_getconstptr(v);
10957    multipart_end(p);
10958    return true;
10959  } else {
10960    p->top->is_unknown_field = true;
10961    multipart_end(p);
10962    return true;
10963  }
10964}
10965
10966static void end_member(upb_json_parser *p) {
10967  /* If we just parsed a map-entry value, end that frame too. */
10968  if (p->top->is_mapentry) {
10969    upb_selector_t sel;
10970    bool ok;
10971    const upb_fielddef *mapfield;
10972
10973    UPB_ASSERT(p->top > p->stack);
10974    /* send ENDMSG on submsg. */
10975    upb_sink_endmsg(p->top->sink, p->status);
10976    mapfield = p->top->mapfield;
10977
10978    /* send ENDSUBMSG in repeated-field-of-mapentries frame. */
10979    p->top--;
10980    ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel);
10981    UPB_ASSUME(ok);
10982    upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel);
10983  }
10984
10985  p->top->f = NULL;
10986  p->top->is_unknown_field = false;
10987}
10988
/* Begins a member of an Any object: performs the normal member start and
 * passes |ptr| to the Any frame's "after type url start" setter. */
static void start_any_member(upb_json_parser *p, const char *ptr) {
  start_member(p);
  json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr);
}

/* Ends a member of an Any object: passes |ptr| to the Any frame's "before
 * type url end" setter and then performs the normal member end. */
static void end_any_member(upb_json_parser *p, const char *ptr) {
  json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr);
  end_member(p);
}
10998
10999static bool start_subobject(upb_json_parser *p) {
11000  if (p->top->is_unknown_field) {
11001    if (!check_stack(p)) return false;
11002
11003    p->top = start_jsonparser_frame(p);
11004    return true;
11005  }
11006
11007  if (upb_fielddef_ismap(p->top->f)) {
11008    upb_jsonparser_frame *inner;
11009    upb_selector_t sel;
11010
11011    /* Beginning of a map. Start a new parser frame in a repeated-field
11012     * context. */
11013    if (!check_stack(p)) return false;
11014
11015    inner = start_jsonparser_frame(p);
11016    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
11017    upb_sink_startseq(p->top->sink, sel, &inner->sink);
11018    inner->m = upb_fielddef_msgsubdef(p->top->f);
11019    inner->mapfield = p->top->f;
11020    inner->is_map = true;
11021    p->top = inner;
11022
11023    return true;
11024  } else if (upb_fielddef_issubmsg(p->top->f)) {
11025    upb_jsonparser_frame *inner;
11026    upb_selector_t sel;
11027
11028    /* Beginning of a subobject. Start a new parser frame in the submsg
11029     * context. */
11030    if (!check_stack(p)) return false;
11031
11032    inner = start_jsonparser_frame(p);
11033    sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG);
11034    upb_sink_startsubmsg(p->top->sink, sel, &inner->sink);
11035    inner->m = upb_fielddef_msgsubdef(p->top->f);
11036    set_name_table(p, inner);
11037    p->top = inner;
11038
11039    if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
11040      p->top->is_any = true;
11041      p->top->any_frame = json_parser_any_frame_new(p);
11042    } else {
11043      p->top->is_any = false;
11044      p->top->any_frame = NULL;
11045    }
11046
11047    return true;
11048  } else {
11049    upb_status_seterrf(p->status,
11050                       "Object specified for non-message/group field: %s",
11051                       upb_fielddef_name(p->top->f));
11052    return false;
11053  }
11054}
11055
11056static bool start_subobject_full(upb_json_parser *p) {
11057  if (is_top_level(p)) {
11058    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
11059      start_value_object(p, VALUE_STRUCTVALUE);
11060      if (!start_subobject(p)) return false;
11061      start_structvalue_object(p);
11062    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
11063      start_structvalue_object(p);
11064    } else {
11065      return true;
11066    }
11067  } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) {
11068    if (!start_subobject(p)) return false;
11069    start_structvalue_object(p);
11070  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) {
11071    if (!start_subobject(p)) return false;
11072    start_value_object(p, VALUE_STRUCTVALUE);
11073    if (!start_subobject(p)) return false;
11074    start_structvalue_object(p);
11075  }
11076
11077  return start_subobject(p);
11078}
11079
11080static void end_subobject(upb_json_parser *p) {
11081  if (is_top_level(p)) {
11082    return;
11083  }
11084
11085  if (p->top->is_map) {
11086    upb_selector_t sel;
11087    p->top--;
11088    sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
11089    upb_sink_endseq(p->top->sink, sel);
11090  } else {
11091    upb_selector_t sel;
11092    bool is_unknown = p->top->m == NULL;
11093    p->top--;
11094    if (!is_unknown) {
11095      sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG);
11096      upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel);
11097    }
11098  }
11099}
11100
11101static void end_subobject_full(upb_json_parser *p) {
11102  end_subobject(p);
11103
11104  if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) {
11105    end_structvalue_object(p);
11106    if (!is_top_level(p)) {
11107      end_subobject(p);
11108    }
11109  }
11110
11111  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
11112    end_value_object(p);
11113    if (!is_top_level(p)) {
11114      end_subobject(p);
11115    }
11116  }
11117}
11118
11119static bool start_array(upb_json_parser *p) {
11120  upb_jsonparser_frame *inner;
11121  upb_selector_t sel;
11122
11123  if (is_top_level(p)) {
11124    if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
11125      start_value_object(p, VALUE_LISTVALUE);
11126      if (!start_subobject(p)) return false;
11127      start_listvalue_object(p);
11128    } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
11129      start_listvalue_object(p);
11130    } else {
11131      return false;
11132    }
11133  } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) &&
11134             (!upb_fielddef_isseq(p->top->f) ||
11135              p->top->is_repeated)) {
11136    if (!start_subobject(p)) return false;
11137    start_listvalue_object(p);
11138  } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) &&
11139             (!upb_fielddef_isseq(p->top->f) ||
11140              p->top->is_repeated)) {
11141    if (!start_subobject(p)) return false;
11142    start_value_object(p, VALUE_LISTVALUE);
11143    if (!start_subobject(p)) return false;
11144    start_listvalue_object(p);
11145  }
11146
11147  if (p->top->is_unknown_field) {
11148    inner = start_jsonparser_frame(p);
11149    inner->is_unknown_field = true;
11150    p->top = inner;
11151
11152    return true;
11153  }
11154
11155  if (!upb_fielddef_isseq(p->top->f)) {
11156    upb_status_seterrf(p->status,
11157                       "Array specified for non-repeated field: %s",
11158                       upb_fielddef_name(p->top->f));
11159    return false;
11160  }
11161
11162  if (!check_stack(p)) return false;
11163
11164  inner = start_jsonparser_frame(p);
11165  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ);
11166  upb_sink_startseq(p->top->sink, sel, &inner->sink);
11167  inner->m = p->top->m;
11168  inner->f = p->top->f;
11169  inner->is_repeated = true;
11170  p->top = inner;
11171
11172  return true;
11173}
11174
11175static void end_array(upb_json_parser *p) {
11176  upb_selector_t sel;
11177
11178  UPB_ASSERT(p->top > p->stack);
11179
11180  p->top--;
11181
11182  if (p->top->is_unknown_field) {
11183    return;
11184  }
11185
11186  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ);
11187  upb_sink_endseq(p->top->sink, sel);
11188
11189  if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) {
11190    end_listvalue_object(p);
11191    if (!is_top_level(p)) {
11192      end_subobject(p);
11193    }
11194  }
11195
11196  if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) {
11197    end_value_object(p);
11198    if (!is_top_level(p)) {
11199      end_subobject(p);
11200    }
11201  }
11202}
11203
11204static void start_object(upb_json_parser *p) {
11205  if (!p->top->is_map && p->top->m != NULL) {
11206    upb_sink_startmsg(p->top->sink);
11207  }
11208}
11209
11210static void end_object(upb_json_parser *p) {
11211  if (!p->top->is_map && p->top->m != NULL) {
11212    upb_sink_endmsg(p->top->sink, p->status);
11213  }
11214}
11215
11216static void start_any_object(upb_json_parser *p, const char *ptr) {
11217  start_object(p);
11218  p->top->any_frame->before_type_url_start = ptr;
11219  p->top->any_frame->before_type_url_end = ptr;
11220}
11221
/* Finishes an Any object.
 *
 * The Any's payload text was recorded as up to two input ranges in the
 * any-frame (one before the type url member, one after it).  This function
 * replays those ranges through the any-frame's inner parser, then emits the
 * inner parser's string sink contents as the "value" string member of the
 * Any message, ends the Any object, and releases the any-frame.
 *
 * `ptr` is the current input position; it bounds the "after type url" range.
 *
 * Returns false (in most cases with a status message set here or by a
 * callee) when the payload has a value but no type url, when the recorded
 * data for a well-known payload type has no ':' separator, when the inner
 * parser rejects the payload, or when the frame stack is full. */
static bool end_any_object(upb_json_parser *p, const char *ptr) {
  const char *value_membername = "value";
  bool is_well_known_packed = false;
  const char *packed_end = ptr + 1;
  upb_selector_t sel;
  upb_jsonparser_frame *inner;

  if (json_parser_any_frame_has_value(p->top->any_frame) &&
      !json_parser_any_frame_has_type_url(p->top->any_frame)) {
    upb_status_seterrmsg(p->status, "No valid type url");
    return false;
  }

  /* NOTE(review): any_frame->parser is dereferenced unconditionally below;
   * confirm it is always set when neither a value nor a type url was seen. */

  /* Well known types data is represented as value field. */
  if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) !=
          UPB_WELLKNOWN_UNSPECIFIED) {
    is_well_known_packed = true;

    /* For a well-known payload type only the text after the first ':' in the
     * recorded range is replayed, so move the range start just past it. */
    if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
      p->top->any_frame->before_type_url_start =
          memchr(p->top->any_frame->before_type_url_start, ':',
                 p->top->any_frame->before_type_url_end -
                 p->top->any_frame->before_type_url_start);
      if (p->top->any_frame->before_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "invalid data for well known type.");
        return false;
      }
      p->top->any_frame->before_type_url_start++;
    }

    if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
      p->top->any_frame->after_type_url_start =
          memchr(p->top->any_frame->after_type_url_start, ':',
                 (ptr + 1) -
                 p->top->any_frame->after_type_url_start);
      if (p->top->any_frame->after_type_url_start == NULL) {
        upb_status_seterrmsg(p->status, "Invalid data for well known type.");
        return false;
      }
      p->top->any_frame->after_type_url_start++;
      /* In the packed case the replayed range stops at ptr instead of
       * ptr + 1. */
      packed_end = ptr;
    }
  }

  /* Replay the payload through the inner parser.  parse() returns the number
   * of bytes consumed, so `!parse(...)` only detects "nothing consumed". */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->before_type_url_start,
               p->top->any_frame->before_type_url_end -
               p->top->any_frame->before_type_url_start, NULL)) {
      return false;
    }
  } else {
    /* No leading range: supply the opening brace ourselves, unless the
     * payload is a well-known type. */
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) {
        return false;
      }
    }
  }

  /* Both ranges present: join them with a comma. */
  if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) &&
      json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) {
      return false;
    }
  }

  if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) {
    if (!parse(p->top->any_frame->parser, NULL,
               p->top->any_frame->after_type_url_start,
               packed_end - p->top->any_frame->after_type_url_start, NULL)) {
      return false;
    }
  } else {
    /* No trailing range: supply the closing brace ourselves, unless the
     * payload is a well-known type. */
    if (!is_well_known_packed) {
      if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) {
        return false;
      }
    }
  }

  /* Signal end of input to the inner parser; fails if it did not stop in an
   * accepting state. */
  if (!end(p->top->any_frame->parser, NULL)) {
    return false;
  }

  p->top->is_any = false;

  /* Set value */
  start_member(p);
  capture_begin(p, value_membername);
  capture_end(p, value_membername + 5);  /* 5 == length of "value" */
  end_membername(p);

  /* The slot above the current frame is used only to hold the string
   * sub-sink; p->top itself is not advanced. */
  if (!check_stack(p)) return false;
  inner = p->top + 1;

  sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR);
  upb_sink_startstr(p->top->sink, sel, 0, &inner->sink);
  sel = getsel_for_handlertype(p, UPB_HANDLER_STRING);
  upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr,
                     p->top->any_frame->stringsink.len, NULL);
  sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR);
  upb_sink_endstr(inner->sink, sel);

  end_member(p);

  end_object(p);

  /* Deallocate any parse frame. */
  json_parser_any_frame_free(p->top->any_frame);

  return true;
}
11334
11335static bool is_string_wrapper(const upb_msgdef *m) {
11336  upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11337  return type == UPB_WELLKNOWN_STRINGVALUE ||
11338         type == UPB_WELLKNOWN_BYTESVALUE;
11339}
11340
11341static bool is_fieldmask(const upb_msgdef *m) {
11342  upb_wellknowntype_t type = upb_msgdef_wellknowntype(m);
11343  return type == UPB_WELLKNOWN_FIELDMASK;
11344}
11345
11346static void start_fieldmask_object(upb_json_parser *p) {
11347  const char *membername = "paths";
11348
11349  start_object(p);
11350
11351  /* Set up context for parsing value */
11352  start_member(p);
11353  capture_begin(p, membername);
11354  capture_end(p, membername + 5);
11355  end_membername(p);
11356
11357  start_array(p);
11358}
11359
11360static void end_fieldmask_object(upb_json_parser *p) {
11361  end_array(p);
11362  end_member(p);
11363  end_object(p);
11364}
11365
11366static void start_wrapper_object(upb_json_parser *p) {
11367  const char *membername = "value";
11368
11369  start_object(p);
11370
11371  /* Set up context for parsing value */
11372  start_member(p);
11373  capture_begin(p, membername);
11374  capture_end(p, membername + 5);
11375  end_membername(p);
11376}
11377
11378static void end_wrapper_object(upb_json_parser *p) {
11379  end_member(p);
11380  end_object(p);
11381}
11382
11383static void start_value_object(upb_json_parser *p, int value_type) {
11384  const char *nullmember = "null_value";
11385  const char *numbermember = "number_value";
11386  const char *stringmember = "string_value";
11387  const char *boolmember = "bool_value";
11388  const char *structmember = "struct_value";
11389  const char *listmember = "list_value";
11390  const char *membername = "";
11391
11392  switch (value_type) {
11393    case VALUE_NULLVALUE:
11394      membername = nullmember;
11395      break;
11396    case VALUE_NUMBERVALUE:
11397      membername = numbermember;
11398      break;
11399    case VALUE_STRINGVALUE:
11400      membername = stringmember;
11401      break;
11402    case VALUE_BOOLVALUE:
11403      membername = boolmember;
11404      break;
11405    case VALUE_STRUCTVALUE:
11406      membername = structmember;
11407      break;
11408    case VALUE_LISTVALUE:
11409      membername = listmember;
11410      break;
11411  }
11412
11413  start_object(p);
11414
11415  /* Set up context for parsing value */
11416  start_member(p);
11417  capture_begin(p, membername);
11418  capture_end(p, membername + strlen(membername));
11419  end_membername(p);
11420}
11421
11422static void end_value_object(upb_json_parser *p) {
11423  end_member(p);
11424  end_object(p);
11425}
11426
11427static void start_listvalue_object(upb_json_parser *p) {
11428  const char *membername = "values";
11429
11430  start_object(p);
11431
11432  /* Set up context for parsing value */
11433  start_member(p);
11434  capture_begin(p, membername);
11435  capture_end(p, membername + strlen(membername));
11436  end_membername(p);
11437}
11438
11439static void end_listvalue_object(upb_json_parser *p) {
11440  end_member(p);
11441  end_object(p);
11442}
11443
11444static void start_structvalue_object(upb_json_parser *p) {
11445  const char *membername = "fields";
11446
11447  start_object(p);
11448
11449  /* Set up context for parsing value */
11450  start_member(p);
11451  capture_begin(p, membername);
11452  capture_end(p, membername + strlen(membername));
11453  end_membername(p);
11454}
11455
11456static void end_structvalue_object(upb_json_parser *p) {
11457  end_member(p);
11458  end_object(p);
11459}
11460
11461static bool is_top_level(upb_json_parser *p) {
11462  return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field;
11463}
11464
11465static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) {
11466  return p->top->m != NULL && upb_msgdef_wellknowntype(p->top->m) == type;
11467}
11468
11469static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) {
11470  return p->top->f != NULL &&
11471         upb_fielddef_issubmsg(p->top->f) &&
11472         (upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f))
11473              == type);
11474}
11475
11476static bool does_number_wrapper_start(upb_json_parser *p) {
11477  return p->top->f != NULL &&
11478         upb_fielddef_issubmsg(p->top->f) &&
11479         upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f));
11480}
11481
11482static bool does_number_wrapper_end(upb_json_parser *p) {
11483  return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m);
11484}
11485
11486static bool is_number_wrapper_object(upb_json_parser *p) {
11487  return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m);
11488}
11489
11490static bool does_string_wrapper_start(upb_json_parser *p) {
11491  return p->top->f != NULL &&
11492         upb_fielddef_issubmsg(p->top->f) &&
11493         is_string_wrapper(upb_fielddef_msgsubdef(p->top->f));
11494}
11495
11496static bool does_string_wrapper_end(upb_json_parser *p) {
11497  return p->top->m != NULL && is_string_wrapper(p->top->m);
11498}
11499
11500static bool is_string_wrapper_object(upb_json_parser *p) {
11501  return p->top->m != NULL && is_string_wrapper(p->top->m);
11502}
11503
11504static bool does_fieldmask_start(upb_json_parser *p) {
11505  return p->top->f != NULL &&
11506         upb_fielddef_issubmsg(p->top->f) &&
11507         is_fieldmask(upb_fielddef_msgsubdef(p->top->f));
11508}
11509
11510static bool does_fieldmask_end(upb_json_parser *p) {
11511  return p->top->m != NULL && is_fieldmask(p->top->m);
11512}
11513
/* Jumps to the enclosing function's `error` label when `x` is false.
 * Wrapped in do/while(0) so the macro behaves as a single statement and
 * cannot capture an `else` that follows it. */
#define CHECK_RETURN_TOP(x) do { if (!(x)) goto error; } while (0)
11515
11516
11517/* The actual parser **********************************************************/
11518
11519/* What follows is the Ragel parser itself.  The language is specified in Ragel
11520 * and the actions call our C functions above.
11521 *
11522 * Ragel has an extensive set of functionality, and we use only a small part of
11523 * it.  There are many action types but we only use a few:
11524 *
11525 *   ">" -- transition into a machine
11526 *   "%" -- transition out of a machine
11527 *   "@" -- transition into a final state of a machine.
11528 *
11529 * "@" transitions are tricky because a machine can transition into a final
11530 * state repeatedly.  But in some cases we know this can't happen, for example
11531 * a string which is delimited by a final '"' can only transition into its
11532 * final state once, when the closing '"' is seen. */
11533
11534
11535#line 2780 "upb/json/parser.rl"
11536
11537
11538
11539#line 2583 "upb/json/parser.c"
/* Ragel-generated.  Flattened action lists: an offset into this table (taken
 * from _json_trans_actions or _json_eof_actions) points at a count, followed
 * by that many action ids which parse() dispatches through its switch.
 * Offset 0 holds a count of 0, i.e. "no actions". */
static const char _json_actions[] = {
	0, 1, 0, 1, 1, 1, 3, 1,
	4, 1, 6, 1, 7, 1, 8, 1,
	9, 1, 11, 1, 12, 1, 13, 1,
	14, 1, 15, 1, 16, 1, 17, 1,
	18, 1, 19, 1, 20, 1, 22, 1,
	23, 1, 24, 1, 35, 1, 37, 1,
	39, 1, 40, 1, 42, 1, 43, 1,
	44, 1, 46, 1, 48, 1, 49, 1,
	50, 1, 51, 1, 53, 1, 54, 2,
	4, 9, 2, 5, 6, 2, 7, 3,
	2, 7, 9, 2, 21, 26, 2, 25,
	10, 2, 27, 28, 2, 29, 30, 2,
	32, 34, 2, 33, 31, 2, 38, 36,
	2, 40, 42, 2, 45, 2, 2, 46,
	54, 2, 47, 36, 2, 49, 54, 2,
	50, 54, 2, 51, 54, 2, 52, 41,
	2, 53, 54, 3, 32, 34, 35, 4,
	21, 26, 27, 28
};
11560
/* Ragel-generated.  Indexed by state: offset of that state's first key in
 * _json_trans_keys. */
static const short _json_key_offsets[] = {
	0, 0, 12, 13, 18, 23, 28, 29,
	30, 31, 32, 33, 34, 35, 36, 37,
	38, 43, 44, 48, 53, 58, 63, 67,
	71, 74, 77, 79, 83, 87, 89, 91,
	96, 98, 100, 109, 115, 121, 127, 133,
	135, 139, 142, 144, 146, 149, 150, 154,
	156, 158, 160, 162, 163, 165, 167, 168,
	170, 172, 173, 175, 177, 178, 180, 182,
	183, 185, 187, 191, 193, 195, 196, 197,
	198, 199, 201, 206, 208, 210, 212, 221,
	222, 222, 222, 227, 232, 237, 238, 239,
	240, 241, 241, 242, 243, 244, 244, 245,
	246, 247, 247, 252, 253, 257, 262, 267,
	272, 276, 276, 279, 282, 285, 288, 291,
	294, 294, 294, 294, 294, 294
};
11578
/* Ragel-generated.  Input-byte keys for every state.  Each state's slice
 * holds its single-character keys first, followed by (low, high) range
 * pairs; parse() binary-searches both parts against the current byte. */
static const char _json_trans_keys[] = {
	32, 34, 45, 91, 102, 110, 116, 123,
	9, 13, 48, 57, 34, 32, 93, 125,
	9, 13, 32, 44, 93, 9, 13, 32,
	93, 125, 9, 13, 97, 108, 115, 101,
	117, 108, 108, 114, 117, 101, 32, 34,
	125, 9, 13, 34, 32, 58, 9, 13,
	32, 93, 125, 9, 13, 32, 44, 125,
	9, 13, 32, 44, 125, 9, 13, 32,
	34, 9, 13, 45, 48, 49, 57, 48,
	49, 57, 46, 69, 101, 48, 57, 69,
	101, 48, 57, 43, 45, 48, 57, 48,
	57, 48, 57, 46, 69, 101, 48, 57,
	34, 92, 34, 92, 34, 47, 92, 98,
	102, 110, 114, 116, 117, 48, 57, 65,
	70, 97, 102, 48, 57, 65, 70, 97,
	102, 48, 57, 65, 70, 97, 102, 48,
	57, 65, 70, 97, 102, 34, 92, 45,
	48, 49, 57, 48, 49, 57, 46, 115,
	48, 57, 115, 48, 57, 34, 46, 115,
	48, 57, 48, 57, 48, 57, 48, 57,
	48, 57, 45, 48, 57, 48, 57, 45,
	48, 57, 48, 57, 84, 48, 57, 48,
	57, 58, 48, 57, 48, 57, 58, 48,
	57, 48, 57, 43, 45, 46, 90, 48,
	57, 48, 57, 58, 48, 48, 34, 48,
	57, 43, 45, 90, 48, 57, 34, 44,
	34, 44, 34, 44, 34, 45, 91, 102,
	110, 116, 123, 48, 57, 34, 32, 93,
	125, 9, 13, 32, 44, 93, 9, 13,
	32, 93, 125, 9, 13, 97, 108, 115,
	101, 117, 108, 108, 114, 117, 101, 32,
	34, 125, 9, 13, 34, 32, 58, 9,
	13, 32, 93, 125, 9, 13, 32, 44,
	125, 9, 13, 32, 44, 125, 9, 13,
	32, 34, 9, 13, 32, 9, 13, 32,
	9, 13, 32, 9, 13, 32, 9, 13,
	32, 9, 13, 32, 9, 13, 0
};
11618
/* Ragel-generated.  Indexed by state: number of single-character keys in
 * that state's slice of _json_trans_keys. */
static const char _json_single_lengths[] = {
	0, 8, 1, 3, 3, 3, 1, 1,
	1, 1, 1, 1, 1, 1, 1, 1,
	3, 1, 2, 3, 3, 3, 2, 2,
	1, 3, 0, 2, 2, 0, 0, 3,
	2, 2, 9, 0, 0, 0, 0, 2,
	2, 1, 2, 0, 1, 1, 2, 0,
	0, 0, 0, 1, 0, 0, 1, 0,
	0, 1, 0, 0, 1, 0, 0, 1,
	0, 0, 4, 0, 0, 1, 1, 1,
	1, 0, 3, 2, 2, 2, 7, 1,
	0, 0, 3, 3, 3, 1, 1, 1,
	1, 0, 1, 1, 1, 0, 1, 1,
	1, 0, 3, 1, 2, 3, 3, 3,
	2, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0
};
11636
/* Ragel-generated.  Indexed by state: number of (low, high) range pairs in
 * that state's slice of _json_trans_keys. */
static const char _json_range_lengths[] = {
	0, 2, 0, 1, 1, 1, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	1, 0, 1, 1, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 3, 3, 3, 3, 0,
	1, 1, 0, 1, 1, 0, 1, 1,
	1, 1, 1, 0, 1, 1, 0, 1,
	1, 0, 1, 1, 0, 1, 1, 0,
	1, 1, 0, 1, 1, 0, 0, 0,
	0, 1, 1, 0, 0, 0, 1, 0,
	0, 0, 1, 1, 1, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 1, 0, 1, 1, 1, 1,
	1, 0, 1, 1, 1, 1, 1, 1,
	0, 0, 0, 0, 0, 0
};
11654
/* Ragel-generated.  Indexed by state: base index of that state's entries in
 * _json_indicies.  parse() adds the position of the matching key (or the
 * total key count when nothing matches) to this base. */
static const short _json_index_offsets[] = {
	0, 0, 11, 13, 18, 23, 28, 30,
	32, 34, 36, 38, 40, 42, 44, 46,
	48, 53, 55, 59, 64, 69, 74, 78,
	82, 85, 89, 91, 95, 99, 101, 103,
	108, 111, 114, 124, 128, 132, 136, 140,
	143, 147, 150, 153, 155, 158, 160, 164,
	166, 168, 170, 172, 174, 176, 178, 180,
	182, 184, 186, 188, 190, 192, 194, 196,
	198, 200, 202, 207, 209, 211, 213, 215,
	217, 219, 221, 226, 229, 232, 235, 244,
	246, 247, 248, 253, 258, 263, 265, 267,
	269, 271, 272, 274, 276, 278, 279, 281,
	283, 285, 286, 291, 293, 297, 302, 307,
	312, 316, 317, 320, 323, 326, 329, 332,
	335, 336, 337, 338, 339, 340
};
11672
/* Ragel-generated.  Maps a per-state key slot (see _json_index_offsets) to a
 * transition id, which in turn indexes _json_trans_targs and
 * _json_trans_actions. */
static const unsigned char _json_indicies[] = {
	0, 2, 3, 4, 5, 6, 7, 8,
	0, 3, 1, 9, 1, 11, 12, 1,
	11, 10, 13, 14, 12, 13, 1, 14,
	1, 1, 14, 10, 15, 1, 16, 1,
	17, 1, 18, 1, 19, 1, 20, 1,
	21, 1, 22, 1, 23, 1, 24, 1,
	25, 26, 27, 25, 1, 28, 1, 29,
	30, 29, 1, 30, 1, 1, 30, 31,
	32, 33, 34, 32, 1, 35, 36, 27,
	35, 1, 36, 26, 36, 1, 37, 38,
	39, 1, 38, 39, 1, 41, 42, 42,
	40, 43, 1, 42, 42, 43, 40, 44,
	44, 45, 1, 45, 1, 45, 40, 41,
	42, 42, 39, 40, 47, 48, 46, 50,
	51, 49, 52, 52, 52, 52, 52, 52,
	52, 52, 53, 1, 54, 54, 54, 1,
	55, 55, 55, 1, 56, 56, 56, 1,
	57, 57, 57, 1, 59, 60, 58, 61,
	62, 63, 1, 64, 65, 1, 66, 67,
	1, 68, 1, 67, 68, 1, 69, 1,
	66, 67, 65, 1, 70, 1, 71, 1,
	72, 1, 73, 1, 74, 1, 75, 1,
	76, 1, 77, 1, 78, 1, 79, 1,
	80, 1, 81, 1, 82, 1, 83, 1,
	84, 1, 85, 1, 86, 1, 87, 1,
	88, 1, 89, 89, 90, 91, 1, 92,
	1, 93, 1, 94, 1, 95, 1, 96,
	1, 97, 1, 98, 1, 99, 99, 100,
	98, 1, 102, 1, 101, 104, 105, 103,
	1, 1, 101, 106, 107, 108, 109, 110,
	111, 112, 107, 1, 113, 1, 114, 115,
	117, 118, 1, 117, 116, 119, 120, 118,
	119, 1, 120, 1, 1, 120, 116, 121,
	1, 122, 1, 123, 1, 124, 1, 125,
	126, 1, 127, 1, 128, 1, 129, 130,
	1, 131, 1, 132, 1, 133, 134, 135,
	136, 134, 1, 137, 1, 138, 139, 138,
	1, 139, 1, 1, 139, 140, 141, 142,
	143, 141, 1, 144, 145, 136, 144, 1,
	145, 135, 145, 1, 146, 147, 147, 1,
	148, 148, 1, 149, 149, 1, 150, 150,
	1, 151, 151, 1, 152, 152, 1, 1,
	1, 1, 1, 1, 1, 0
};
11718
/* Ragel-generated.  Indexed by transition id: the state the machine moves
 * to.  parse() stops the machine when the target state is 0. */
static const char _json_trans_targs[] = {
	1, 0, 2, 107, 3, 6, 10, 13,
	16, 106, 4, 3, 106, 4, 5, 7,
	8, 9, 108, 11, 12, 109, 14, 15,
	110, 16, 17, 111, 18, 18, 19, 20,
	21, 22, 111, 21, 22, 24, 25, 31,
	112, 26, 28, 27, 29, 30, 33, 113,
	34, 33, 113, 34, 32, 35, 36, 37,
	38, 39, 33, 113, 34, 41, 42, 46,
	42, 46, 43, 45, 44, 114, 48, 49,
	50, 51, 52, 53, 54, 55, 56, 57,
	58, 59, 60, 61, 62, 63, 64, 65,
	66, 67, 73, 72, 68, 69, 70, 71,
	72, 115, 74, 67, 72, 76, 116, 76,
	116, 77, 79, 81, 82, 85, 90, 94,
	98, 80, 117, 117, 83, 82, 80, 83,
	84, 86, 87, 88, 89, 117, 91, 92,
	93, 117, 95, 96, 97, 117, 98, 99,
	105, 100, 100, 101, 102, 103, 104, 105,
	103, 104, 117, 106, 106, 106, 106, 106,
	106
};
11741
/* Ragel-generated.  Indexed by transition id: offset into _json_actions of
 * the action list to run on that transition; 0 means no actions. */
static const unsigned char _json_trans_actions[] = {
	0, 0, 113, 107, 53, 0, 0, 0,
	125, 59, 45, 0, 55, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 101, 51, 47, 0, 0, 45,
	49, 49, 104, 0, 0, 0, 0, 0,
	3, 0, 0, 0, 0, 0, 5, 15,
	0, 0, 71, 7, 13, 0, 74, 9,
	9, 9, 77, 80, 11, 37, 37, 37,
	0, 0, 0, 39, 0, 41, 86, 0,
	0, 0, 17, 19, 0, 21, 23, 0,
	25, 27, 0, 29, 31, 0, 33, 35,
	0, 135, 83, 135, 0, 0, 0, 0,
	0, 92, 0, 89, 89, 98, 43, 0,
	131, 95, 113, 107, 53, 0, 0, 0,
	125, 59, 69, 110, 45, 0, 55, 0,
	0, 0, 0, 0, 0, 119, 0, 0,
	0, 122, 0, 0, 0, 116, 0, 101,
	51, 47, 0, 0, 45, 49, 49, 104,
	0, 0, 128, 0, 57, 63, 65, 61,
	67
};
11764
/* Ragel-generated.  Indexed by state: offset into _json_actions of the
 * action list parse() runs when the input pointer equals its `eof` marker
 * while the machine is in that state; 0 means no actions. */
static const unsigned char _json_eof_actions[] = {
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 1, 0, 1, 0, 0, 1, 1,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 57, 63, 65, 61, 67,
	0, 0, 0, 0, 0, 0
};
11782
/* Ragel-generated.  Initial state of the machine; json_parser_reset() loads
 * it into the parser's current state. */
static const int json_start = 1;

/* Ragel-generated entry states of the individual sub-machines.  parse()
 * uses the same numbers as literals when it calls into a sub-machine, so
 * these constants are only referenced to silence unused warnings. */
static const int json_en_number_machine = 23;
static const int json_en_string_machine = 32;
static const int json_en_duration_machine = 40;
static const int json_en_timestamp_machine = 47;
static const int json_en_fieldmask_machine = 75;
static const int json_en_value_machine = 78;
static const int json_en_main = 1;
11792
11793
11794#line 2783 "upb/json/parser.rl"
11795
11796size_t parse(void *closure, const void *hd, const char *buf, size_t size,
11797             const upb_bufhandle *handle) {
11798  upb_json_parser *parser = closure;
11799
11800  /* Variables used by Ragel's generated code. */
11801  int cs = parser->current_state;
11802  int *stack = parser->parser_stack;
11803  int top = parser->parser_top;
11804
11805  const char *p = buf;
11806  const char *pe = buf + size;
11807  const char *eof = &eof_ch;
11808
11809  parser->handle = handle;
11810
11811  UPB_UNUSED(hd);
11812  UPB_UNUSED(handle);
11813
11814  capture_resume(parser, buf);
11815
11816
11817#line 2861 "upb/json/parser.c"
11818	{
11819	int _klen;
11820	unsigned int _trans;
11821	const char *_acts;
11822	unsigned int _nacts;
11823	const char *_keys;
11824
11825	if ( p == pe )
11826		goto _test_eof;
11827	if ( cs == 0 )
11828		goto _out;
11829_resume:
11830	_keys = _json_trans_keys + _json_key_offsets[cs];
11831	_trans = _json_index_offsets[cs];
11832
11833	_klen = _json_single_lengths[cs];
11834	if ( _klen > 0 ) {
11835		const char *_lower = _keys;
11836		const char *_mid;
11837		const char *_upper = _keys + _klen - 1;
11838		while (1) {
11839			if ( _upper < _lower )
11840				break;
11841
11842			_mid = _lower + ((_upper-_lower) >> 1);
11843			if ( (*p) < *_mid )
11844				_upper = _mid - 1;
11845			else if ( (*p) > *_mid )
11846				_lower = _mid + 1;
11847			else {
11848				_trans += (unsigned int)(_mid - _keys);
11849				goto _match;
11850			}
11851		}
11852		_keys += _klen;
11853		_trans += _klen;
11854	}
11855
11856	_klen = _json_range_lengths[cs];
11857	if ( _klen > 0 ) {
11858		const char *_lower = _keys;
11859		const char *_mid;
11860		const char *_upper = _keys + (_klen<<1) - 2;
11861		while (1) {
11862			if ( _upper < _lower )
11863				break;
11864
11865			_mid = _lower + (((_upper-_lower) >> 1) & ~1);
11866			if ( (*p) < _mid[0] )
11867				_upper = _mid - 2;
11868			else if ( (*p) > _mid[1] )
11869				_lower = _mid + 2;
11870			else {
11871				_trans += (unsigned int)((_mid - _keys)>>1);
11872				goto _match;
11873			}
11874		}
11875		_trans += _klen;
11876	}
11877
11878_match:
11879	_trans = _json_indicies[_trans];
11880	cs = _json_trans_targs[_trans];
11881
11882	if ( _json_trans_actions[_trans] == 0 )
11883		goto _again;
11884
11885	_acts = _json_actions + _json_trans_actions[_trans];
11886	_nacts = (unsigned int) *_acts++;
11887	while ( _nacts-- > 0 )
11888	{
11889		switch ( *_acts++ )
11890		{
11891	case 1:
11892#line 2588 "upb/json/parser.rl"
11893	{ p--; {cs = stack[--top]; goto _again;} }
11894	break;
11895	case 2:
11896#line 2590 "upb/json/parser.rl"
11897	{ p--; {stack[top++] = cs; cs = 23;goto _again;} }
11898	break;
11899	case 3:
11900#line 2594 "upb/json/parser.rl"
11901	{ start_text(parser, p); }
11902	break;
11903	case 4:
11904#line 2595 "upb/json/parser.rl"
11905	{ CHECK_RETURN_TOP(end_text(parser, p)); }
11906	break;
11907	case 5:
11908#line 2601 "upb/json/parser.rl"
11909	{ start_hex(parser); }
11910	break;
11911	case 6:
11912#line 2602 "upb/json/parser.rl"
11913	{ hexdigit(parser, p); }
11914	break;
11915	case 7:
11916#line 2603 "upb/json/parser.rl"
11917	{ CHECK_RETURN_TOP(end_hex(parser)); }
11918	break;
11919	case 8:
11920#line 2609 "upb/json/parser.rl"
11921	{ CHECK_RETURN_TOP(escape(parser, p)); }
11922	break;
11923	case 9:
11924#line 2615 "upb/json/parser.rl"
11925	{ p--; {cs = stack[--top]; goto _again;} }
11926	break;
11927	case 10:
11928#line 2620 "upb/json/parser.rl"
11929	{ start_year(parser, p); }
11930	break;
11931	case 11:
11932#line 2621 "upb/json/parser.rl"
11933	{ CHECK_RETURN_TOP(end_year(parser, p)); }
11934	break;
11935	case 12:
11936#line 2625 "upb/json/parser.rl"
11937	{ start_month(parser, p); }
11938	break;
11939	case 13:
11940#line 2626 "upb/json/parser.rl"
11941	{ CHECK_RETURN_TOP(end_month(parser, p)); }
11942	break;
11943	case 14:
11944#line 2630 "upb/json/parser.rl"
11945	{ start_day(parser, p); }
11946	break;
11947	case 15:
11948#line 2631 "upb/json/parser.rl"
11949	{ CHECK_RETURN_TOP(end_day(parser, p)); }
11950	break;
11951	case 16:
11952#line 2635 "upb/json/parser.rl"
11953	{ start_hour(parser, p); }
11954	break;
11955	case 17:
11956#line 2636 "upb/json/parser.rl"
11957	{ CHECK_RETURN_TOP(end_hour(parser, p)); }
11958	break;
11959	case 18:
11960#line 2640 "upb/json/parser.rl"
11961	{ start_minute(parser, p); }
11962	break;
11963	case 19:
11964#line 2641 "upb/json/parser.rl"
11965	{ CHECK_RETURN_TOP(end_minute(parser, p)); }
11966	break;
11967	case 20:
11968#line 2645 "upb/json/parser.rl"
11969	{ start_second(parser, p); }
11970	break;
11971	case 21:
11972#line 2646 "upb/json/parser.rl"
11973	{ CHECK_RETURN_TOP(end_second(parser, p)); }
11974	break;
11975	case 22:
11976#line 2651 "upb/json/parser.rl"
11977	{ start_duration_base(parser, p); }
11978	break;
11979	case 23:
11980#line 2652 "upb/json/parser.rl"
11981	{ CHECK_RETURN_TOP(end_duration_base(parser, p)); }
11982	break;
11983	case 24:
11984#line 2654 "upb/json/parser.rl"
11985	{ p--; {cs = stack[--top]; goto _again;} }
11986	break;
11987	case 25:
11988#line 2659 "upb/json/parser.rl"
11989	{ start_timestamp_base(parser); }
11990	break;
11991	case 26:
11992#line 2661 "upb/json/parser.rl"
11993	{ start_timestamp_fraction(parser, p); }
11994	break;
11995	case 27:
11996#line 2662 "upb/json/parser.rl"
11997	{ CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); }
11998	break;
11999	case 28:
12000#line 2664 "upb/json/parser.rl"
12001	{ start_timestamp_zone(parser, p); }
12002	break;
12003	case 29:
12004#line 2665 "upb/json/parser.rl"
12005	{ CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); }
12006	break;
12007	case 30:
12008#line 2667 "upb/json/parser.rl"
12009	{ p--; {cs = stack[--top]; goto _again;} }
12010	break;
12011	case 31:
12012#line 2672 "upb/json/parser.rl"
12013	{ start_fieldmask_path_text(parser, p); }
12014	break;
12015	case 32:
12016#line 2673 "upb/json/parser.rl"
12017	{ end_fieldmask_path_text(parser, p); }
12018	break;
12019	case 33:
12020#line 2678 "upb/json/parser.rl"
12021	{ start_fieldmask_path(parser); }
12022	break;
12023	case 34:
12024#line 2679 "upb/json/parser.rl"
12025	{ end_fieldmask_path(parser); }
12026	break;
12027	case 35:
12028#line 2685 "upb/json/parser.rl"
12029	{ p--; {cs = stack[--top]; goto _again;} }
12030	break;
12031	case 36:
12032#line 2690 "upb/json/parser.rl"
12033	{
12034        if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) {
12035          {stack[top++] = cs; cs = 47;goto _again;}
12036        } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) {
12037          {stack[top++] = cs; cs = 40;goto _again;}
12038        } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) {
12039          {stack[top++] = cs; cs = 75;goto _again;}
12040        } else {
12041          {stack[top++] = cs; cs = 32;goto _again;}
12042        }
12043      }
12044	break;
12045	case 37:
12046#line 2703 "upb/json/parser.rl"
12047	{ p--; {stack[top++] = cs; cs = 78;goto _again;} }
12048	break;
12049	case 38:
12050#line 2708 "upb/json/parser.rl"
12051	{
12052        if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12053          start_any_member(parser, p);
12054        } else {
12055          start_member(parser);
12056        }
12057      }
12058	break;
12059	case 39:
12060#line 2715 "upb/json/parser.rl"
12061	{ CHECK_RETURN_TOP(end_membername(parser)); }
12062	break;
12063	case 40:
12064#line 2718 "upb/json/parser.rl"
12065	{
12066        if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12067          end_any_member(parser, p);
12068        } else {
12069          end_member(parser);
12070        }
12071      }
12072	break;
12073	case 41:
12074#line 2729 "upb/json/parser.rl"
12075	{
12076        if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12077          start_any_object(parser, p);
12078        } else {
12079          start_object(parser);
12080        }
12081      }
12082	break;
12083	case 42:
12084#line 2738 "upb/json/parser.rl"
12085	{
12086        if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) {
12087          CHECK_RETURN_TOP(end_any_object(parser, p));
12088        } else {
12089          end_object(parser);
12090        }
12091      }
12092	break;
12093	case 43:
12094#line 2750 "upb/json/parser.rl"
12095	{ CHECK_RETURN_TOP(start_array(parser)); }
12096	break;
12097	case 44:
12098#line 2754 "upb/json/parser.rl"
12099	{ end_array(parser); }
12100	break;
12101	case 45:
12102#line 2759 "upb/json/parser.rl"
12103	{ CHECK_RETURN_TOP(start_number(parser, p)); }
12104	break;
12105	case 46:
12106#line 2760 "upb/json/parser.rl"
12107	{ CHECK_RETURN_TOP(end_number(parser, p)); }
12108	break;
12109	case 47:
12110#line 2762 "upb/json/parser.rl"
12111	{ CHECK_RETURN_TOP(start_stringval(parser)); }
12112	break;
12113	case 48:
12114#line 2763 "upb/json/parser.rl"
12115	{ CHECK_RETURN_TOP(end_stringval(parser)); }
12116	break;
12117	case 49:
12118#line 2765 "upb/json/parser.rl"
12119	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
12120	break;
12121	case 50:
12122#line 2767 "upb/json/parser.rl"
12123	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
12124	break;
12125	case 51:
12126#line 2769 "upb/json/parser.rl"
12127	{ CHECK_RETURN_TOP(end_null(parser)); }
12128	break;
12129	case 52:
12130#line 2771 "upb/json/parser.rl"
12131	{ CHECK_RETURN_TOP(start_subobject_full(parser)); }
12132	break;
12133	case 53:
12134#line 2772 "upb/json/parser.rl"
12135	{ end_subobject_full(parser); }
12136	break;
12137	case 54:
12138#line 2777 "upb/json/parser.rl"
12139	{ p--; {cs = stack[--top]; goto _again;} }
12140	break;
12141#line 3185 "upb/json/parser.c"
12142		}
12143	}
12144
12145_again:
12146	if ( cs == 0 )
12147		goto _out;
12148	if ( ++p != pe )
12149		goto _resume;
12150	_test_eof: {}
12151	if ( p == eof )
12152	{
12153	const char *__acts = _json_actions + _json_eof_actions[cs];
12154	unsigned int __nacts = (unsigned int) *__acts++;
12155	while ( __nacts-- > 0 ) {
12156		switch ( *__acts++ ) {
12157	case 0:
12158#line 2586 "upb/json/parser.rl"
12159	{ p--; {cs = stack[--top]; 	if ( p == pe )
12160		goto _test_eof;
12161goto _again;} }
12162	break;
12163	case 46:
12164#line 2760 "upb/json/parser.rl"
12165	{ CHECK_RETURN_TOP(end_number(parser, p)); }
12166	break;
12167	case 49:
12168#line 2765 "upb/json/parser.rl"
12169	{ CHECK_RETURN_TOP(end_bool(parser, true)); }
12170	break;
12171	case 50:
12172#line 2767 "upb/json/parser.rl"
12173	{ CHECK_RETURN_TOP(end_bool(parser, false)); }
12174	break;
12175	case 51:
12176#line 2769 "upb/json/parser.rl"
12177	{ CHECK_RETURN_TOP(end_null(parser)); }
12178	break;
12179	case 53:
12180#line 2772 "upb/json/parser.rl"
12181	{ end_subobject_full(parser); }
12182	break;
12183#line 3227 "upb/json/parser.c"
12184		}
12185	}
12186	}
12187
12188	_out: {}
12189	}
12190
12191#line 2805 "upb/json/parser.rl"
12192
12193  if (p != pe) {
12194    upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p);
12195  } else {
12196    capture_suspend(parser, &p);
12197  }
12198
12199error:
12200  /* Save parsing state back to parser. */
12201  parser->current_state = cs;
12202  parser->parser_top = top;
12203
12204  return p - buf;
12205}
12206
12207static bool end(void *closure, const void *hd) {
12208  upb_json_parser *parser = closure;
12209
12210  /* Prevent compile warning on unused static constants. */
12211  UPB_UNUSED(json_start);
12212  UPB_UNUSED(json_en_duration_machine);
12213  UPB_UNUSED(json_en_fieldmask_machine);
12214  UPB_UNUSED(json_en_number_machine);
12215  UPB_UNUSED(json_en_string_machine);
12216  UPB_UNUSED(json_en_timestamp_machine);
12217  UPB_UNUSED(json_en_value_machine);
12218  UPB_UNUSED(json_en_main);
12219
12220  parse(parser, hd, &eof_ch, 0, NULL);
12221
12222  return parser->current_state >= 106;
12223}
12224
12225static void json_parser_reset(upb_json_parser *p) {
12226  int cs;
12227  int top;
12228
12229  p->top = p->stack;
12230  init_frame(p->top);
12231
12232  /* Emit Ragel initialization of the parser. */
12233
12234#line 3278 "upb/json/parser.c"
12235	{
12236	cs = json_start;
12237	top = 0;
12238	}
12239
12240#line 2847 "upb/json/parser.rl"
12241  p->current_state = cs;
12242  p->parser_top = top;
12243  accumulate_clear(p);
12244  p->multipart_state = MULTIPART_INACTIVE;
12245  p->capture = NULL;
12246  p->accumulated = NULL;
12247}
12248
12249static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c,
12250                                               const upb_msgdef *md) {
12251  upb_msg_field_iter i;
12252  upb_alloc *alloc = upb_arena_alloc(c->arena);
12253
12254  upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m));
12255
12256  m->cache = c;
12257
12258  upb_byteshandler_init(&m->input_handler_);
12259  upb_byteshandler_setstring(&m->input_handler_, parse, m);
12260  upb_byteshandler_setendstr(&m->input_handler_, end, m);
12261
12262  upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc);
12263
12264  /* Build name_table */
12265
12266  for(upb_msg_field_begin(&i, md);
12267      !upb_msg_field_done(&i);
12268      upb_msg_field_next(&i)) {
12269    const upb_fielddef *f = upb_msg_iter_field(&i);
12270    upb_value v = upb_value_constptr(f);
12271    const char *name;
12272
12273    /* Add an entry for the JSON name. */
12274    name = upb_fielddef_jsonname(f);
12275    upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
12276
12277    if (strcmp(name, upb_fielddef_name(f)) != 0) {
12278      /* Since the JSON name is different from the regular field name, add an
12279       * entry for the raw name (compliant proto3 JSON parsers must accept
12280       * both). */
12281      const char *name = upb_fielddef_name(f);
12282      upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc);
12283    }
12284  }
12285
12286  return m;
12287}
12288
12289/* Public API *****************************************************************/
12290
12291upb_json_parser *upb_json_parser_create(upb_arena *arena,
12292                                        const upb_json_parsermethod *method,
12293                                        const upb_symtab* symtab,
12294                                        upb_sink output,
12295                                        upb_status *status,
12296                                        bool ignore_json_unknown) {
12297#ifndef NDEBUG
12298  const size_t size_before = upb_arena_bytesallocated(arena);
12299#endif
12300  upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser));
12301  if (!p) return false;
12302
12303  p->arena = arena;
12304  p->method = method;
12305  p->status = status;
12306  p->limit = p->stack + UPB_JSON_MAX_DEPTH;
12307  p->accumulate_buf = NULL;
12308  p->accumulate_buf_size = 0;
12309  upb_bytessink_reset(&p->input_, &method->input_handler_, p);
12310
12311  json_parser_reset(p);
12312  p->top->sink = output;
12313  p->top->m = upb_handlers_msgdef(output.handlers);
12314  if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) {
12315    p->top->is_any = true;
12316    p->top->any_frame = json_parser_any_frame_new(p);
12317  } else {
12318    p->top->is_any = false;
12319    p->top->any_frame = NULL;
12320  }
12321  set_name_table(p, p->top);
12322  p->symtab = symtab;
12323
12324  p->ignore_json_unknown = ignore_json_unknown;
12325
12326  /* If this fails, uncomment and increase the value in parser.h. */
12327  /* fprintf(stderr, "%zd\n", upb_arena_bytesallocated(arena) - size_before); */
12328  UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <=
12329                      UPB_JSON_PARSER_SIZE);
12330  return p;
12331}
12332
12333upb_bytessink upb_json_parser_input(upb_json_parser *p) {
12334  return p->input_;
12335}
12336
12337const upb_byteshandler *upb_json_parsermethod_inputhandler(
12338    const upb_json_parsermethod *m) {
12339  return &m->input_handler_;
12340}
12341
12342upb_json_codecache *upb_json_codecache_new(void) {
12343  upb_alloc *alloc;
12344  upb_json_codecache *c;
12345
12346  c = upb_gmalloc(sizeof(*c));
12347
12348  c->arena = upb_arena_new();
12349  alloc = upb_arena_alloc(c->arena);
12350
12351  upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc);
12352
12353  return c;
12354}
12355
12356void upb_json_codecache_free(upb_json_codecache *c) {
12357  upb_arena_free(c->arena);
12358  upb_gfree(c);
12359}
12360
12361const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c,
12362                                                    const upb_msgdef *md) {
12363  upb_json_parsermethod *m;
12364  upb_value v;
12365  upb_msg_field_iter i;
12366  upb_alloc *alloc = upb_arena_alloc(c->arena);
12367
12368  if (upb_inttable_lookupptr(&c->methods, md, &v)) {
12369    return upb_value_getconstptr(v);
12370  }
12371
12372  m = parsermethod_new(c, md);
12373  v = upb_value_constptr(m);
12374
12375  if (!m) return NULL;
12376  if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL;
12377
12378  /* Populate parser methods for all submessages, so the name tables will
12379   * be available during parsing. */
12380  for(upb_msg_field_begin(&i, md);
12381      !upb_msg_field_done(&i);
12382      upb_msg_field_next(&i)) {
12383    upb_fielddef *f = upb_msg_iter_field(&i);
12384
12385    if (upb_fielddef_issubmsg(f)) {
12386      const upb_msgdef *subdef = upb_fielddef_msgsubdef(f);
12387      const upb_json_parsermethod *sub_method =
12388          upb_json_codecache_get(c, subdef);
12389
12390      if (!sub_method) return NULL;
12391    }
12392  }
12393
12394  return m;
12395}
12396/*
12397** This currently uses snprintf() to format primitives, and could be optimized
12398** further.
12399*/
12400
12401
12402#include <ctype.h>
12403#include <inttypes.h>
12404#include <stdint.h>
12405#include <string.h>
12406#include <time.h>
12407
12408
12409struct upb_json_printer {
12410  upb_sink input_;
12411  /* BytesSink closure. */
12412  void *subc_;
12413  upb_bytessink output_;
12414
12415  /* We track the depth so that we know when to emit startstr/endstr on the
12416   * output. */
12417  int depth_;
12418
12419  /* Have we emitted the first element? This state is necessary to emit commas
12420   * without leaving a trailing comma in arrays/maps. We keep this state per
12421   * frame depth.
12422   *
12423   * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages.
12424   * We count frames (contexts in which we separate elements by commas) as both
12425   * repeated fields and messages (maps), and the worst case is a
12426   * message->repeated field->submessage->repeated field->... nesting. */
12427  bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2];
12428
12429  /* To print timestamp, printer needs to cache its seconds and nanos values
12430   * and convert them when ending timestamp message. See comments of
12431   * printer_sethandlers_timestamp for more detail. */
12432  int64_t seconds;
12433  int32_t nanos;
12434};
12435
/* String piece: an owned character buffer together with its length. */
typedef struct {
  char *ptr;   /* Heap copy of the text; released by freestrpc(). */
  size_t len;  /* Length of the text in bytes, excluding the terminator. */
} strpc;
12441
12442void freestrpc(void *ptr) {
12443  strpc *pc = ptr;
12444  upb_gfree(pc->ptr);
12445  upb_gfree(pc);
12446}
12447
/* Printer-cache options.
 * NOTE(review): presumably selects raw proto field names over JSON names when
 * printing keys -- confirm against the code that reads it. */
typedef struct {
  bool preserve_fieldnames;
} upb_json_printercache;
12451
12452/* Convert fielddef name to JSON name and return as a string piece. */
12453strpc *newstrpc(upb_handlers *h, const upb_fielddef *f,
12454                bool preserve_fieldnames) {
12455  /* TODO(haberman): handle malloc failure. */
12456  strpc *ret = upb_gmalloc(sizeof(*ret));
12457  if (preserve_fieldnames) {
12458    ret->ptr = upb_gstrdup(upb_fielddef_name(f));
12459    ret->len = strlen(ret->ptr);
12460  } else {
12461    ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f));
12462    ret->len = strlen(ret->ptr);
12463  }
12464
12465  upb_handlers_addcleanup(h, ret, freestrpc);
12466  return ret;
12467}
12468
12469/* Convert a null-terminated const char* to a string piece. */
12470strpc *newstrpc_str(upb_handlers *h, const char * str) {
12471  strpc * ret = upb_gmalloc(sizeof(*ret));
12472  ret->ptr = upb_gstrdup(str);
12473  ret->len = strlen(str);
12474  upb_handlers_addcleanup(h, ret, freestrpc);
12475  return ret;
12476}
12477
12478/* ------------ JSON string printing: values, maps, arrays ------------------ */
12479
12480static void print_data(
12481    upb_json_printer *p, const char *buf, size_t len) {
12482  /* TODO: Will need to change if we support pushback from the sink. */
12483  size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL);
12484  UPB_ASSERT(n == len);
12485}
12486
12487static void print_comma(upb_json_printer *p) {
12488  if (!p->first_elem_[p->depth_]) {
12489    print_data(p, ",", 1);
12490  }
12491  p->first_elem_[p->depth_] = false;
12492}
12493
12494/* Helpers that print properly formatted elements to the JSON output stream. */
12495
12496/* Used for escaping control chars in strings. */
12497static const char kControlCharLimit = 0x20;
12498
12499UPB_INLINE bool is_json_escaped(char c) {
12500  /* See RFC 4627. */
12501  unsigned char uc = (unsigned char)c;
12502  return uc < kControlCharLimit || uc == '"' || uc == '\\';
12503}
12504
12505UPB_INLINE const char* json_nice_escape(char c) {
12506  switch (c) {
12507    case '"':  return "\\\"";
12508    case '\\': return "\\\\";
12509    case '\b': return "\\b";
12510    case '\f': return "\\f";
12511    case '\n': return "\\n";
12512    case '\r': return "\\r";
12513    case '\t': return "\\t";
12514    default:   return NULL;
12515  }
12516}
12517
12518/* Write a properly escaped string chunk. The surrounding quotes are *not*
12519 * printed; this is so that the caller has the option of emitting the string
12520 * content in chunks. */
12521static void putstring(upb_json_printer *p, const char *buf, size_t len) {
12522  const char* unescaped_run = NULL;
12523  unsigned int i;
12524  for (i = 0; i < len; i++) {
12525    char c = buf[i];
12526    /* Handle escaping. */
12527    if (is_json_escaped(c)) {
12528      /* Use a "nice" escape, like \n, if one exists for this character. */
12529      const char* escape = json_nice_escape(c);
12530      /* If we don't have a specific 'nice' escape code, use a \uXXXX-style
12531       * escape. */
12532      char escape_buf[8];
12533      if (!escape) {
12534        unsigned char byte = (unsigned char)c;
12535        _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte);
12536        escape = escape_buf;
12537      }
12538
12539      /* N.B. that we assume that the input encoding is equal to the output
12540       * encoding (both UTF-8 for  now), so for chars >= 0x20 and != \, ", we
12541       * can simply pass the bytes through. */
12542
12543      /* If there's a current run of unescaped chars, print that run first. */
12544      if (unescaped_run) {
12545        print_data(p, unescaped_run, &buf[i] - unescaped_run);
12546        unescaped_run = NULL;
12547      }
12548      /* Then print the escape code. */
12549      print_data(p, escape, strlen(escape));
12550    } else {
12551      /* Add to the current unescaped run of characters. */
12552      if (unescaped_run == NULL) {
12553        unescaped_run = &buf[i];
12554      }
12555    }
12556  }
12557
12558  /* If the string ended in a run of unescaped characters, print that last run. */
12559  if (unescaped_run) {
12560    print_data(p, unescaped_run, &buf[len] - unescaped_run);
12561  }
12562}
12563
/* Bails out of a size_t-returning formatter with (size_t)-1 when condition
 * `x` does not hold.  Wrapped in do/while(0) so that it behaves as a single
 * statement (no dangling-else surprises); use as `CHKLENGTH(cond);`. */
#define CHKLENGTH(x) do { if (!(x)) return (size_t)-1; } while (0)
12565
12566/* Helpers that format floating point values according to our custom formats.
12567 * Right now we use %.8g and %.17g for float/double, respectively, to match
12568 * proto2::util::JsonFormat's defaults.  May want to change this later. */
12569
/* JSON spellings of the infinities, including the surrounding double
 * quotes. */
const char neginf[] = "\"-Infinity\"";
const char inf[] = "\"Infinity\"";
12572
12573static size_t fmt_double(double val, char* buf, size_t length) {
12574  if (val == UPB_INFINITY) {
12575    CHKLENGTH(length >= strlen(inf));
12576    strcpy(buf, inf);
12577    return strlen(inf);
12578  } else if (val == -UPB_INFINITY) {
12579    CHKLENGTH(length >= strlen(neginf));
12580    strcpy(buf, neginf);
12581    return strlen(neginf);
12582  } else {
12583    size_t n = _upb_snprintf(buf, length, "%.17g", val);
12584    CHKLENGTH(n > 0 && n < length);
12585    return n;
12586  }
12587}
12588
12589static size_t fmt_float(float val, char* buf, size_t length) {
12590  size_t n = _upb_snprintf(buf, length, "%.8g", val);
12591  CHKLENGTH(n > 0 && n < length);
12592  return n;
12593}
12594
/* Writes the literal true or false into `buf` (capacity `length`).  Returns
 * the number of characters written, or (size_t)-1 if it does not fit. */
static size_t fmt_bool(bool val, char* buf, size_t length) {
  const char *literal = val ? "true" : "false";
  size_t written = _upb_snprintf(buf, length, "%s", literal);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12600
/* Writes `val` in decimal, unquoted, into `buf` (capacity `length`).  Returns
 * the number of characters written, or (size_t)-1 if it does not fit. */
static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%" PRId64, val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12606
/* Writes `val` in decimal, unquoted, into `buf` (capacity `length`).  Returns
 * the number of characters written, or (size_t)-1 if it does not fit. */
static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "%" PRIu64, val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12612
/* Writes `val` in decimal, wrapped in double quotes, into `buf` (capacity
 * `length`).  Returns the number of characters written, or (size_t)-1 if it
 * does not fit. */
static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "\"%" PRId64 "\"", val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12618
/* Writes `val` in decimal, wrapped in double quotes, into `buf` (capacity
 * `length`).  Returns the number of characters written, or (size_t)-1 if it
 * does not fit. */
static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) {
  size_t written = _upb_snprintf(buf, length, "\"%" PRIu64 "\"", val);

  if (written == 0 || written >= length) return (size_t)-1;
  return written;
}
12624
12625/* Print a map key given a field name. Called by scalar field handlers and by
12626 * startseq for repeated fields. */
12627static bool putkey(void *closure, const void *handler_data) {
12628  upb_json_printer *p = closure;
12629  const strpc *key = handler_data;
12630  print_comma(p);
12631  print_data(p, "\"", 1);
12632  putstring(p, key->ptr, key->len);
12633  print_data(p, "\":", 2);
12634  return true;
12635}
12636
/* Early-return helpers, used as `CHKFMT(x);` / `CHK(x);`.  Both are wrapped
 * in do/while(0) so that they behave as a single statement.
 *
 *   CHKFMT(val)  returns false if a formatter reported failure, (size_t)-1.
 *   CHK(val)     returns false if `val` is false/zero. */
#define CHKFMT(val) do { if ((val) == (size_t)-1) return false; } while (0)
#define CHK(val)    do { if (!(val)) return false; } while (0)
12639
/* Generates three value handlers for `type`, formatting with `fmt_func`:
 *
 *   put<type>       prints the bare value; fails if formatting fails.
 *   scalar_<type>   prints the field key (handler_data) and then the value.
 *   repeated_<type> prints the element separator and then the value. */
#define TYPE_HANDLERS(type, fmt_func)                                        \
  static bool put##type(void *closure, const void *handler_data, type val) { \
    upb_json_printer *printer = closure;                                     \
    char text[64];                                                           \
    size_t text_len = fmt_func(val, text, sizeof(text));                     \
    UPB_UNUSED(handler_data);                                                \
    if (text_len == (size_t)-1) return false;                                \
    print_data(printer, text, text_len);                                     \
    return true;                                                             \
  }                                                                          \
  static bool scalar_##type(void *closure, const void *handler_data,         \
                            type val) {                                      \
    if (!putkey(closure, handler_data)) return false;                        \
    if (!put##type(closure, handler_data, val)) return false;                \
    return true;                                                             \
  }                                                                          \
  static bool repeated_##type(void *closure, const void *handler_data,       \
                              type val) {                                    \
    upb_json_printer *printer = closure;                                     \
    print_comma(printer);                                                    \
    if (!put##type(closure, handler_data, val)) return false;                \
    return true;                                                             \
  }
12663
/* Generates putmapkey_<type>, which prints a map key of `type` as a quoted
 * string followed by a colon ("<value>":), formatting with `fmt_func`.
 *
 * Returns false, printing nothing, if the formatter reports failure with
 * (size_t)-1; that value must never reach print_data() as a length. */
#define TYPE_HANDLERS_MAPKEY(type, fmt_func)                                 \
  static bool putmapkey_##type(void *closure, const void *handler_data,      \
                            type val) {                                      \
    upb_json_printer *p = closure;                                           \
    char data[64];                                                           \
    size_t length = fmt_func(val, data, sizeof(data));                       \
    UPB_UNUSED(handler_data);                                                \
    if (length == (size_t)-1) return false;                                  \
    print_data(p, "\"", 1);                                                  \
    print_data(p, data, length);                                             \
    print_data(p, "\":", 2);                                                 \
    return true;                                                             \
  }
12676
12677TYPE_HANDLERS(double,   fmt_double)
12678TYPE_HANDLERS(float,    fmt_float)
12679TYPE_HANDLERS(bool,     fmt_bool)
12680TYPE_HANDLERS(int32_t,  fmt_int64_as_number)
12681TYPE_HANDLERS(uint32_t, fmt_int64_as_number)
12682TYPE_HANDLERS(int64_t,  fmt_int64_as_string)
12683TYPE_HANDLERS(uint64_t, fmt_uint64_as_string)
12684
12685/* double and float are not allowed to be map keys. */
12686TYPE_HANDLERS_MAPKEY(bool,     fmt_bool)
12687TYPE_HANDLERS_MAPKEY(int32_t,  fmt_int64_as_number)
12688TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number)
12689TYPE_HANDLERS_MAPKEY(int64_t,  fmt_int64_as_number)
12690TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number)
12691
12692#undef TYPE_HANDLERS
12693#undef TYPE_HANDLERS_MAPKEY
12694
12695typedef struct {
12696  void *keyname;
12697  const upb_enumdef *enumdef;
12698} EnumHandlerData;
12699
12700static bool scalar_enum(void *closure, const void *handler_data,
12701                        int32_t val) {
12702  const EnumHandlerData *hd = handler_data;
12703  upb_json_printer *p = closure;
12704  const char *symbolic_name;
12705
12706  CHK(putkey(closure, hd->keyname));
12707
12708  symbolic_name = upb_enumdef_iton(hd->enumdef, val);
12709  if (symbolic_name) {
12710    print_data(p, "\"", 1);
12711    putstring(p, symbolic_name, strlen(symbolic_name));
12712    print_data(p, "\"", 1);
12713  } else {
12714    putint32_t(closure, NULL, val);
12715  }
12716
12717  return true;
12718}
12719
12720static void print_enum_symbolic_name(upb_json_printer *p,
12721                                     const upb_enumdef *def,
12722                                     int32_t val) {
12723  const char *symbolic_name = upb_enumdef_iton(def, val);
12724  if (symbolic_name) {
12725    print_data(p, "\"", 1);
12726    putstring(p, symbolic_name, strlen(symbolic_name));
12727    print_data(p, "\"", 1);
12728  } else {
12729    putint32_t(p, NULL, val);
12730  }
12731}
12732
12733static bool repeated_enum(void *closure, const void *handler_data,
12734                          int32_t val) {
12735  const EnumHandlerData *hd = handler_data;
12736  upb_json_printer *p = closure;
12737  print_comma(p);
12738
12739  print_enum_symbolic_name(p, hd->enumdef, val);
12740
12741  return true;
12742}
12743
12744static bool mapvalue_enum(void *closure, const void *handler_data,
12745                          int32_t val) {
12746  const EnumHandlerData *hd = handler_data;
12747  upb_json_printer *p = closure;
12748
12749  print_enum_symbolic_name(p, hd->enumdef, val);
12750
12751  return true;
12752}
12753
12754static void *scalar_startsubmsg(void *closure, const void *handler_data) {
12755  return putkey(closure, handler_data) ? closure : UPB_BREAK;
12756}
12757
12758static void *repeated_startsubmsg(void *closure, const void *handler_data) {
12759  upb_json_printer *p = closure;
12760  UPB_UNUSED(handler_data);
12761  print_comma(p);
12762  return closure;
12763}
12764
12765static void start_frame(upb_json_printer *p) {
12766  p->depth_++;
12767  p->first_elem_[p->depth_] = true;
12768  print_data(p, "{", 1);
12769}
12770
12771static void end_frame(upb_json_printer *p) {
12772  print_data(p, "}", 1);
12773  p->depth_--;
12774}
12775
12776static bool printer_startmsg(void *closure, const void *handler_data) {
12777  upb_json_printer *p = closure;
12778  UPB_UNUSED(handler_data);
12779  if (p->depth_ == 0) {
12780    upb_bytessink_start(p->output_, 0, &p->subc_);
12781  }
12782  start_frame(p);
12783  return true;
12784}
12785
12786static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) {
12787  upb_json_printer *p = closure;
12788  UPB_UNUSED(handler_data);
12789  UPB_UNUSED(s);
12790  end_frame(p);
12791  if (p->depth_ == 0) {
12792    upb_bytessink_end(p->output_);
12793  }
12794  return true;
12795}
12796
12797static void *startseq(void *closure, const void *handler_data) {
12798  upb_json_printer *p = closure;
12799  CHK(putkey(closure, handler_data));
12800  p->depth_++;
12801  p->first_elem_[p->depth_] = true;
12802  print_data(p, "[", 1);
12803  return closure;
12804}
12805
12806static bool endseq(void *closure, const void *handler_data) {
12807  upb_json_printer *p = closure;
12808  UPB_UNUSED(handler_data);
12809  print_data(p, "]", 1);
12810  p->depth_--;
12811  return true;
12812}
12813
12814static void *startmap(void *closure, const void *handler_data) {
12815  upb_json_printer *p = closure;
12816  CHK(putkey(closure, handler_data));
12817  p->depth_++;
12818  p->first_elem_[p->depth_] = true;
12819  print_data(p, "{", 1);
12820  return closure;
12821}
12822
12823static bool endmap(void *closure, const void *handler_data) {
12824  upb_json_printer *p = closure;
12825  UPB_UNUSED(handler_data);
12826  print_data(p, "}", 1);
12827  p->depth_--;
12828  return true;
12829}
12830
12831static size_t putstr(void *closure, const void *handler_data, const char *str,
12832                     size_t len, const upb_bufhandle *handle) {
12833  upb_json_printer *p = closure;
12834  UPB_UNUSED(handler_data);
12835  UPB_UNUSED(handle);
12836  putstring(p, str, len);
12837  return len;
12838}
12839
12840/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */
12841static size_t putbytes(void *closure, const void *handler_data, const char *str,
12842                       size_t len, const upb_bufhandle *handle) {
12843  upb_json_printer *p = closure;
12844
12845  /* This is the regular base64, not the "web-safe" version. */
12846  static const char base64[] =
12847      "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
12848
12849  /* Base64-encode. */
12850  char data[16000];
12851  const char *limit = data + sizeof(data);
12852  const unsigned char *from = (const unsigned char*)str;
12853  char *to = data;
12854  size_t remaining = len;
12855  size_t bytes;
12856
12857  UPB_UNUSED(handler_data);
12858  UPB_UNUSED(handle);
12859
12860  print_data(p, "\"", 1);
12861
12862  while (remaining > 2) {
12863    if (limit - to < 4) {
12864      bytes = to - data;
12865      putstring(p, data, bytes);
12866      to = data;
12867    }
12868
12869    to[0] = base64[from[0] >> 2];
12870    to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12871    to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)];
12872    to[3] = base64[from[2] & 0x3f];
12873
12874    remaining -= 3;
12875    to += 4;
12876    from += 3;
12877  }
12878
12879  switch (remaining) {
12880    case 2:
12881      to[0] = base64[from[0] >> 2];
12882      to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)];
12883      to[2] = base64[(from[1] & 0xf) << 2];
12884      to[3] = '=';
12885      to += 4;
12886      from += 2;
12887      break;
12888    case 1:
12889      to[0] = base64[from[0] >> 2];
12890      to[1] = base64[((from[0] & 0x3) << 4)];
12891      to[2] = '=';
12892      to[3] = '=';
12893      to += 4;
12894      from += 1;
12895      break;
12896  }
12897
12898  bytes = to - data;
12899  putstring(p, data, bytes);
12900  print_data(p, "\"", 1);
12901  return len;
12902}
12903
12904static void *scalar_startstr(void *closure, const void *handler_data,
12905                             size_t size_hint) {
12906  upb_json_printer *p = closure;
12907  UPB_UNUSED(handler_data);
12908  UPB_UNUSED(size_hint);
12909  CHK(putkey(closure, handler_data));
12910  print_data(p, "\"", 1);
12911  return p;
12912}
12913
12914static size_t scalar_str(void *closure, const void *handler_data,
12915                         const char *str, size_t len,
12916                         const upb_bufhandle *handle) {
12917  CHK(putstr(closure, handler_data, str, len, handle));
12918  return len;
12919}
12920
12921static bool scalar_endstr(void *closure, const void *handler_data) {
12922  upb_json_printer *p = closure;
12923  UPB_UNUSED(handler_data);
12924  print_data(p, "\"", 1);
12925  return true;
12926}
12927
12928static void *repeated_startstr(void *closure, const void *handler_data,
12929                               size_t size_hint) {
12930  upb_json_printer *p = closure;
12931  UPB_UNUSED(handler_data);
12932  UPB_UNUSED(size_hint);
12933  print_comma(p);
12934  print_data(p, "\"", 1);
12935  return p;
12936}
12937
12938static size_t repeated_str(void *closure, const void *handler_data,
12939                           const char *str, size_t len,
12940                           const upb_bufhandle *handle) {
12941  CHK(putstr(closure, handler_data, str, len, handle));
12942  return len;
12943}
12944
12945static bool repeated_endstr(void *closure, const void *handler_data) {
12946  upb_json_printer *p = closure;
12947  UPB_UNUSED(handler_data);
12948  print_data(p, "\"", 1);
12949  return true;
12950}
12951
12952static void *mapkeyval_startstr(void *closure, const void *handler_data,
12953                                size_t size_hint) {
12954  upb_json_printer *p = closure;
12955  UPB_UNUSED(handler_data);
12956  UPB_UNUSED(size_hint);
12957  print_data(p, "\"", 1);
12958  return p;
12959}
12960
12961static size_t mapkey_str(void *closure, const void *handler_data,
12962                         const char *str, size_t len,
12963                         const upb_bufhandle *handle) {
12964  CHK(putstr(closure, handler_data, str, len, handle));
12965  return len;
12966}
12967
12968static bool mapkey_endstr(void *closure, const void *handler_data) {
12969  upb_json_printer *p = closure;
12970  UPB_UNUSED(handler_data);
12971  print_data(p, "\":", 2);
12972  return true;
12973}
12974
12975static bool mapvalue_endstr(void *closure, const void *handler_data) {
12976  upb_json_printer *p = closure;
12977  UPB_UNUSED(handler_data);
12978  print_data(p, "\"", 1);
12979  return true;
12980}
12981
12982static size_t scalar_bytes(void *closure, const void *handler_data,
12983                           const char *str, size_t len,
12984                           const upb_bufhandle *handle) {
12985  CHK(putkey(closure, handler_data));
12986  CHK(putbytes(closure, handler_data, str, len, handle));
12987  return len;
12988}
12989
12990static size_t repeated_bytes(void *closure, const void *handler_data,
12991                             const char *str, size_t len,
12992                             const upb_bufhandle *handle) {
12993  upb_json_printer *p = closure;
12994  print_comma(p);
12995  CHK(putbytes(closure, handler_data, str, len, handle));
12996  return len;
12997}
12998
12999static size_t mapkey_bytes(void *closure, const void *handler_data,
13000                           const char *str, size_t len,
13001                           const upb_bufhandle *handle) {
13002  upb_json_printer *p = closure;
13003  CHK(putbytes(closure, handler_data, str, len, handle));
13004  print_data(p, ":", 1);
13005  return len;
13006}
13007
13008static void set_enum_hd(upb_handlers *h,
13009                        const upb_fielddef *f,
13010                        bool preserve_fieldnames,
13011                        upb_handlerattr *attr) {
13012  EnumHandlerData *hd = upb_gmalloc(sizeof(EnumHandlerData));
13013  hd->enumdef = upb_fielddef_enumsubdef(f);
13014  hd->keyname = newstrpc(h, f, preserve_fieldnames);
13015  upb_handlers_addcleanup(h, hd, upb_gfree);
13016  attr->handler_data = hd;
13017}
13018
13019/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair
13020 * in a map).
13021 *
13022 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated
13023 * key or value cases properly. The right way to do this is to allocate a
13024 * temporary structure at the start of a mapentry submessage, store key and
13025 * value data in it as key and value handlers are called, and then print the
13026 * key/value pair once at the end of the submessage. If we don't do this, we
13027 * should at least detect the case and throw an error. However, so far all of
13028 * our sources that emit mapentry messages do so canonically (with one key
13029 * field, and then one value field), so this is not a pressing concern at the
13030 * moment. */
13031void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames,
13032                                  upb_handlers *h) {
13033  const upb_msgdef *md = upb_handlers_msgdef(h);
13034
13035  /* A mapentry message is printed simply as '"key": value'. Rather than
13036   * special-case key and value for every type below, we just handle both
13037   * fields explicitly here. */
13038  const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY);
13039  const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE);
13040
13041  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13042
13043  UPB_UNUSED(closure);
13044
13045  switch (upb_fielddef_type(key_field)) {
13046    case UPB_TYPE_INT32:
13047      upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr);
13048      break;
13049    case UPB_TYPE_INT64:
13050      upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr);
13051      break;
13052    case UPB_TYPE_UINT32:
13053      upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr);
13054      break;
13055    case UPB_TYPE_UINT64:
13056      upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr);
13057      break;
13058    case UPB_TYPE_BOOL:
13059      upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr);
13060      break;
13061    case UPB_TYPE_STRING:
13062      upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr);
13063      upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr);
13064      upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr);
13065      break;
13066    case UPB_TYPE_BYTES:
13067      upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr);
13068      break;
13069    default:
13070      UPB_ASSERT(false);
13071      break;
13072  }
13073
13074  switch (upb_fielddef_type(value_field)) {
13075    case UPB_TYPE_INT32:
13076      upb_handlers_setint32(h, value_field, putint32_t, &empty_attr);
13077      break;
13078    case UPB_TYPE_INT64:
13079      upb_handlers_setint64(h, value_field, putint64_t, &empty_attr);
13080      break;
13081    case UPB_TYPE_UINT32:
13082      upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr);
13083      break;
13084    case UPB_TYPE_UINT64:
13085      upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr);
13086      break;
13087    case UPB_TYPE_BOOL:
13088      upb_handlers_setbool(h, value_field, putbool, &empty_attr);
13089      break;
13090    case UPB_TYPE_FLOAT:
13091      upb_handlers_setfloat(h, value_field, putfloat, &empty_attr);
13092      break;
13093    case UPB_TYPE_DOUBLE:
13094      upb_handlers_setdouble(h, value_field, putdouble, &empty_attr);
13095      break;
13096    case UPB_TYPE_STRING:
13097      upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr);
13098      upb_handlers_setstring(h, value_field, putstr, &empty_attr);
13099      upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr);
13100      break;
13101    case UPB_TYPE_BYTES:
13102      upb_handlers_setstring(h, value_field, putbytes, &empty_attr);
13103      break;
13104    case UPB_TYPE_ENUM: {
13105      upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
13106      set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr);
13107      upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr);
13108      break;
13109    }
13110    case UPB_TYPE_MESSAGE:
13111      /* No handler necessary -- the submsg handlers will print the message
13112       * as appropriate. */
13113      break;
13114  }
13115}
13116
13117static bool putseconds(void *closure, const void *handler_data,
13118                       int64_t seconds) {
13119  upb_json_printer *p = closure;
13120  p->seconds = seconds;
13121  UPB_UNUSED(handler_data);
13122  return true;
13123}
13124
13125static bool putnanos(void *closure, const void *handler_data,
13126                     int32_t nanos) {
13127  upb_json_printer *p = closure;
13128  p->nanos = nanos;
13129  UPB_UNUSED(handler_data);
13130  return true;
13131}
13132
13133static void *scalar_startstr_nokey(void *closure, const void *handler_data,
13134                                   size_t size_hint) {
13135  upb_json_printer *p = closure;
13136  UPB_UNUSED(handler_data);
13137  UPB_UNUSED(size_hint);
13138  print_data(p, "\"", 1);
13139  return p;
13140}
13141
13142static size_t putstr_nokey(void *closure, const void *handler_data,
13143                           const char *str, size_t len,
13144                           const upb_bufhandle *handle) {
13145  upb_json_printer *p = closure;
13146  UPB_UNUSED(handler_data);
13147  UPB_UNUSED(handle);
13148  print_data(p, "\"", 1);
13149  putstring(p, str, len);
13150  print_data(p, "\"", 1);
13151  return len + 2;
13152}
13153
13154static void *startseq_nokey(void *closure, const void *handler_data) {
13155  upb_json_printer *p = closure;
13156  UPB_UNUSED(handler_data);
13157  p->depth_++;
13158  p->first_elem_[p->depth_] = true;
13159  print_data(p, "[", 1);
13160  return closure;
13161}
13162
13163static void *startseq_fieldmask(void *closure, const void *handler_data) {
13164  upb_json_printer *p = closure;
13165  UPB_UNUSED(handler_data);
13166  p->depth_++;
13167  p->first_elem_[p->depth_] = true;
13168  return closure;
13169}
13170
13171static bool endseq_fieldmask(void *closure, const void *handler_data) {
13172  upb_json_printer *p = closure;
13173  UPB_UNUSED(handler_data);
13174  p->depth_--;
13175  return true;
13176}
13177
13178static void *repeated_startstr_fieldmask(
13179    void *closure, const void *handler_data,
13180    size_t size_hint) {
13181  upb_json_printer *p = closure;
13182  UPB_UNUSED(handler_data);
13183  UPB_UNUSED(size_hint);
13184  print_comma(p);
13185  return p;
13186}
13187
13188static size_t repeated_str_fieldmask(
13189    void *closure, const void *handler_data,
13190    const char *str, size_t len,
13191    const upb_bufhandle *handle) {
13192  const char* limit = str + len;
13193  bool upper = false;
13194  size_t result_len = 0;
13195  for (; str < limit; str++) {
13196    if (*str == '_') {
13197      upper = true;
13198      continue;
13199    }
13200    if (upper && *str >= 'a' && *str <= 'z') {
13201      char upper_char = toupper(*str);
13202      CHK(putstr(closure, handler_data, &upper_char, 1, handle));
13203    } else {
13204      CHK(putstr(closure, handler_data, str, 1, handle));
13205    }
13206    upper = false;
13207    result_len++;
13208  }
13209  return result_len;
13210}
13211
13212static void *startmap_nokey(void *closure, const void *handler_data) {
13213  upb_json_printer *p = closure;
13214  UPB_UNUSED(handler_data);
13215  p->depth_++;
13216  p->first_elem_[p->depth_] = true;
13217  print_data(p, "{", 1);
13218  return closure;
13219}
13220
13221static bool putnull(void *closure, const void *handler_data,
13222                    int32_t null) {
13223  upb_json_printer *p = closure;
13224  print_data(p, "null", 4);
13225  UPB_UNUSED(handler_data);
13226  UPB_UNUSED(null);
13227  return true;
13228}
13229
13230static bool printer_startdurationmsg(void *closure, const void *handler_data) {
13231  upb_json_printer *p = closure;
13232  UPB_UNUSED(handler_data);
13233  if (p->depth_ == 0) {
13234    upb_bytessink_start(p->output_, 0, &p->subc_);
13235  }
13236  return true;
13237}
13238
13239#define UPB_DURATION_MAX_JSON_LEN 23
13240#define UPB_DURATION_MAX_NANO_LEN 9
13241
13242static bool printer_enddurationmsg(void *closure, const void *handler_data,
13243                                   upb_status *s) {
13244  upb_json_printer *p = closure;
13245  char buffer[UPB_DURATION_MAX_JSON_LEN];
13246  size_t base_len;
13247  size_t curr;
13248  size_t i;
13249
13250  memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN);
13251
13252  if (p->seconds < -315576000000) {
13253    upb_status_seterrf(s, "error parsing duration: "
13254                          "minimum acceptable value is "
13255                          "-315576000000");
13256    return false;
13257  }
13258
13259  if (p->seconds > 315576000000) {
13260    upb_status_seterrf(s, "error serializing duration: "
13261                          "maximum acceptable value is "
13262                          "315576000000");
13263    return false;
13264  }
13265
13266  _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds);
13267  base_len = strlen(buffer);
13268
13269  if (p->nanos != 0) {
13270    char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3];
13271    _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13272                  p->nanos / 1000000000.0);
13273    /* Remove trailing 0. */
13274    for (i = UPB_DURATION_MAX_NANO_LEN + 2;
13275         nanos_buffer[i] == '0'; i--) {
13276      nanos_buffer[i] = 0;
13277    }
13278    strcpy(buffer + base_len, nanos_buffer + 1);
13279  }
13280
13281  curr = strlen(buffer);
13282  strcpy(buffer + curr, "s");
13283
13284  p->seconds = 0;
13285  p->nanos = 0;
13286
13287  print_data(p, "\"", 1);
13288  print_data(p, buffer, strlen(buffer));
13289  print_data(p, "\"", 1);
13290
13291  if (p->depth_ == 0) {
13292    upb_bytessink_end(p->output_);
13293  }
13294
13295  UPB_UNUSED(handler_data);
13296  return true;
13297}
13298
13299static bool printer_starttimestampmsg(void *closure, const void *handler_data) {
13300  upb_json_printer *p = closure;
13301  UPB_UNUSED(handler_data);
13302  if (p->depth_ == 0) {
13303    upb_bytessink_start(p->output_, 0, &p->subc_);
13304  }
13305  return true;
13306}
13307
13308#define UPB_TIMESTAMP_MAX_JSON_LEN 31
13309#define UPB_TIMESTAMP_BEFORE_NANO_LEN 19
13310#define UPB_TIMESTAMP_MAX_NANO_LEN 9
13311
13312static bool printer_endtimestampmsg(void *closure, const void *handler_data,
13313                                    upb_status *s) {
13314  upb_json_printer *p = closure;
13315  char buffer[UPB_TIMESTAMP_MAX_JSON_LEN];
13316  time_t time = p->seconds;
13317  size_t curr;
13318  size_t i;
13319  size_t year_length =
13320      strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", gmtime(&time));
13321
13322  if (p->seconds < -62135596800) {
13323    upb_status_seterrf(s, "error parsing timestamp: "
13324                          "minimum acceptable value is "
13325                          "0001-01-01T00:00:00Z");
13326    return false;
13327  }
13328
13329  if (p->seconds > 253402300799) {
13330    upb_status_seterrf(s, "error parsing timestamp: "
13331                          "maximum acceptable value is "
13332                          "9999-12-31T23:59:59Z");
13333    return false;
13334  }
13335
13336  /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */
13337  for (i = 0; i < 4 - year_length; i++) {
13338    buffer[i] = '0';
13339  }
13340
13341  strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN,
13342           "%Y-%m-%dT%H:%M:%S", gmtime(&time));
13343  if (p->nanos != 0) {
13344    char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3];
13345    _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f",
13346                  p->nanos / 1000000000.0);
13347    /* Remove trailing 0. */
13348    for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2;
13349         nanos_buffer[i] == '0'; i--) {
13350      nanos_buffer[i] = 0;
13351    }
13352    strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1);
13353  }
13354
13355  curr = strlen(buffer);
13356  strcpy(buffer + curr, "Z");
13357
13358  p->seconds = 0;
13359  p->nanos = 0;
13360
13361  print_data(p, "\"", 1);
13362  print_data(p, buffer, strlen(buffer));
13363  print_data(p, "\"", 1);
13364
13365  if (p->depth_ == 0) {
13366    upb_bytessink_end(p->output_);
13367  }
13368
13369  UPB_UNUSED(handler_data);
13370  UPB_UNUSED(s);
13371  return true;
13372}
13373
13374static bool printer_startmsg_noframe(void *closure, const void *handler_data) {
13375  upb_json_printer *p = closure;
13376  UPB_UNUSED(handler_data);
13377  if (p->depth_ == 0) {
13378    upb_bytessink_start(p->output_, 0, &p->subc_);
13379  }
13380  return true;
13381}
13382
13383static bool printer_endmsg_noframe(
13384    void *closure, const void *handler_data, upb_status *s) {
13385  upb_json_printer *p = closure;
13386  UPB_UNUSED(handler_data);
13387  UPB_UNUSED(s);
13388  if (p->depth_ == 0) {
13389    upb_bytessink_end(p->output_);
13390  }
13391  return true;
13392}
13393
13394static bool printer_startmsg_fieldmask(
13395    void *closure, const void *handler_data) {
13396  upb_json_printer *p = closure;
13397  UPB_UNUSED(handler_data);
13398  if (p->depth_ == 0) {
13399    upb_bytessink_start(p->output_, 0, &p->subc_);
13400  }
13401  print_data(p, "\"", 1);
13402  return true;
13403}
13404
13405static bool printer_endmsg_fieldmask(
13406    void *closure, const void *handler_data, upb_status *s) {
13407  upb_json_printer *p = closure;
13408  UPB_UNUSED(handler_data);
13409  UPB_UNUSED(s);
13410  print_data(p, "\"", 1);
13411  if (p->depth_ == 0) {
13412    upb_bytessink_end(p->output_);
13413  }
13414  return true;
13415}
13416
/* Start-of-string handler that emits only the field key via putkey() (no
 * opening quote for the value). Returns the closure on success; a failing
 * putkey() leaves the function early through CHK. */
static void *scalar_startstr_onlykey(
    void *closure, const void *handler_data, size_t size_hint) {
  UPB_UNUSED(size_hint);
  CHK(putkey(closure, handler_data));
  return closure;
}
13424
13425/* Set up handlers for an Any submessage. */
13426void printer_sethandlers_any(const void *closure, upb_handlers *h) {
13427  const upb_msgdef *md = upb_handlers_msgdef(h);
13428
13429  const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE);
13430  const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE);
13431
13432  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13433
13434  /* type_url's json name is "@type" */
13435  upb_handlerattr type_name_attr = UPB_HANDLERATTR_INIT;
13436  upb_handlerattr value_name_attr = UPB_HANDLERATTR_INIT;
13437  strpc *type_url_json_name = newstrpc_str(h, "@type");
13438  strpc *value_json_name = newstrpc_str(h, "value");
13439
13440  type_name_attr.handler_data = type_url_json_name;
13441  value_name_attr.handler_data = value_json_name;
13442
13443  /* Set up handlers. */
13444  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
13445  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
13446
13447  upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr);
13448  upb_handlers_setstring(h, type_field, scalar_str, &empty_attr);
13449  upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr);
13450
13451  /* This is not the full and correct JSON encoding for the Any value field. It
13452   * requires further processing by the wrapper code based on the type URL.
13453   */
13454  upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey,
13455                           &value_name_attr);
13456
13457  UPB_UNUSED(closure);
13458}
13459
13460/* Set up handlers for a fieldmask submessage. */
13461void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) {
13462  const upb_msgdef *md = upb_handlers_msgdef(h);
13463  const upb_fielddef* f = upb_msgdef_itof(md, 1);
13464
13465  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13466
13467  upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr);
13468  upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr);
13469
13470  upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr);
13471  upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr);
13472
13473  upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr);
13474  upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr);
13475
13476  UPB_UNUSED(closure);
13477}
13478
13479/* Set up handlers for a duration submessage. */
13480void printer_sethandlers_duration(const void *closure, upb_handlers *h) {
13481  const upb_msgdef *md = upb_handlers_msgdef(h);
13482
13483  const upb_fielddef* seconds_field =
13484      upb_msgdef_itof(md, UPB_DURATION_SECONDS);
13485  const upb_fielddef* nanos_field =
13486      upb_msgdef_itof(md, UPB_DURATION_NANOS);
13487
13488  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13489
13490  upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr);
13491  upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13492  upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13493  upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr);
13494
13495  UPB_UNUSED(closure);
13496}
13497
13498/* Set up handlers for a timestamp submessage. Instead of printing fields
13499 * separately, the json representation of timestamp follows RFC 3339 */
13500void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) {
13501  const upb_msgdef *md = upb_handlers_msgdef(h);
13502
13503  const upb_fielddef* seconds_field =
13504      upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS);
13505  const upb_fielddef* nanos_field =
13506      upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS);
13507
13508  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13509
13510  upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr);
13511  upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr);
13512  upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr);
13513  upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr);
13514
13515  UPB_UNUSED(closure);
13516}
13517
13518void printer_sethandlers_value(const void *closure, upb_handlers *h) {
13519  const upb_msgdef *md = upb_handlers_msgdef(h);
13520  upb_msg_field_iter i;
13521
13522  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13523
13524  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13525  upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13526
13527  upb_msg_field_begin(&i, md);
13528  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
13529    const upb_fielddef *f = upb_msg_iter_field(&i);
13530
13531    switch (upb_fielddef_type(f)) {
13532      case UPB_TYPE_ENUM:
13533        upb_handlers_setint32(h, f, putnull, &empty_attr);
13534        break;
13535      case UPB_TYPE_DOUBLE:
13536        upb_handlers_setdouble(h, f, putdouble, &empty_attr);
13537        break;
13538      case UPB_TYPE_STRING:
13539        upb_handlers_setstartstr(h, f, scalar_startstr_nokey, &empty_attr);
13540        upb_handlers_setstring(h, f, scalar_str, &empty_attr);
13541        upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
13542        break;
13543      case UPB_TYPE_BOOL:
13544        upb_handlers_setbool(h, f, putbool, &empty_attr);
13545        break;
13546      case UPB_TYPE_MESSAGE:
13547        break;
13548      default:
13549        UPB_ASSERT(false);
13550        break;
13551    }
13552  }
13553
13554  UPB_UNUSED(closure);
13555}
13556
13557#define WRAPPER_SETHANDLERS(wrapper, type, putmethod)                      \
13558void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \
13559  const upb_msgdef *md = upb_handlers_msgdef(h);                           \
13560  const upb_fielddef* f = upb_msgdef_itof(md, 1);                          \
13561  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;                \
13562  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);      \
13563  upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);          \
13564  upb_handlers_set##type(h, f, putmethod, &empty_attr);                    \
13565  UPB_UNUSED(closure);                                                     \
13566}
13567
13568WRAPPER_SETHANDLERS(doublevalue, double, putdouble)
13569WRAPPER_SETHANDLERS(floatvalue,  float,  putfloat)
13570WRAPPER_SETHANDLERS(int64value,  int64,  putint64_t)
13571WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t)
13572WRAPPER_SETHANDLERS(int32value,  int32,  putint32_t)
13573WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t)
13574WRAPPER_SETHANDLERS(boolvalue,   bool,   putbool)
13575WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey)
13576WRAPPER_SETHANDLERS(bytesvalue,  string, putbytes)
13577
13578#undef WRAPPER_SETHANDLERS
13579
13580void printer_sethandlers_listvalue(const void *closure, upb_handlers *h) {
13581  const upb_msgdef *md = upb_handlers_msgdef(h);
13582  const upb_fielddef* f = upb_msgdef_itof(md, 1);
13583
13584  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13585
13586  upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr);
13587  upb_handlers_setendseq(h, f, endseq, &empty_attr);
13588
13589  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13590  upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13591
13592  upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
13593
13594  UPB_UNUSED(closure);
13595}
13596
13597void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) {
13598  const upb_msgdef *md = upb_handlers_msgdef(h);
13599  const upb_fielddef* f = upb_msgdef_itof(md, 1);
13600
13601  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13602
13603  upb_handlers_setstartseq(h, f, startmap_nokey, &empty_attr);
13604  upb_handlers_setendseq(h, f, endmap, &empty_attr);
13605
13606  upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr);
13607  upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr);
13608
13609  upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr);
13610
13611  UPB_UNUSED(closure);
13612}
13613
13614void printer_sethandlers(const void *closure, upb_handlers *h) {
13615  const upb_msgdef *md = upb_handlers_msgdef(h);
13616  bool is_mapentry = upb_msgdef_mapentry(md);
13617  upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT;
13618  upb_msg_field_iter i;
13619  const upb_json_printercache *cache = closure;
13620  const bool preserve_fieldnames = cache->preserve_fieldnames;
13621
13622  if (is_mapentry) {
13623    /* mapentry messages are sufficiently different that we handle them
13624     * separately. */
13625    printer_sethandlers_mapentry(closure, preserve_fieldnames, h);
13626    return;
13627  }
13628
13629  switch (upb_msgdef_wellknowntype(md)) {
13630    case UPB_WELLKNOWN_UNSPECIFIED:
13631      break;
13632    case UPB_WELLKNOWN_ANY:
13633      printer_sethandlers_any(closure, h);
13634      return;
13635    case UPB_WELLKNOWN_FIELDMASK:
13636      printer_sethandlers_fieldmask(closure, h);
13637      return;
13638    case UPB_WELLKNOWN_DURATION:
13639      printer_sethandlers_duration(closure, h);
13640      return;
13641    case UPB_WELLKNOWN_TIMESTAMP:
13642      printer_sethandlers_timestamp(closure, h);
13643      return;
13644    case UPB_WELLKNOWN_VALUE:
13645      printer_sethandlers_value(closure, h);
13646      return;
13647    case UPB_WELLKNOWN_LISTVALUE:
13648      printer_sethandlers_listvalue(closure, h);
13649      return;
13650    case UPB_WELLKNOWN_STRUCT:
13651      printer_sethandlers_structvalue(closure, h);
13652      return;
13653#define WRAPPER(wellknowntype, name)        \
13654  case wellknowntype:                       \
13655    printer_sethandlers_##name(closure, h); \
13656    return;                                 \
13657
13658    WRAPPER(UPB_WELLKNOWN_DOUBLEVALUE, doublevalue);
13659    WRAPPER(UPB_WELLKNOWN_FLOATVALUE, floatvalue);
13660    WRAPPER(UPB_WELLKNOWN_INT64VALUE, int64value);
13661    WRAPPER(UPB_WELLKNOWN_UINT64VALUE, uint64value);
13662    WRAPPER(UPB_WELLKNOWN_INT32VALUE, int32value);
13663    WRAPPER(UPB_WELLKNOWN_UINT32VALUE, uint32value);
13664    WRAPPER(UPB_WELLKNOWN_BOOLVALUE, boolvalue);
13665    WRAPPER(UPB_WELLKNOWN_STRINGVALUE, stringvalue);
13666    WRAPPER(UPB_WELLKNOWN_BYTESVALUE, bytesvalue);
13667
13668#undef WRAPPER
13669  }
13670
13671  upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr);
13672  upb_handlers_setendmsg(h, printer_endmsg, &empty_attr);
13673
13674#define TYPE(type, name, ctype)                                               \
13675  case type:                                                                  \
13676    if (upb_fielddef_isseq(f)) {                                              \
13677      upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr);            \
13678    } else {                                                                  \
13679      upb_handlers_set##name(h, f, scalar_##ctype, &name_attr);               \
13680    }                                                                         \
13681    break;
13682
13683  upb_msg_field_begin(&i, md);
13684  for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) {
13685    const upb_fielddef *f = upb_msg_iter_field(&i);
13686
13687    upb_handlerattr name_attr = UPB_HANDLERATTR_INIT;
13688    name_attr.handler_data = newstrpc(h, f, preserve_fieldnames);
13689
13690    if (upb_fielddef_ismap(f)) {
13691      upb_handlers_setstartseq(h, f, startmap, &name_attr);
13692      upb_handlers_setendseq(h, f, endmap, &name_attr);
13693    } else if (upb_fielddef_isseq(f)) {
13694      upb_handlers_setstartseq(h, f, startseq, &name_attr);
13695      upb_handlers_setendseq(h, f, endseq, &empty_attr);
13696    }
13697
13698    switch (upb_fielddef_type(f)) {
13699      TYPE(UPB_TYPE_FLOAT,  float,  float);
13700      TYPE(UPB_TYPE_DOUBLE, double, double);
13701      TYPE(UPB_TYPE_BOOL,   bool,   bool);
13702      TYPE(UPB_TYPE_INT32,  int32,  int32_t);
13703      TYPE(UPB_TYPE_UINT32, uint32, uint32_t);
13704      TYPE(UPB_TYPE_INT64,  int64,  int64_t);
13705      TYPE(UPB_TYPE_UINT64, uint64, uint64_t);
13706      case UPB_TYPE_ENUM: {
13707        /* For now, we always emit symbolic names for enums. We may want an
13708         * option later to control this behavior, but we will wait for a real
13709         * need first. */
13710        upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT;
13711        set_enum_hd(h, f, preserve_fieldnames, &enum_attr);
13712
13713        if (upb_fielddef_isseq(f)) {
13714          upb_handlers_setint32(h, f, repeated_enum, &enum_attr);
13715        } else {
13716          upb_handlers_setint32(h, f, scalar_enum, &enum_attr);
13717        }
13718
13719        break;
13720      }
13721      case UPB_TYPE_STRING:
13722        if (upb_fielddef_isseq(f)) {
13723          upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr);
13724          upb_handlers_setstring(h, f, repeated_str, &empty_attr);
13725          upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr);
13726        } else {
13727          upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr);
13728          upb_handlers_setstring(h, f, scalar_str, &empty_attr);
13729          upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr);
13730        }
13731        break;
13732      case UPB_TYPE_BYTES:
13733        /* XXX: this doesn't support strings that span buffers yet. The base64
13734         * encoder will need to be made resumable for this to work properly. */
13735        if (upb_fielddef_isseq(f)) {
13736          upb_handlers_setstring(h, f, repeated_bytes, &empty_attr);
13737        } else {
13738          upb_handlers_setstring(h, f, scalar_bytes, &name_attr);
13739        }
13740        break;
13741      case UPB_TYPE_MESSAGE:
13742        if (upb_fielddef_isseq(f)) {
13743          upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr);
13744        } else {
13745          upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr);
13746        }
13747        break;
13748    }
13749  }
13750
13751#undef TYPE
13752}
13753
13754static void json_printer_reset(upb_json_printer *p) {
13755  p->depth_ = 0;
13756}
13757
13758
13759/* Public API *****************************************************************/
13760
13761upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h,
13762                                          upb_bytessink output) {
13763#ifndef NDEBUG
13764  size_t size_before = upb_arena_bytesallocated(a);
13765#endif
13766
13767  upb_json_printer *p = upb_arena_malloc(a, sizeof(upb_json_printer));
13768  if (!p) return NULL;
13769
13770  p->output_ = output;
13771  json_printer_reset(p);
13772  upb_sink_reset(&p->input_, h, p);
13773  p->seconds = 0;
13774  p->nanos = 0;
13775
13776  /* If this fails, increase the value in printer.h. */
13777  UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <=
13778                      UPB_JSON_PRINTER_SIZE);
13779  return p;
13780}
13781
13782upb_sink upb_json_printer_input(upb_json_printer *p) {
13783  return p->input_;
13784}
13785
13786upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames) {
13787  upb_json_printercache *cache = upb_gmalloc(sizeof(*cache));
13788  upb_handlercache *ret = upb_handlercache_new(printer_sethandlers, cache);
13789
13790  cache->preserve_fieldnames = preserve_proto_fieldnames;
13791  upb_handlercache_addcleanup(ret, cache, upb_gfree);
13792
13793  return ret;
13794}
13795/* See port_def.inc.  This should #undef all macros #defined there. */
13796
13797#undef UPB_MAPTYPE_STRING
13798#undef UPB_SIZE
13799#undef UPB_PTR_AT
13800#undef UPB_READ_ONEOF
13801#undef UPB_WRITE_ONEOF
13802#undef UPB_INLINE
13803#undef UPB_FORCEINLINE
13804#undef UPB_NOINLINE
13805#undef UPB_NORETURN
13806#undef UPB_MAX
13807#undef UPB_MIN
13808#undef UPB_UNUSED
13809#undef UPB_ASSUME
13810#undef UPB_ASSERT
13811#undef UPB_ASSERT_DEBUGVAR
13812#undef UPB_UNREACHABLE
13813#undef UPB_INFINITY
13814#undef UPB_MSVC_VSNPRINTF
13815#undef _upb_snprintf
13816#undef _upb_vsnprintf
13817#undef _upb_va_copy
13818